1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
105
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
109
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
113
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
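/* Editorial note (not in the original file): MODE_INDEX maps the integer
   machine modes onto the slots of the five-element multiply and divide cost
   arrays that follow, with slot 4 ("other") as the catch-all.  A minimal
   usage sketch, assuming the customary ix86_cost pointer and the mult_init /
   divide field names implied by the initializers below:

     int imul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int idiv_cost = ix86_cost->divide[MODE_INDEX (DImode)];
*/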
121
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
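/* Editorial note (not in the original file): the two cost scales are meant
   to agree on the baseline add.  With COSTS_N_INSNS (N) == (N) * 4 and an
   add taking 2 bytes:

     COSTS_N_INSNS (1) == 4     speed scale: one add
     COSTS_N_BYTES (2) == 4     size scale: a 2-byte add

   which is why ix86_size_cost below charges COSTS_N_BYTES (2) for an add.  */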
125
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
127
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
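/* Editorial note (not in the original file): each stringop_algs initializer
   reads as {algorithm for unknown size, {{max_size, algorithm, noalign}, ...}},
   where max_size == -1 terminates the list and covers every remaining size.
   The [2] arrays pair a strategy table for 32-bit code with one for 64-bit
   code, and DUMMY_STRINGOP_ALGS fills a slot the CPU description does not
   use.  This reading is inferred from the initializers in this file rather
   than restated from the struct definition.  */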
134
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
200 1, /* cond_not_taken_branch_cost. */
201 };
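/* Editorial note (not in the original file): ix86_size_cost is the table
   consulted when code is tuned for size rather than speed; the speed-tuned
   per-CPU tables follow.  A hypothetical sketch of the kind of selection
   this enables -- the helper name is invented and the ix86_cost pointer
   spelling is assumed, so neither is necessarily what this file uses:

     static inline const struct processor_costs *
     cost_table_for (bool optimize_for_size)
     {
       return optimize_for_size ? &ix86_size_cost : ix86_cost;
     }
*/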
202
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
210
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
276 1, /* cond_not_taken_branch_cost. */
277 };
278
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
285
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache.  The 486 has an 8kB cache
329    shared between code and data, so 4kB is
330    not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
353 1, /* cond_not_taken_branch_cost. */
354 };
355
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
362
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
428 1, /* cond_not_taken_branch_cost. */
429 };
430
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
432    (we ensure the alignment).  For small blocks an inline loop is still a
433    noticeable win; for bigger blocks either rep movsl or rep movsb is the
434    way to go.  Rep movsb apparently has a more expensive startup time in the
435    CPU, but after 4K the difference is down in the noise.  */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
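/* Editorial note (not in the original file): decoding pentiumpro_memcpy above
   as a worked example of the thresholds described in the preceding comment:

     size <=  128 bytes   -> inline loop
     size <= 1024 bytes   -> unrolled loop
     size <= 8192 bytes   -> rep movsl  (rep_prefix_4_byte)
     larger               -> rep movsb  (rep_prefix_1_byte)

   with rep_prefix_4_byte as the fallback when the size is unknown at compile
   time (the first field of the initializer).  */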
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
511 1, /* cond_not_taken_branch_cost. */
512 };
513
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
551
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
586 1, /* cond_not_taken_branch_cost. */
587 };
588
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
663 1, /* cond_not_taken_branch_cost. */
664 };
665
666 /* For some reason, Athlon deals better with the REP prefix (relative to
667    loops) than K8 does.  Alignment becomes important after 8 bytes for
668    memcpy and 128 bytes for memset.  */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
740 1, /* cond_not_taken_branch_cost. */
741 };
742
743 /* K8 has an optimized REP instruction for medium-sized blocks, but for
744    very small blocks it is better to use a loop.  For large blocks, a
745    libcall can do non-temporal accesses and beat inline code considerably.  */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
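/* Editorial note (not in the original file): in k8_memcpy and k8_memset the
   two array slots diverge -- the second entry (taken here to be the 64-bit
   strategy table, as inferred from the initializers) can use
   rep_prefix_8_byte, i.e. the rep movsq / rep stosq forms that only exist in
   64-bit mode, while the 32-bit entry tops out at rep_prefix_4_byte.  The
   older CPU descriptions above simply leave that slot as
   DUMMY_STRINGOP_ALGS.  */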
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
801 /* New AMD processors never drop prefetches; if they cannot be performed
802    immediately, they are queued.  We set the number of simultaneous
803    prefetches to a large constant to reflect this (it is probably not a
804    good idea to leave the number of prefetches completely unlimited, as
805    their execution also takes some time).  */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
814
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
827 2, /* cond_not_taken_branch_cost. */
828 };
829
830 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
831    for very small blocks it is better to use a loop.  For large blocks, a
832    libcall can do non-temporal accesses and beat inline code considerably.  */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
895 /* New AMD processors never drop prefetches; if they cannot be performed
896    immediately, they are queued.  We set the number of simultaneous
897    prefetches to a large constant to reflect this (it is probably not a
898    good idea to leave the number of prefetches completely unlimited, as
899    their execution also takes some time).  */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
908
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
921 1, /* cond_not_taken_branch_cost. */
922 };
923
924 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
925    very small blocks it is better to use a loop.  For large blocks, a libcall
926    can do non-temporal accesses and beat inline code considerably.  */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
937
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 /* New AMD processors never drop prefetches; if they cannot be performed
991    immediately, they are queued.  We set the number of simultaneous
992    prefetches to a large constant to reflect this (it is probably not a
993    good idea to leave the number of prefetches completely unlimited, as
994    their execution also takes some time).  */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1003
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016 1, /* cond_not_taken_branch_cost. */
1017 };
1018
1019 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1020    very small blocks it is better to use a loop.  For large blocks, a libcall
1021    can do non-temporal accesses and beat inline code considerably.  */
1022
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1033
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 /* New AMD processors never drop prefetches; if they cannot be performed
1087    immediately, they are queued.  We set the number of simultaneous
1088    prefetches to a large constant to reflect this (it is probably not a
1089    good idea to leave the number of prefetches completely unlimited, as
1090    their execution also takes some time).  */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1099
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
1112 1, /* cond_not_taken_branch_cost. */
1113 };
1114
1115
1116 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1117    very small blocks it is better to use a loop.  For large blocks, a libcall
1118    can do non-temporal accesses and beat inline code considerably.  */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
1173 /* New AMD processors never drop prefetches; if they cannot be performed
1174    immediately, they are queued.  We set the number of simultaneous
1175    prefetches to a large constant to reflect this (it is probably not a
1176    good idea to leave the number of prefetches completely unlimited, as
1177    their execution also takes some time).  */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1186
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
1199 1, /* cond_not_taken_branch_cost. */
1200 };
1201
1202 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1203    very small blocks it is better to use a loop.  For large blocks, a libcall
1204    can do non-temporal accesses and beat inline code considerably.  */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
1259 /* New AMD processors never drop prefetches; if they cannot be performed
1260    immediately, they are queued.  We set the number of simultaneous
1261    prefetches to a large constant to reflect this (it is probably not a
1262    good idea to leave the number of prefetches completely unlimited, as
1263    their execution also takes some time).  */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1272
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
1285 1, /* cond_not_taken_branch_cost. */
1286 };
1287
1288 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1289    very small blocks it is better to use a loop.  For large blocks, a libcall
1290    can do non-temporal accesses and beat inline code considerably.  */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1361
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
1374 1, /* cond_not_taken_branch_cost. */
1375 };
1376
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1460 };
1461
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1469
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1536 };
1537
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1542
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1548
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1615 };
1616
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1692 };
1693
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1769 };
1770
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1846 };
1847
1848 /* Generic should produce code tuned for Core i7 (and newer chips)
1849 and btver1 (and newer chips). */
1850
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1865 this cost, however, our current implementation of synth_mult results in
1866 the use of unnecessary temporary registers, causing regressions on
1867 several SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1933 };
1934
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1947
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1952 this cost, however, our current implementation of synth_mult results in
1953 the use of unnecessary temporary registers, causing regressions on
1954 several SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME: perhaps a more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2019 };
2020
2021
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2024
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2027
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2044
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2061
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2063
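/* Human-readable names of the tuning features. x86-tune.def provides
   DEF_TUNE (tune, name, selector) entries; DEF_TUNE is redefined here so
   that only the name column is expanded. */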
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2069 };
2070
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2073
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
2081 };
2082
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2085
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2091
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2094
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2097
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2100
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2103 };
2104
2105 /* If the average insn count for a single function invocation is
2106 lower than this constant, emit fast (but longer) prologue and
2107 epilogue code. */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2109
2110 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2114
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2117
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2119 {
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2153 };
2154
2155 /* The "default" register map used in 32bit mode. */
2156
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2158 {
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2170 };
2171
2172 /* The "default" register map used in 64bit mode. */
2173
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2175 {
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2187 };
2188
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2242 */
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2244 {
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
2256 };
2257
2258 /* Define parameter passing and return registers. */
2259
2260 static int const x86_64_int_parameter_registers[6] =
2261 {
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2263 };
2264
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2266 {
2267 CX_REG, DX_REG, R8_REG, R9_REG
2268 };
2269
2270 static int const x86_64_int_return_registers[4] =
2271 {
2272 AX_REG, DX_REG, DI_REG, SI_REG
2273 };
2274
2275 /* Additional registers that are clobbered by SYSV calls. */
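/* (rsi, rdi and xmm6-xmm15 are call-saved under the MS ABI but
   call-clobbered under the SysV ABI, so an MS-ABI caller must assume
   that a SysV callee destroys them.) */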
2276
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2278 {
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2283 };
2284
2285 /* Define the structure for the machine field in struct function. */
2286
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;
2289 unsigned short n;
2290 rtx rtl;
2291 struct stack_local_entry *next;
2292 };
2293
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2296
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2300
2301 saved static chain if ix86_static_chain_on_stack
2302
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2308
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2314 |
2315 [frame] |
2316 |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2319 */
2320 struct ix86_frame
2321 {
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2327
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2335
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
2339 };
2340
2341 /* Which cpu are we scheduling for. */
2342 enum attr_cpu ix86_schedule;
2343
2344 /* Which cpu are we optimizing for. */
2345 enum processor_type ix86_tune;
2346
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2349
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2352
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2356
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2369
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2372
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2376
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2379
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2382
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2386
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2390
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2393
2394 /* Register class used for passing a given 64-bit part of the argument.
2395 These represent classes as documented by the psABI, with the exception
2396 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2397 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2398
2399 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2400 whenever possible (the upper half does contain padding). */
2401 enum x86_64_reg_class
2402 {
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2414 };
2415
2416 #define MAX_CLASSES 8
2417
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2421
2422 \f
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2438
2439 enum ix86_function_specific_strings
2440 {
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2444 };
2445
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_print (FILE *, int,
2453 struct cl_target_option *);
2454 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2455 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2456 struct gcc_options *,
2457 struct gcc_options *,
2458 struct gcc_options *);
2459 static bool ix86_can_inline_p (tree, tree);
2460 static void ix86_set_current_function (tree);
2461 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2462
2463 static enum calling_abi ix86_function_abi (const_tree);
2464
2465 \f
2466 #ifndef SUBTARGET32_DEFAULT_CPU
2467 #define SUBTARGET32_DEFAULT_CPU "i386"
2468 #endif
2469
2470 /* Whether -mtune= or -march= were specified */
2471 static int ix86_tune_defaulted;
2472 static int ix86_arch_specified;
2473
2474 /* Vectorization library interface and handlers. */
2475 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2476
2477 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2479
2480 /* Processor target table, indexed by processor number */
2481 struct ptt
2482 {
2483 const char *const name; /* processor name */
2484 const struct processor_costs *cost; /* Processor costs */
2485 const int align_loop; /* Default alignments. */
2486 const int align_loop_max_skip;
2487 const int align_jump;
2488 const int align_jump_max_skip;
2489 const int align_func;
2490 };
2491
2492 /* This table must be in sync with enum processor_type in i386.h. */
2493 static const struct ptt processor_target_table[PROCESSOR_max] =
2494 {
2495 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2496 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2497 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2498 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2499 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2500 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2501 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2502 {"core2", &core_cost, 16, 10, 16, 10, 16},
2503 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2504 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2505 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2506 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2507 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2508 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2509 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2510 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2511 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2512 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2513 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2514 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2515 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2516 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2517 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2518 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2519 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2520 };
2521 \f
2522 static unsigned int
2523 rest_of_handle_insert_vzeroupper (void)
2524 {
2525 int i;
2526
2527 /* vzeroupper instructions are inserted immediately after reload to
2528 account for possible spills from 256-bit registers. The pass
2529 reuses the mode switching infrastructure by re-running the mode
2530 insertion pass, so disable entities that have already been processed. */
2531 for (i = 0; i < MAX_386_ENTITIES; i++)
2532 ix86_optimize_mode_switching[i] = 0;
2533
2534 ix86_optimize_mode_switching[AVX_U128] = 1;
2535
2536 /* Call optimize_mode_switching. */
2537 g->get_passes ()->execute_pass_mode_switching ();
2538 return 0;
2539 }
2540
2541 namespace {
2542
2543 const pass_data pass_data_insert_vzeroupper =
2544 {
2545 RTL_PASS, /* type */
2546 "vzeroupper", /* name */
2547 OPTGROUP_NONE, /* optinfo_flags */
2548 TV_NONE, /* tv_id */
2549 0, /* properties_required */
2550 0, /* properties_provided */
2551 0, /* properties_destroyed */
2552 0, /* todo_flags_start */
2553 TODO_df_finish, /* todo_flags_finish */
2554 };
2555
2556 class pass_insert_vzeroupper : public rtl_opt_pass
2557 {
2558 public:
2559 pass_insert_vzeroupper(gcc::context *ctxt)
2560 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2561 {}
2562
2563 /* opt_pass methods: */
2564 virtual bool gate (function *)
2565 {
2566 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2567 }
2568
2569 virtual unsigned int execute (function *)
2570 {
2571 return rest_of_handle_insert_vzeroupper ();
2572 }
2573
2574 }; // class pass_insert_vzeroupper
2575
2576 } // anon namespace
2577
2578 rtl_opt_pass *
2579 make_pass_insert_vzeroupper (gcc::context *ctxt)
2580 {
2581 return new pass_insert_vzeroupper (ctxt);
2582 }
2583
2584 /* Return true if a red-zone is in use. */
2585
2586 static inline bool
2587 ix86_using_red_zone (void)
2588 {
2589 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2590 }
2591 \f
2592 /* Return a string that documents the current -m options. The caller is
2593 responsible for freeing the string. */
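/* As an illustration (the exact set and order of options depends on the
   enabled ISA and flag masks), a 64-bit compilation using SSE math might
   produce something like:
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfxsr -mfpmath=sse". */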
2594
2595 static char *
2596 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2597 const char *tune, enum fpmath_unit fpmath,
2598 bool add_nl_p)
2599 {
2600 struct ix86_target_opts
2601 {
2602 const char *option; /* option string */
2603 HOST_WIDE_INT mask; /* isa mask options */
2604 };
2605
2606 /* This table is ordered so that options like -msse4.2 that imply
2607 preceding options match those first. */
2608 static struct ix86_target_opts isa_opts[] =
2609 {
2610 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2611 { "-mfma", OPTION_MASK_ISA_FMA },
2612 { "-mxop", OPTION_MASK_ISA_XOP },
2613 { "-mlwp", OPTION_MASK_ISA_LWP },
2614 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2615 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2616 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2617 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2618 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2619 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2620 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2621 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2622 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2623 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2624 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2625 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2626 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2627 { "-msse3", OPTION_MASK_ISA_SSE3 },
2628 { "-msse2", OPTION_MASK_ISA_SSE2 },
2629 { "-msse", OPTION_MASK_ISA_SSE },
2630 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2631 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2632 { "-mmmx", OPTION_MASK_ISA_MMX },
2633 { "-mabm", OPTION_MASK_ISA_ABM },
2634 { "-mbmi", OPTION_MASK_ISA_BMI },
2635 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2636 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2637 { "-mhle", OPTION_MASK_ISA_HLE },
2638 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2639 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2640 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2641 { "-madx", OPTION_MASK_ISA_ADX },
2642 { "-mtbm", OPTION_MASK_ISA_TBM },
2643 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2644 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2645 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2646 { "-maes", OPTION_MASK_ISA_AES },
2647 { "-msha", OPTION_MASK_ISA_SHA },
2648 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2649 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2650 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2651 { "-mf16c", OPTION_MASK_ISA_F16C },
2652 { "-mrtm", OPTION_MASK_ISA_RTM },
2653 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2654 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2655 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2656 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2657 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2658 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2659 { "-mmpx", OPTION_MASK_ISA_MPX },
2660 { "-mclwb", OPTION_MASK_ISA_CLWB },
2661 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2662 };
2663
2664 /* Flag options. */
2665 static struct ix86_target_opts flag_opts[] =
2666 {
2667 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2668 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2669 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2670 { "-m80387", MASK_80387 },
2671 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2672 { "-malign-double", MASK_ALIGN_DOUBLE },
2673 { "-mcld", MASK_CLD },
2674 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2675 { "-mieee-fp", MASK_IEEE_FP },
2676 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2677 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2678 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2679 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2680 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2681 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2682 { "-mno-red-zone", MASK_NO_RED_ZONE },
2683 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2684 { "-mrecip", MASK_RECIP },
2685 { "-mrtd", MASK_RTD },
2686 { "-msseregparm", MASK_SSEREGPARM },
2687 { "-mstack-arg-probe", MASK_STACK_PROBE },
2688 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2689 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2690 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2691 { "-mvzeroupper", MASK_VZEROUPPER },
2692 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2693 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2694 { "-mprefer-avx128", MASK_PREFER_AVX128},
2695 };
2696
2697 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2698
2699 char isa_other[40];
2700 char target_other[40];
2701 unsigned num = 0;
2702 unsigned i, j;
2703 char *ret;
2704 char *ptr;
2705 size_t len;
2706 size_t line_len;
2707 size_t sep_len;
2708 const char *abi;
2709
2710 memset (opts, '\0', sizeof (opts));
2711
2712 /* Add -march= option. */
2713 if (arch)
2714 {
2715 opts[num][0] = "-march=";
2716 opts[num++][1] = arch;
2717 }
2718
2719 /* Add -mtune= option. */
2720 if (tune)
2721 {
2722 opts[num][0] = "-mtune=";
2723 opts[num++][1] = tune;
2724 }
2725
2726 /* Add -m32/-m64/-mx32. */
2727 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2728 {
2729 if ((isa & OPTION_MASK_ABI_64) != 0)
2730 abi = "-m64";
2731 else
2732 abi = "-mx32";
2733 isa &= ~ (OPTION_MASK_ISA_64BIT
2734 | OPTION_MASK_ABI_64
2735 | OPTION_MASK_ABI_X32);
2736 }
2737 else
2738 abi = "-m32";
2739 opts[num++][0] = abi;
2740
2741 /* Pick out the options in isa options. */
2742 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2743 {
2744 if ((isa & isa_opts[i].mask) != 0)
2745 {
2746 opts[num++][0] = isa_opts[i].option;
2747 isa &= ~ isa_opts[i].mask;
2748 }
2749 }
2750
2751 if (isa && add_nl_p)
2752 {
2753 opts[num++][0] = isa_other;
2754 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2755 isa);
2756 }
2757
2758 /* Add flag options. */
2759 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2760 {
2761 if ((flags & flag_opts[i].mask) != 0)
2762 {
2763 opts[num++][0] = flag_opts[i].option;
2764 flags &= ~ flag_opts[i].mask;
2765 }
2766 }
2767
2768 if (flags && add_nl_p)
2769 {
2770 opts[num++][0] = target_other;
2771 sprintf (target_other, "(other flags: %#x)", flags);
2772 }
2773
2774 /* Add -fpmath= option. */
2775 if (fpmath)
2776 {
2777 opts[num][0] = "-mfpmath=";
2778 switch ((int) fpmath)
2779 {
2780 case FPMATH_387:
2781 opts[num++][1] = "387";
2782 break;
2783
2784 case FPMATH_SSE:
2785 opts[num++][1] = "sse";
2786 break;
2787
2788 case FPMATH_387 | FPMATH_SSE:
2789 opts[num++][1] = "sse+387";
2790 break;
2791
2792 default:
2793 gcc_unreachable ();
2794 }
2795 }
2796
2797 /* Any options? */
2798 if (num == 0)
2799 return NULL;
2800
2801 gcc_assert (num < ARRAY_SIZE (opts));
2802
2803 /* Size the string. */
2804 len = 0;
2805 sep_len = (add_nl_p) ? 3 : 1;
2806 for (i = 0; i < num; i++)
2807 {
2808 len += sep_len;
2809 for (j = 0; j < 2; j++)
2810 if (opts[i][j])
2811 len += strlen (opts[i][j]);
2812 }
2813
2814 /* Build the string. */
2815 ret = ptr = (char *) xmalloc (len);
2816 line_len = 0;
2817
2818 for (i = 0; i < num; i++)
2819 {
2820 size_t len2[2];
2821
2822 for (j = 0; j < 2; j++)
2823 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2824
2825 if (i != 0)
2826 {
2827 *ptr++ = ' ';
2828 line_len++;
2829
2830 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2831 {
2832 *ptr++ = '\\';
2833 *ptr++ = '\n';
2834 line_len = 0;
2835 }
2836 }
2837
2838 for (j = 0; j < 2; j++)
2839 if (opts[i][j])
2840 {
2841 memcpy (ptr, opts[i][j], len2[j]);
2842 ptr += len2[j];
2843 line_len += len2[j];
2844 }
2845 }
2846
2847 *ptr = '\0';
2848 gcc_assert (ret + len >= ptr);
2849
2850 return ret;
2851 }
2852
2853 /* Return true if profiling code should be emitted before the
2854 prologue, false otherwise.
2855 Note: for x86 with "hotfix" it is sorried. */
2856 static bool
2857 ix86_profile_before_prologue (void)
2858 {
2859 return flag_fentry != 0;
2860 }
2861
2862 /* Function that is callable from the debugger to print the current
2863 options. */
2864 void ATTRIBUTE_UNUSED
2865 ix86_debug_options (void)
2866 {
2867 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2868 ix86_arch_string, ix86_tune_string,
2869 ix86_fpmath, true);
2870
2871 if (opts)
2872 {
2873 fprintf (stderr, "%s\n\n", opts);
2874 free (opts);
2875 }
2876 else
2877 fputs ("<no options>\n\n", stderr);
2878
2879 return;
2880 }
2881
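/* Printable names of the stringop algorithms, generated from stringop.def
   by redefining DEF_ALG to expand to the stringified name; used below when
   parsing -mmemcpy-strategy= and -mmemset-strategy=. */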
2882 static const char *stringop_alg_names[] = {
2883 #define DEF_ENUM
2884 #define DEF_ALG(alg, name) #name,
2885 #include "stringop.def"
2886 #undef DEF_ENUM
2887 #undef DEF_ALG
2888 };
2889
2890 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2891 The string has the following form (or is a comma-separated list of such entries):
2892
2893 strategy_alg:max_size:[align|noalign]
2894
2895 where the full size range for the strategy is either [0, max_size] or
2896 [min_size, max_size], in which min_size is the max_size + 1 of the
2897 preceding range. The last size range must have max_size == -1.
2898
2899 Examples:
2900
2901 1.
2902 -mmemcpy-strategy=libcall:-1:noalign
2903
2904 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2905
2906
2907 2.
2908 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2909
2910 This is to tell the compiler to use the following strategy for memset
2911 1) when the expected size is between [1, 16], use rep_8byte strategy;
2912 2) when the size is between [17, 2048], use vector_loop;
2913 3) when the size is > 2048, use libcall. */
2914
2915 struct stringop_size_range
2916 {
2917 int max;
2918 stringop_alg alg;
2919 bool noalign;
2920 };
2921
2922 static void
2923 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2924 {
2925 const struct stringop_algs *default_algs;
2926 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2927 char *curr_range_str, *next_range_str;
2928 int i = 0, n = 0;
2929
2930 if (is_memset)
2931 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2932 else
2933 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2934
2935 curr_range_str = strategy_str;
2936
2937 do
2938 {
2939 int maxs;
2940 char alg_name[128];
2941 char align[16];
2942 next_range_str = strchr (curr_range_str, ',');
2943 if (next_range_str)
2944 *next_range_str++ = '\0';
2945
2946 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2947 alg_name, &maxs, align))
2948 {
2949 error ("wrong arg %s to option %s", curr_range_str,
2950 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2951 return;
2952 }
2953
2954 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2955 {
2956 error ("size ranges of option %s should be increasing",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2959 }
2960
2961 for (i = 0; i < last_alg; i++)
2962 if (!strcmp (alg_name, stringop_alg_names[i]))
2963 break;
2964
2965 if (i == last_alg)
2966 {
2967 error ("wrong stringop strategy name %s specified for option %s",
2968 alg_name,
2969 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2970 return;
2971 }
2972
/* Don't write past the end of input_ranges.  */
if (n == MAX_STRINGOP_ALGS)
{
error ("too many size ranges specified in option %s",
is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
return;
}

2973 input_ranges[n].max = maxs;
2974 input_ranges[n].alg = (stringop_alg) i;
2975 if (!strcmp (align, "align"))
2976 input_ranges[n].noalign = false;
2977 else if (!strcmp (align, "noalign"))
2978 input_ranges[n].noalign = true;
2979 else
2980 {
2981 error ("unknown alignment %s specified for option %s",
2982 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2983 return;
2984 }
2985 n++;
2986 curr_range_str = next_range_str;
2987 }
2988 while (curr_range_str);
2989
2990 if (input_ranges[n - 1].max != -1)
2991 {
2992 error ("the max value for the last size range should be -1"
2993 " for option %s",
2994 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2995 return;
2996 }
2997
2998 if (n > MAX_STRINGOP_ALGS)
2999 {
3000 error ("too many size ranges specified in option %s",
3001 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3002 return;
3003 }
3004
3005 /* Now override the default algs array. */
3006 for (i = 0; i < n; i++)
3007 {
3008 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3009 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3010 = input_ranges[i].alg;
3011 *const_cast<int *>(&default_algs->size[i].noalign)
3012 = input_ranges[i].noalign;
3013 }
3014 }
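
/* Worked example of the parsing above (illustration only, not an extra
   code path): for
       -mmemcpy-strategy=vector_loop:2048:align,libcall:-1:noalign
   the loop produces
       input_ranges[0] = { 2048, vector_loop, noalign = false }
       input_ranges[1] = {   -1, libcall,     noalign = true  }
   and the final loop copies these entries over the first slots of the
   default cost table, so copies of up to 2048 bytes use vector_loop and
   larger ones fall back to the library call.  */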
3015
3016 \f
3017 /* Parse the -mtune-ctrl= option. When DUMP is true,
3018 print the features that are explicitly set. */
3019
3020 static void
3021 parse_mtune_ctrl_str (bool dump)
3022 {
3023 if (!ix86_tune_ctrl_string)
3024 return;
3025
3026 char *next_feature_string = NULL;
3027 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3028 char *orig = curr_feature_string;
3029 int i;
3030 do
3031 {
3032 bool clear = false;
3033
3034 next_feature_string = strchr (curr_feature_string, ',');
3035 if (next_feature_string)
3036 *next_feature_string++ = '\0';
3037 if (*curr_feature_string == '^')
3038 {
3039 curr_feature_string++;
3040 clear = true;
3041 }
3042 for (i = 0; i < X86_TUNE_LAST; i++)
3043 {
3044 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3045 {
3046 ix86_tune_features[i] = !clear;
3047 if (dump)
3048 fprintf (stderr, "Explicitly %s feature %s\n",
3049 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3050 break;
3051 }
3052 }
3053 if (i == X86_TUNE_LAST)
3054 error ("unknown parameter to option -mtune-ctrl: %s",
3055 clear ? curr_feature_string - 1 : curr_feature_string);
3056 curr_feature_string = next_feature_string;
3057 }
3058 while (curr_feature_string);
3059 free (orig);
3060 }
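
/* For example, an invocation such as
       -mtune-ctrl=use_incdec,^use_leave
   sets the first feature and, because of the '^' prefix, clears the
   second.  (The feature names here are only illustrative; the real set
   comes from ix86_tune_feature_names and x86-tune.def.)  */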
3061
3062 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3063 processor type. */
3064
3065 static void
3066 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3067 {
3068 unsigned int ix86_tune_mask = 1u << ix86_tune;
3069 int i;
3070
3071 for (i = 0; i < X86_TUNE_LAST; ++i)
3072 {
3073 if (ix86_tune_no_default)
3074 ix86_tune_features[i] = 0;
3075 else
3076 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3077 }
3078
3079 if (dump)
3080 {
3081 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3082 for (i = 0; i < X86_TUNE_LAST; i++)
3083 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3084 ix86_tune_features[i] ? "on" : "off");
3085 }
3086
3087 parse_mtune_ctrl_str (dump);
3088 }
3089
3090
3091 /* Override various settings based on options. If MAIN_ARGS_P, the
3092 options are from the command line, otherwise they are from
3093 attributes. */
3094
3095 static void
3096 ix86_option_override_internal (bool main_args_p,
3097 struct gcc_options *opts,
3098 struct gcc_options *opts_set)
3099 {
3100 int i;
3101 unsigned int ix86_arch_mask;
3102 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3103 const char *prefix;
3104 const char *suffix;
3105 const char *sw;
3106
3107 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3108 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3109 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3110 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3111 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3112 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3113 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3114 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3115 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3116 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3117 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3118 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3119 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3120 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3121 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3122 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3123 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3124 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3125 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3126 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3127 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3128 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3129 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3130 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3131 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3132 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3133 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3134 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3135 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3136 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3137 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3138 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3139 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3140 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3141 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3142 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3143 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3144 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3145 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3146 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3147 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3148 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3149 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3150 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3151 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3152 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3153 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3154 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3155 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3156 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3157 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3158 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3159 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3160 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3161 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3162 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3163 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3164
3165 #define PTA_CORE2 \
3166 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3167 | PTA_CX16 | PTA_FXSR)
3168 #define PTA_NEHALEM \
3169 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3170 #define PTA_WESTMERE \
3171 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3172 #define PTA_SANDYBRIDGE \
3173 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3174 #define PTA_IVYBRIDGE \
3175 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3176 #define PTA_HASWELL \
3177 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3178 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3179 #define PTA_BROADWELL \
3180 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3181 #define PTA_BONNELL \
3182 (PTA_CORE2 | PTA_MOVBE)
3183 #define PTA_SILVERMONT \
3184 (PTA_WESTMERE | PTA_MOVBE)
3185
3186 /* If this ever reaches 64, the flags field of struct pta below needs to be widened. */
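/* (With PTA_PCOMMIT at bit 56, 57 of the 64 available flag bits are in use
   in the table above, so only a handful of spare bits remain.)  */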
3187
3188 static struct pta
3189 {
3190 const char *const name; /* processor name or nickname. */
3191 const enum processor_type processor;
3192 const enum attr_cpu schedule;
3193 const unsigned HOST_WIDE_INT flags;
3194 }
3195 const processor_alias_table[] =
3196 {
3197 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3198 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3199 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3200 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3201 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3202 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3203 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3204 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3205 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3206 PTA_MMX | PTA_SSE | PTA_FXSR},
3207 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3208 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3209 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3210 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3211 PTA_MMX | PTA_SSE | PTA_FXSR},
3212 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3213 PTA_MMX | PTA_SSE | PTA_FXSR},
3214 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3215 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3216 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3217 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3218 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3219 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3220 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3221 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3222 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3224 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3225 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3226 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3227 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3228 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3229 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3230 PTA_SANDYBRIDGE},
3231 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3232 PTA_SANDYBRIDGE},
3233 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3234 PTA_IVYBRIDGE},
3235 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3236 PTA_IVYBRIDGE},
3237 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3238 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3239 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3240 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3241 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3242 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3243 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3244 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3245 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3246 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3247 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3248 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3249 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3250 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3252 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3253 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3254 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3255 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3256 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3258 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3259 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3260 {"x86-64", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3262 {"k8", PROCESSOR_K8, CPU_K8,
3263 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3264 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3265 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3266 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3267 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3268 {"opteron", PROCESSOR_K8, CPU_K8,
3269 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3270 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3271 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3272 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3273 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3274 {"athlon64", PROCESSOR_K8, CPU_K8,
3275 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3276 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3278 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3279 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3280 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3282 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3283 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3285 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3286 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3288 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3289 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3290 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3291 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3292 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3293 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3294 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3297 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3298 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3299 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3300 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3301 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3302 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3303 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3304 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3305 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3306 | PTA_XSAVEOPT | PTA_FSGSBASE},
3307 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3308 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3309 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3310 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3311 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3312 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3313 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3314 | PTA_MOVBE},
3315 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3318 | PTA_FXSR | PTA_XSAVE},
3319 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3320 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3321 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3322 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3323 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3324 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3325
3326 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3327 PTA_64BIT
3328 | PTA_HLE /* flags are only used for -march switch. */ },
3329 };
3330
3331 /* -mrecip options. */
3332 static struct
3333 {
3334 const char *string; /* option name */
3335 unsigned int mask; /* mask bits to set */
3336 }
3337 const recip_options[] =
3338 {
3339 { "all", RECIP_MASK_ALL },
3340 { "none", RECIP_MASK_NONE },
3341 { "div", RECIP_MASK_DIV },
3342 { "sqrt", RECIP_MASK_SQRT },
3343 { "vec-div", RECIP_MASK_VEC_DIV },
3344 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3345 };
3346
3347 int const pta_size = ARRAY_SIZE (processor_alias_table);
3348
3349 /* Set up prefix/suffix so the error messages refer to either the command
3350 line argument, or the attribute(target). */
3351 if (main_args_p)
3352 {
3353 prefix = "-m";
3354 suffix = "";
3355 sw = "switch";
3356 }
3357 else
3358 {
3359 prefix = "option(\"";
3360 suffix = "\")";
3361 sw = "attribute";
3362 }
3363
3364 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3365 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3366 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3367 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3368 #ifdef TARGET_BI_ARCH
3369 else
3370 {
3371 #if TARGET_BI_ARCH == 1
3372 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3373 is on and OPTION_MASK_ABI_X32 is off. We turn off
3374 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3375 -mx32. */
3376 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3378 #else
3379 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3380 on and OPTION_MASK_ABI_64 is off. We turn off
3381 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3382 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3383 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3384 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3386 #endif
3387 }
3388 #endif
3389
3390 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3391 {
3392 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3393 OPTION_MASK_ABI_64 for TARGET_X32. */
3394 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3395 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3396 }
3397 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3399 | OPTION_MASK_ABI_X32
3400 | OPTION_MASK_ABI_64);
3401 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3402 {
3403 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3404 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3405 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3407 }
3408
3409 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3410 SUBTARGET_OVERRIDE_OPTIONS;
3411 #endif
3412
3413 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3414 SUBSUBTARGET_OVERRIDE_OPTIONS;
3415 #endif
3416
3417 /* -fPIC is the default for x86_64. */
3418 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_flag_pic = 2;
3420
3421 /* Need to check -mtune=generic first. */
3422 if (opts->x_ix86_tune_string)
3423 {
3424 /* As special support for cross compilers we read -mtune=native
3425 as -mtune=generic. With native compilers we won't see the
3426 -mtune=native, as it was changed by the driver. */
3427 if (!strcmp (opts->x_ix86_tune_string, "native"))
3428 {
3429 opts->x_ix86_tune_string = "generic";
3430 }
3431 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3432 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3433 "%stune=k8%s or %stune=generic%s instead as appropriate",
3434 prefix, suffix, prefix, suffix, prefix, suffix);
3435 }
3436 else
3437 {
3438 if (opts->x_ix86_arch_string)
3439 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3440 if (!opts->x_ix86_tune_string)
3441 {
3442 opts->x_ix86_tune_string
3443 = processor_target_table[TARGET_CPU_DEFAULT].name;
3444 ix86_tune_defaulted = 1;
3445 }
3446
3447 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3448 or defaulted. We need to use a sensible tune option. */
3449 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3450 {
3451 opts->x_ix86_tune_string = "generic";
3452 }
3453 }
3454
3455 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3456 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3457 {
3458 /* rep; movq isn't available in 32-bit code. */
3459 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3460 opts->x_ix86_stringop_alg = no_stringop;
3461 }
3462
3463 if (!opts->x_ix86_arch_string)
3464 opts->x_ix86_arch_string
3465 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3466 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3467 else
3468 ix86_arch_specified = 1;
3469
3470 if (opts_set->x_ix86_pmode)
3471 {
3472 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3473 && opts->x_ix86_pmode == PMODE_SI)
3474 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3475 && opts->x_ix86_pmode == PMODE_DI))
3476 error ("address mode %qs not supported in the %s bit mode",
3477 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3478 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3479 }
3480 else
3481 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3482 ? PMODE_DI : PMODE_SI;
3483
3484 if (!opts_set->x_ix86_abi)
3485 opts->x_ix86_abi = DEFAULT_ABI;
3486
3487 /* For targets using the MS ABI, enable MS extensions unless they were
3488 explicitly turned off. For non-MS ABI targets we turn off this
3489 option. */
3490 if (!opts_set->x_flag_ms_extensions)
3491 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3492
3493 if (opts_set->x_ix86_cmodel)
3494 {
3495 switch (opts->x_ix86_cmodel)
3496 {
3497 case CM_SMALL:
3498 case CM_SMALL_PIC:
3499 if (opts->x_flag_pic)
3500 opts->x_ix86_cmodel = CM_SMALL_PIC;
3501 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3502 error ("code model %qs not supported in the %s bit mode",
3503 "small", "32");
3504 break;
3505
3506 case CM_MEDIUM:
3507 case CM_MEDIUM_PIC:
3508 if (opts->x_flag_pic)
3509 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "medium", "32");
3513 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3514 error ("code model %qs not supported in x32 mode",
3515 "medium");
3516 break;
3517
3518 case CM_LARGE:
3519 case CM_LARGE_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_LARGE_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "large", "32");
3525 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3526 error ("code model %qs not supported in x32 mode",
3527 "large");
3528 break;
3529
3530 case CM_32:
3531 if (opts->x_flag_pic)
3532 error ("code model %s does not support PIC mode", "32");
3533 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in the %s bit mode",
3535 "32", "64");
3536 break;
3537
3538 case CM_KERNEL:
3539 if (opts->x_flag_pic)
3540 {
3541 error ("code model %s does not support PIC mode", "kernel");
3542 opts->x_ix86_cmodel = CM_32;
3543 }
3544 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3545 error ("code model %qs not supported in the %s bit mode",
3546 "kernel", "32");
3547 break;
3548
3549 default:
3550 gcc_unreachable ();
3551 }
3552 }
3553 else
3554 {
3555 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3556 use of rip-relative addressing. This eliminates fixups that
3557 would otherwise be needed if this object is to be placed in a
3558 DLL, and is essentially just as efficient as direct addressing. */
3559 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3560 && (TARGET_RDOS || TARGET_PECOFF))
3561 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3562 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3564 else
3565 opts->x_ix86_cmodel = CM_32;
3566 }
3567 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3568 {
3569 error ("-masm=intel not supported in this configuration");
3570 opts->x_ix86_asm_dialect = ASM_ATT;
3571 }
3572 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3573 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3574 sorry ("%i-bit mode not compiled in",
3575 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3576
3577 for (i = 0; i < pta_size; i++)
3578 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3579 {
3580 ix86_schedule = processor_alias_table[i].schedule;
3581 ix86_arch = processor_alias_table[i].processor;
3582 /* Default cpu tuning to the architecture. */
3583 ix86_tune = ix86_arch;
3584
3585 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3586 && !(processor_alias_table[i].flags & PTA_64BIT))
3587 error ("CPU you selected does not support x86-64 "
3588 "instruction set");
3589
3590 if (processor_alias_table[i].flags & PTA_MMX
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3593 if (processor_alias_table[i].flags & PTA_3DNOW
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3596 if (processor_alias_table[i].flags & PTA_3DNOW_A
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3599 if (processor_alias_table[i].flags & PTA_SSE
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3602 if (processor_alias_table[i].flags & PTA_SSE2
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3605 if (processor_alias_table[i].flags & PTA_SSE3
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3608 if (processor_alias_table[i].flags & PTA_SSSE3
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3611 if (processor_alias_table[i].flags & PTA_SSE4_1
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3614 if (processor_alias_table[i].flags & PTA_SSE4_2
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3617 if (processor_alias_table[i].flags & PTA_AVX
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3620 if (processor_alias_table[i].flags & PTA_AVX2
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3623 if (processor_alias_table[i].flags & PTA_FMA
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3626 if (processor_alias_table[i].flags & PTA_SSE4A
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3629 if (processor_alias_table[i].flags & PTA_FMA4
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3632 if (processor_alias_table[i].flags & PTA_XOP
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3635 if (processor_alias_table[i].flags & PTA_LWP
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3638 if (processor_alias_table[i].flags & PTA_ABM
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3641 if (processor_alias_table[i].flags & PTA_BMI
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3644 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3647 if (processor_alias_table[i].flags & PTA_TBM
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3650 if (processor_alias_table[i].flags & PTA_BMI2
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3653 if (processor_alias_table[i].flags & PTA_CX16
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3656 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3659 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3660 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3663 if (processor_alias_table[i].flags & PTA_MOVBE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3666 if (processor_alias_table[i].flags & PTA_AES
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3669 if (processor_alias_table[i].flags & PTA_SHA
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3672 if (processor_alias_table[i].flags & PTA_PCLMUL
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3675 if (processor_alias_table[i].flags & PTA_FSGSBASE
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3678 if (processor_alias_table[i].flags & PTA_RDRND
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3681 if (processor_alias_table[i].flags & PTA_F16C
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3684 if (processor_alias_table[i].flags & PTA_RTM
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3687 if (processor_alias_table[i].flags & PTA_HLE
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3690 if (processor_alias_table[i].flags & PTA_PRFCHW
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3693 if (processor_alias_table[i].flags & PTA_RDSEED
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3696 if (processor_alias_table[i].flags & PTA_ADX
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3699 if (processor_alias_table[i].flags & PTA_FXSR
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3702 if (processor_alias_table[i].flags & PTA_XSAVE
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3705 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3708 if (processor_alias_table[i].flags & PTA_AVX512F
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3711 if (processor_alias_table[i].flags & PTA_AVX512ER
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3714 if (processor_alias_table[i].flags & PTA_AVX512PF
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3717 if (processor_alias_table[i].flags & PTA_AVX512CD
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3720 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3723 if (processor_alias_table[i].flags & PTA_PCOMMIT
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3726 if (processor_alias_table[i].flags & PTA_CLWB
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3729 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3732 if (processor_alias_table[i].flags & PTA_XSAVEC
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3735 if (processor_alias_table[i].flags & PTA_XSAVES
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3738 if (processor_alias_table[i].flags & PTA_AVX512DQ
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3741 if (processor_alias_table[i].flags & PTA_AVX512BW
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3744 if (processor_alias_table[i].flags & PTA_AVX512VL
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3747 if (processor_alias_table[i].flags & PTA_MPX
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3750 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3753 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3756 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3757 x86_prefetch_sse = true;
3758
3759 break;
3760 }
3761
3762 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3763 error ("Intel MPX does not support x32");
3767
3768 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3769 error ("generic CPU can be used only for %stune=%s %s",
3770 prefix, suffix, sw);
3771 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3772 error ("intel CPU can be used only for %stune=%s %s",
3773 prefix, suffix, sw);
3774 else if (i == pta_size)
3775 error ("bad value (%s) for %sarch=%s %s",
3776 opts->x_ix86_arch_string, prefix, suffix, sw);
3777
3778 ix86_arch_mask = 1u << ix86_arch;
3779 for (i = 0; i < X86_ARCH_LAST; ++i)
3780 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3781
3782 for (i = 0; i < pta_size; i++)
3783 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3784 {
3785 ix86_schedule = processor_alias_table[i].schedule;
3786 ix86_tune = processor_alias_table[i].processor;
3787 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3788 {
3789 if (!(processor_alias_table[i].flags & PTA_64BIT))
3790 {
3791 if (ix86_tune_defaulted)
3792 {
3793 opts->x_ix86_tune_string = "x86-64";
3794 for (i = 0; i < pta_size; i++)
3795 if (! strcmp (opts->x_ix86_tune_string,
3796 processor_alias_table[i].name))
3797 break;
3798 ix86_schedule = processor_alias_table[i].schedule;
3799 ix86_tune = processor_alias_table[i].processor;
3800 }
3801 else
3802 error ("CPU you selected does not support x86-64 "
3803 "instruction set");
3804 }
3805 }
3806 /* Intel CPUs have always interpreted SSE prefetch instructions as
3807 NOPs; so, we can enable SSE prefetch instructions even when
3808 -mtune (rather than -march) points us to a processor that has them.
3809 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3810 higher processors. */
3811 if (TARGET_CMOV
3812 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3813 x86_prefetch_sse = true;
3814 break;
3815 }
3816
3817 if (ix86_tune_specified && i == pta_size)
3818 error ("bad value (%s) for %stune=%s %s",
3819 opts->x_ix86_tune_string, prefix, suffix, sw);
3820
3821 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3822
3823 #ifndef USE_IX86_FRAME_POINTER
3824 #define USE_IX86_FRAME_POINTER 0
3825 #endif
3826
3827 #ifndef USE_X86_64_FRAME_POINTER
3828 #define USE_X86_64_FRAME_POINTER 0
3829 #endif
3830
3831 /* Set the default values for switches whose default depends on TARGET_64BIT
3832 in case they weren't overridden by command-line options. */
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3834 {
3835 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3836 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3837 if (opts->x_flag_asynchronous_unwind_tables
3838 && !opts_set->x_flag_unwind_tables
3839 && TARGET_64BIT_MS_ABI)
3840 opts->x_flag_unwind_tables = 1;
3841 if (opts->x_flag_asynchronous_unwind_tables == 2)
3842 opts->x_flag_unwind_tables
3843 = opts->x_flag_asynchronous_unwind_tables = 1;
3844 if (opts->x_flag_pcc_struct_return == 2)
3845 opts->x_flag_pcc_struct_return = 0;
3846 }
3847 else
3848 {
3849 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3850 opts->x_flag_omit_frame_pointer
3851 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3852 if (opts->x_flag_asynchronous_unwind_tables == 2)
3853 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3854 if (opts->x_flag_pcc_struct_return == 2)
3855 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3856 }
3857
3858 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3859 if (opts->x_optimize_size)
3860 ix86_cost = &ix86_size_cost;
3861 else
3862 ix86_cost = ix86_tune_cost;
3863
3864 /* Arrange to set up i386_stack_locals for all functions. */
3865 init_machine_status = ix86_init_machine_status;
3866
3867 /* Validate -mregparm= value. */
3868 if (opts_set->x_ix86_regparm)
3869 {
3870 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3871 warning (0, "-mregparm is ignored in 64-bit mode");
3872 if (opts->x_ix86_regparm > REGPARM_MAX)
3873 {
3874 error ("-mregparm=%d is not between 0 and %d",
3875 opts->x_ix86_regparm, REGPARM_MAX);
3876 opts->x_ix86_regparm = 0;
3877 }
3878 }
3879 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_regparm = REGPARM_MAX;
3881
3882 /* Default align_* from the processor table. */
3883 if (opts->x_align_loops == 0)
3884 {
3885 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3886 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3887 }
3888 if (opts->x_align_jumps == 0)
3889 {
3890 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3891 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3892 }
3893 if (opts->x_align_functions == 0)
3894 {
3895 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3896 }
3897
3898 /* Provide default for -mbranch-cost= value. */
3899 if (!opts_set->x_ix86_branch_cost)
3900 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3901
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 {
3904 opts->x_target_flags
3905 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3906
3907 /* Enable by default the SSE and MMX builtins. Do allow the user to
3908 explicitly disable any of these. In particular, disabling SSE and
3909 MMX for kernel code is extremely useful. */
3910 if (!ix86_arch_specified)
3911 opts->x_ix86_isa_flags
3912 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3913 | TARGET_SUBTARGET64_ISA_DEFAULT)
3914 & ~opts->x_ix86_isa_flags_explicit);
3915
3916 if (TARGET_RTD_P (opts->x_target_flags))
3917 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3918 }
3919 else
3920 {
3921 opts->x_target_flags
3922 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3923
3924 if (!ix86_arch_specified)
3925 opts->x_ix86_isa_flags
3926 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3927
3928 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3929 when the programmer takes care to keep the stack from being clobbered. */
3930 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3931 opts->x_target_flags |= MASK_NO_RED_ZONE;
3932 }
3933
3934 /* Keep nonleaf frame pointers. */
3935 if (opts->x_flag_omit_frame_pointer)
3936 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3937 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3938 opts->x_flag_omit_frame_pointer = 1;
3939
3940 /* If we're doing fast math, we don't care about comparison order
3941 wrt NaNs. This lets us use a shorter comparison sequence. */
3942 if (opts->x_flag_finite_math_only)
3943 opts->x_target_flags &= ~MASK_IEEE_FP;
3944
3945 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3946 since the insns won't need emulation. */
3947 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3948 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3949
3950 /* Likewise, if the target doesn't have a 387, or we've specified
3951 software floating point, don't use 387 inline intrinsics. */
3952 if (!TARGET_80387_P (opts->x_target_flags))
3953 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3954
3955 /* Turn on MMX builtins for -msse. */
3956 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3957 opts->x_ix86_isa_flags
3958 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3959
3960 /* Enable SSE prefetch. */
3961 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3962 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3963 x86_prefetch_sse = true;
3964
3965 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3966 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3967 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3968 opts->x_ix86_isa_flags
3969 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3970
3971 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3972 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3973 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3974 opts->x_ix86_isa_flags
3975 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3976
3977 /* Enable lzcnt instruction for -mabm. */
3978 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3981
3982 /* Validate -mpreferred-stack-boundary= value or default it to
3983 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3984 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3985 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3986 {
3987 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3988 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3989 int max = (TARGET_SEH ? 4 : 12);
3990
3991 if (opts->x_ix86_preferred_stack_boundary_arg < min
3992 || opts->x_ix86_preferred_stack_boundary_arg > max)
3993 {
3994 if (min == max)
3995 error ("-mpreferred-stack-boundary is not supported "
3996 "for this target");
3997 else
3998 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3999 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4000 }
4001 else
4002 ix86_preferred_stack_boundary
4003 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4004 }
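
/* Worked example of the conversion above: -mpreferred-stack-boundary=4
   gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128, i.e. a 16-byte (128-bit)
   preferred stack boundary, assuming the usual BITS_PER_UNIT of 8.  */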
4005
4006 /* Set the default value for -mstackrealign. */
4007 if (opts->x_ix86_force_align_arg_pointer == -1)
4008 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4009
4010 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4011
4012 /* Validate -mincoming-stack-boundary= value or default it to
4013 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4014 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4015 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4016 {
4017 if (opts->x_ix86_incoming_stack_boundary_arg
4018 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4019 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4020 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4021 opts->x_ix86_incoming_stack_boundary_arg,
4022 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4023 else
4024 {
4025 ix86_user_incoming_stack_boundary
4026 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4027 ix86_incoming_stack_boundary
4028 = ix86_user_incoming_stack_boundary;
4029 }
4030 }
4031
4032 #ifndef NO_PROFILE_COUNTERS
4033 if (flag_nop_mcount)
4034 error ("-mnop-mcount is not compatible with this target");
4035 #endif
4036 if (flag_nop_mcount && flag_pic)
4037 error ("-mnop-mcount is not implemented for -fPIC");
4038
4039 /* Accept -msseregparm only if at least SSE support is enabled. */
4040 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4041 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4042 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4043
4044 if (opts_set->x_ix86_fpmath)
4045 {
4046 if (opts->x_ix86_fpmath & FPMATH_SSE)
4047 {
4048 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4049 {
4050 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4051 opts->x_ix86_fpmath = FPMATH_387;
4052 }
4053 else if ((opts->x_ix86_fpmath & FPMATH_387)
4054 && !TARGET_80387_P (opts->x_target_flags))
4055 {
4056 warning (0, "387 instruction set disabled, using SSE arithmetics");
4057 opts->x_ix86_fpmath = FPMATH_SSE;
4058 }
4059 }
4060 }
4061 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4062 fpmath=387. The latter is, however, the default on many targets, since
4063 the extra 80-bit precision of temporaries is considered to be part of
4064 the ABI. Overwrite the default at least for -ffast-math.
4065 TODO: -mfpmath=both seems to produce code that performs the same with
4066 slightly smaller binaries. It is, however, not clear whether register
4067 allocation is ready for this setting.
4068 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4069 codegen. We may switch to 387 with -ffast-math for size-optimized
4070 functions. */
4071 else if (fast_math_flags_set_p (&global_options)
4072 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4073 opts->x_ix86_fpmath = FPMATH_SSE;
4074 else
4075 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4076
4077 /* If the i387 is disabled, then do not return values in it. */
4078 if (!TARGET_80387_P (opts->x_target_flags))
4079 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4080
4081 /* Use external vectorized library in vectorizing intrinsics. */
4082 if (opts_set->x_ix86_veclibabi_type)
4083 switch (opts->x_ix86_veclibabi_type)
4084 {
4085 case ix86_veclibabi_type_svml:
4086 ix86_veclib_handler = ix86_veclibabi_svml;
4087 break;
4088
4089 case ix86_veclibabi_type_acml:
4090 ix86_veclib_handler = ix86_veclibabi_acml;
4091 break;
4092
4093 default:
4094 gcc_unreachable ();
4095 }
4096
4097 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4098 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4099 && !opts->x_optimize_size)
4100 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4101
4102 /* If stack probes are required, the space used for large function
4103 arguments on the stack must also be probed, so enable
4104 -maccumulate-outgoing-args so this happens in the prologue. */
4105 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4106 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4107 {
4108 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4109 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4110 "for correctness", prefix, suffix);
4111 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4112 }
4113
4114 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4115 {
4116 char *p;
4117 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4118 p = strchr (internal_label_prefix, 'X');
4119 internal_label_prefix_len = p - internal_label_prefix;
4120 *p = '\0';
4121 }
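
/* As an illustration: if the target's ASM_GENERATE_INTERNAL_LABEL emits
   something like ".LX0" for the ("LX", 0) request above, then everything
   up to the 'X' (here ".L", a typical local-label prefix) is what ends up
   in internal_label_prefix, with internal_label_prefix_len set to match.
   The exact string is target-dependent.  */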
4122
4123 /* When a scheduling description is not available, disable the scheduler
4124 passes so they won't slow down compilation or make x87 code slower. */
4125 if (!TARGET_SCHEDULE)
4126 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4127
4128 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4129 ix86_tune_cost->simultaneous_prefetches,
4130 opts->x_param_values,
4131 opts_set->x_param_values);
4132 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4133 ix86_tune_cost->prefetch_block,
4134 opts->x_param_values,
4135 opts_set->x_param_values);
4136 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4137 ix86_tune_cost->l1_cache_size,
4138 opts->x_param_values,
4139 opts_set->x_param_values);
4140 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4141 ix86_tune_cost->l2_cache_size,
4142 opts->x_param_values,
4143 opts_set->x_param_values);
4144
4145 /* Increase full peel max insns parameter for x86. */
4146 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
4147 200,
4148 opts->x_param_values,
4149 opts_set->x_param_values);
4150
4151 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4152 if (opts->x_flag_prefetch_loop_arrays < 0
4153 && HAVE_prefetch
4154 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4155 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4156 opts->x_flag_prefetch_loop_arrays = 1;
4157
4158 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4159 can be optimized to ap = __builtin_next_arg (0). */
4160 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4161 targetm.expand_builtin_va_start = NULL;
4162
4163 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4164 {
4165 ix86_gen_leave = gen_leave_rex64;
4166 if (Pmode == DImode)
4167 {
4168 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4169 ix86_gen_tls_local_dynamic_base_64
4170 = gen_tls_local_dynamic_base_64_di;
4171 }
4172 else
4173 {
4174 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4175 ix86_gen_tls_local_dynamic_base_64
4176 = gen_tls_local_dynamic_base_64_si;
4177 }
4178 }
4179 else
4180 ix86_gen_leave = gen_leave;
4181
4182 if (Pmode == DImode)
4183 {
4184 ix86_gen_add3 = gen_adddi3;
4185 ix86_gen_sub3 = gen_subdi3;
4186 ix86_gen_sub3_carry = gen_subdi3_carry;
4187 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4188 ix86_gen_andsp = gen_anddi3;
4189 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4190 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4191 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4192 ix86_gen_monitor = gen_sse3_monitor_di;
4193 }
4194 else
4195 {
4196 ix86_gen_add3 = gen_addsi3;
4197 ix86_gen_sub3 = gen_subsi3;
4198 ix86_gen_sub3_carry = gen_subsi3_carry;
4199 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4200 ix86_gen_andsp = gen_andsi3;
4201 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4202 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4203 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4204 ix86_gen_monitor = gen_sse3_monitor_si;
4205 }
4206
4207 #ifdef USE_IX86_CLD
4208 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4209 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4210 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4211 #endif
4212
4213 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4214 {
4215 if (opts->x_flag_fentry > 0)
4216 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4217 "with -fpic");
4218 opts->x_flag_fentry = 0;
4219 }
4220 else if (TARGET_SEH)
4221 {
4222 if (opts->x_flag_fentry == 0)
4223 sorry ("-mno-fentry isn%'t compatible with SEH");
4224 opts->x_flag_fentry = 1;
4225 }
4226 else if (opts->x_flag_fentry < 0)
4227 {
4228 #if defined(PROFILE_BEFORE_PROLOGUE)
4229 opts->x_flag_fentry = 1;
4230 #else
4231 opts->x_flag_fentry = 0;
4232 #endif
4233 }
4234
4235 /* When not optimizing for size, enable the vzeroupper optimization for
4236 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4237 AVX unaligned loads/stores. */
4238 if (!opts->x_optimize_size)
4239 {
4240 if (flag_expensive_optimizations
4241 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4242 opts->x_target_flags |= MASK_VZEROUPPER;
4243 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4244 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4245 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4246 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4247 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4248 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4249 /* Enable 128-bit AVX instruction generation
4250 for the auto-vectorizer. */
4251 if (TARGET_AVX128_OPTIMAL
4252 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4253 opts->x_target_flags |= MASK_PREFER_AVX128;
4254 }
4255
4256 if (opts->x_ix86_recip_name)
4257 {
4258 char *p = ASTRDUP (opts->x_ix86_recip_name);
4259 char *q;
4260 unsigned int mask, i;
4261 bool invert;
4262
4263 while ((q = strtok (p, ",")) != NULL)
4264 {
4265 p = NULL;
4266 if (*q == '!')
4267 {
4268 invert = true;
4269 q++;
4270 }
4271 else
4272 invert = false;
4273
4274 if (!strcmp (q, "default"))
4275 mask = RECIP_MASK_ALL;
4276 else
4277 {
4278 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4279 if (!strcmp (q, recip_options[i].string))
4280 {
4281 mask = recip_options[i].mask;
4282 break;
4283 }
4284
4285 if (i == ARRAY_SIZE (recip_options))
4286 {
4287 error ("unknown option for -mrecip=%s", q);
4288 invert = false;
4289 mask = RECIP_MASK_NONE;
4290 }
4291 }
4292
4293 opts->x_recip_mask_explicit |= mask;
4294 if (invert)
4295 opts->x_recip_mask &= ~mask;
4296 else
4297 opts->x_recip_mask |= mask;
4298 }
4299 }
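
/* Example of the parsing above: -mrecip=all,!sqrt first sets every bit in
   RECIP_MASK_ALL and then, because of the '!' prefix on the second token,
   clears RECIP_MASK_SQRT again, leaving reciprocal approximations enabled
   for everything except scalar square roots.  */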
4300
4301 if (TARGET_RECIP_P (opts->x_target_flags))
4302 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4303 else if (opts_set->x_target_flags & MASK_RECIP)
4304 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4305
4306 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4307 for 64-bit Bionic. */
4308 if (TARGET_HAS_BIONIC
4309 && !(opts_set->x_target_flags
4310 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4311 opts->x_target_flags |= (TARGET_64BIT
4312 ? MASK_LONG_DOUBLE_128
4313 : MASK_LONG_DOUBLE_64);
4314
4315 /* Only one of them can be active. */
4316 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4317 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4318
4319 /* Save the initial options in case the user uses function-specific
4320 options later. */
4321 if (main_args_p)
4322 target_option_default_node = target_option_current_node
4323 = build_target_option_node (opts);
4324
4325 /* Handle stack protector */
4326 if (!opts_set->x_ix86_stack_protector_guard)
4327 opts->x_ix86_stack_protector_guard
4328 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4329
4330 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4331 if (opts->x_ix86_tune_memcpy_strategy)
4332 {
4333 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4334 ix86_parse_stringop_strategy_string (str, false);
4335 free (str);
4336 }
4337
4338 if (opts->x_ix86_tune_memset_strategy)
4339 {
4340 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4341 ix86_parse_stringop_strategy_string (str, true);
4342 free (str);
4343 }
4344 }
4345
4346 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4347
4348 static void
4349 ix86_option_override (void)
4350 {
4351 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4352 struct register_pass_info insert_vzeroupper_info
4353 = { pass_insert_vzeroupper, "reload",
4354 1, PASS_POS_INSERT_AFTER
4355 };
4356
4357 ix86_option_override_internal (true, &global_options, &global_options_set);
4358
4360 /* This needs to be done at startup. It's convenient to do it here. */
4361 register_pass (&insert_vzeroupper_info);
4362 }
4363
4364 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4365 static char *
4366 ix86_offload_options (void)
4367 {
4368 if (TARGET_LP64)
4369 return xstrdup ("-foffload-abi=lp64");
4370 return xstrdup ("-foffload-abi=ilp32");
4371 }
4372
4373 /* Update register usage after having seen the compiler flags. */
4374
4375 static void
4376 ix86_conditional_register_usage (void)
4377 {
4378 int i, c_mask;
4379 unsigned int j;
4380
4381 /* The PIC register, if it exists, is fixed. */
4382 j = PIC_OFFSET_TABLE_REGNUM;
4383 if (j != INVALID_REGNUM)
4384 fixed_regs[j] = call_used_regs[j] = 1;
4385
4386 /* For 32-bit targets, squash the REX registers. */
4387 if (! TARGET_64BIT)
4388 {
4389 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4390 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4391 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4392 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4393 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4394 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4395 }
4396
4397 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4398 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4399 : TARGET_64BIT ? (1 << 2)
4400 : (1 << 1));
4401
4402 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4403
4404 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4405 {
4406 /* Set/reset conditionally defined registers from
4407 CALL_USED_REGISTERS initializer. */
4408 if (call_used_regs[i] > 1)
4409 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4410
4411 /* Calculate registers of CLOBBERED_REGS register set
4412 as call used registers from GENERAL_REGS register set. */
4413 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4414 && call_used_regs[i])
4415 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4416 }
4417
4418 /* If MMX is disabled, squash the registers. */
4419 if (! TARGET_MMX)
4420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4421 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4422 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4423
4424 /* If SSE is disabled, squash the registers. */
4425 if (! TARGET_SSE)
4426 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4427 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4428 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429
4430 /* If the FPU is disabled, squash the registers. */
4431 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4432 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4433 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4434 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4435
4436 /* If AVX512F is disabled, squash the registers. */
4437 if (! TARGET_AVX512F)
4438 {
4439 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4440 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4441
4442 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4444 }
4445
4446 /* If MPX is disabled, squash the registers. */
4447 if (! TARGET_MPX)
4448 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4449 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4450 }
4451
4452 \f
4453 /* Save the current options */
4454
4455 static void
4456 ix86_function_specific_save (struct cl_target_option *ptr,
4457 struct gcc_options *opts)
4458 {
4459 ptr->arch = ix86_arch;
4460 ptr->schedule = ix86_schedule;
4461 ptr->tune = ix86_tune;
4462 ptr->branch_cost = ix86_branch_cost;
4463 ptr->tune_defaulted = ix86_tune_defaulted;
4464 ptr->arch_specified = ix86_arch_specified;
4465 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4466 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4467 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4468 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4469 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4470 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4471 ptr->x_ix86_abi = opts->x_ix86_abi;
4472 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4473 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4474 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4475 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4476 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4477 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4478 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4479 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4480 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4481 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4482 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4483 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4484 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4485 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4486 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4487 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4488 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4489 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4490 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4491 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4492
4493 /* The fields are char but the variables are not; make sure the
4494 values fit in the fields. */
4495 gcc_assert (ptr->arch == ix86_arch);
4496 gcc_assert (ptr->schedule == ix86_schedule);
4497 gcc_assert (ptr->tune == ix86_tune);
4498 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4499 }
4500
4501 /* Restore the current options */
4502
4503 static void
4504 ix86_function_specific_restore (struct gcc_options *opts,
4505 struct cl_target_option *ptr)
4506 {
4507 enum processor_type old_tune = ix86_tune;
4508 enum processor_type old_arch = ix86_arch;
4509 unsigned int ix86_arch_mask;
4510 int i;
4511
4512 /* We don't change -fPIC. */
4513 opts->x_flag_pic = flag_pic;
4514
4515 ix86_arch = (enum processor_type) ptr->arch;
4516 ix86_schedule = (enum attr_cpu) ptr->schedule;
4517 ix86_tune = (enum processor_type) ptr->tune;
4518 opts->x_ix86_branch_cost = ptr->branch_cost;
4519 ix86_tune_defaulted = ptr->tune_defaulted;
4520 ix86_arch_specified = ptr->arch_specified;
4521 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4522 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4523 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4524 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4525 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4526 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4527 opts->x_ix86_abi = ptr->x_ix86_abi;
4528 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4529 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4530 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4531 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4532 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4533 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4534 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4535 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4536 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4537 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4538 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4539 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4540 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4541 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4542 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4543 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4544 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4545 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4546 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4547 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4548
4549 /* Recreate the arch feature tests if the arch changed */
4550 if (old_arch != ix86_arch)
4551 {
4552 ix86_arch_mask = 1u << ix86_arch;
4553 for (i = 0; i < X86_ARCH_LAST; ++i)
4554 ix86_arch_features[i]
4555 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4556 }
4557
4558 /* Recreate the tune optimization tests */
4559 if (old_tune != ix86_tune)
4560 set_ix86_tune_features (ix86_tune, false);
4561 }
4562
4563 /* Print the current options */
4564
4565 static void
4566 ix86_function_specific_print (FILE *file, int indent,
4567 struct cl_target_option *ptr)
4568 {
4569 char *target_string
4570 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4571 NULL, NULL, ptr->x_ix86_fpmath, false);
4572
4573 gcc_assert (ptr->arch < PROCESSOR_max);
4574 fprintf (file, "%*sarch = %d (%s)\n",
4575 indent, "",
4576 ptr->arch, processor_target_table[ptr->arch].name);
4577
4578 gcc_assert (ptr->tune < PROCESSOR_max);
4579 fprintf (file, "%*stune = %d (%s)\n",
4580 indent, "",
4581 ptr->tune, processor_target_table[ptr->tune].name);
4582
4583 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4584
4585 if (target_string)
4586 {
4587 fprintf (file, "%*s%s\n", indent, "", target_string);
4588 free (target_string);
4589 }
4590 }
4591
4592 \f
4593 /* Inner function to process the attribute((target(...))), take an argument and
4594 set the current options from the argument. If we have a list, recursively go
4595 over the list. */
4596
4597 static bool
4598 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4599 struct gcc_options *opts,
4600 struct gcc_options *opts_set,
4601 struct gcc_options *enum_opts_set)
4602 {
4603 char *next_optstr;
4604 bool ret = true;
4605
4606 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4607 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4608 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4609 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4610 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4611
4612 enum ix86_opt_type
4613 {
4614 ix86_opt_unknown,
4615 ix86_opt_yes,
4616 ix86_opt_no,
4617 ix86_opt_str,
4618 ix86_opt_enum,
4619 ix86_opt_isa
4620 };
4621
4622 static const struct
4623 {
4624 const char *string;
4625 size_t len;
4626 enum ix86_opt_type type;
4627 int opt;
4628 int mask;
4629 } attrs[] = {
4630 /* isa options */
4631 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4632 IX86_ATTR_ISA ("abm", OPT_mabm),
4633 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4634 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4635 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4636 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4637 IX86_ATTR_ISA ("aes", OPT_maes),
4638 IX86_ATTR_ISA ("sha", OPT_msha),
4639 IX86_ATTR_ISA ("avx", OPT_mavx),
4640 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4641 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4642 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4643 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4644 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4645 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4646 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4647 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4648 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4649 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4650 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4651 IX86_ATTR_ISA ("sse", OPT_msse),
4652 IX86_ATTR_ISA ("sse2", OPT_msse2),
4653 IX86_ATTR_ISA ("sse3", OPT_msse3),
4654 IX86_ATTR_ISA ("sse4", OPT_msse4),
4655 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4656 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4657 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4658 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4659 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4660 IX86_ATTR_ISA ("fma", OPT_mfma),
4661 IX86_ATTR_ISA ("xop", OPT_mxop),
4662 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4663 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4664 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4665 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4666 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4667 IX86_ATTR_ISA ("hle", OPT_mhle),
4668 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4669 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4670 IX86_ATTR_ISA ("adx", OPT_madx),
4671 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4672 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4673 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4674 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4675 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4676 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4677 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4678 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4679 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4680 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4681 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4682
4683 /* enum options */
4684 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4685
4686 /* string options */
4687 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4688 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4689
4690 /* flag options */
4691 IX86_ATTR_YES ("cld",
4692 OPT_mcld,
4693 MASK_CLD),
4694
4695 IX86_ATTR_NO ("fancy-math-387",
4696 OPT_mfancy_math_387,
4697 MASK_NO_FANCY_MATH_387),
4698
4699 IX86_ATTR_YES ("ieee-fp",
4700 OPT_mieee_fp,
4701 MASK_IEEE_FP),
4702
4703 IX86_ATTR_YES ("inline-all-stringops",
4704 OPT_minline_all_stringops,
4705 MASK_INLINE_ALL_STRINGOPS),
4706
4707 IX86_ATTR_YES ("inline-stringops-dynamically",
4708 OPT_minline_stringops_dynamically,
4709 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4710
4711 IX86_ATTR_NO ("align-stringops",
4712 OPT_mno_align_stringops,
4713 MASK_NO_ALIGN_STRINGOPS),
4714
4715 IX86_ATTR_YES ("recip",
4716 OPT_mrecip,
4717 MASK_RECIP),
4718
4719 };
4720
4721 /* If this is a list, recurse to get the options. */
4722 if (TREE_CODE (args) == TREE_LIST)
4723 {
4724 bool ret = true;
4725
4726 for (; args; args = TREE_CHAIN (args))
4727 if (TREE_VALUE (args)
4728 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4729 p_strings, opts, opts_set,
4730 enum_opts_set))
4731 ret = false;
4732
4733 return ret;
4734 }
4735
4736 else if (TREE_CODE (args) != STRING_CST)
4737 {
4738 error ("attribute %<target%> argument not a string");
4739 return false;
4740 }
4741
4742 /* Handle multiple arguments separated by commas. */
4743 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4744
4745 while (next_optstr && *next_optstr != '\0')
4746 {
4747 char *p = next_optstr;
4748 char *orig_p = p;
4749 char *comma = strchr (next_optstr, ',');
4750 const char *opt_string;
4751 size_t len, opt_len;
4752 int opt;
4753 bool opt_set_p;
4754 char ch;
4755 unsigned i;
4756 enum ix86_opt_type type = ix86_opt_unknown;
4757 int mask = 0;
4758
4759 if (comma)
4760 {
4761 *comma = '\0';
4762 len = comma - next_optstr;
4763 next_optstr = comma + 1;
4764 }
4765 else
4766 {
4767 len = strlen (p);
4768 next_optstr = NULL;
4769 }
4770
4771 /* Recognize no-xxx. */
4772 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4773 {
4774 opt_set_p = false;
4775 p += 3;
4776 len -= 3;
4777 }
4778 else
4779 opt_set_p = true;
4780
4781 /* Find the option. */
4782 ch = *p;
4783 opt = N_OPTS;
4784 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4785 {
4786 type = attrs[i].type;
4787 opt_len = attrs[i].len;
4788 if (ch == attrs[i].string[0]
4789 && ((type != ix86_opt_str && type != ix86_opt_enum)
4790 ? len == opt_len
4791 : len > opt_len)
4792 && memcmp (p, attrs[i].string, opt_len) == 0)
4793 {
4794 opt = attrs[i].opt;
4795 mask = attrs[i].mask;
4796 opt_string = attrs[i].string;
4797 break;
4798 }
4799 }
4800
4801 /* Process the option. */
4802 if (opt == N_OPTS)
4803 {
4804 error ("attribute(target(\"%s\")) is unknown", orig_p);
4805 ret = false;
4806 }
4807
4808 else if (type == ix86_opt_isa)
4809 {
4810 struct cl_decoded_option decoded;
4811
4812 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4813 ix86_handle_option (opts, opts_set,
4814 &decoded, input_location);
4815 }
4816
4817 else if (type == ix86_opt_yes || type == ix86_opt_no)
4818 {
4819 if (type == ix86_opt_no)
4820 opt_set_p = !opt_set_p;
4821
4822 if (opt_set_p)
4823 opts->x_target_flags |= mask;
4824 else
4825 opts->x_target_flags &= ~mask;
4826 }
4827
4828 else if (type == ix86_opt_str)
4829 {
4830 if (p_strings[opt])
4831 {
4832 error ("option(\"%s\") was already specified", opt_string);
4833 ret = false;
4834 }
4835 else
4836 p_strings[opt] = xstrdup (p + opt_len);
4837 }
4838
4839 else if (type == ix86_opt_enum)
4840 {
4841 bool arg_ok;
4842 int value;
4843
4844 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4845 if (arg_ok)
4846 set_option (opts, enum_opts_set, opt, value,
4847 p + opt_len, DK_UNSPECIFIED, input_location,
4848 global_dc);
4849 else
4850 {
4851 error ("attribute(target(\"%s\")) is unknown", orig_p);
4852 ret = false;
4853 }
4854 }
4855
4856 else
4857 gcc_unreachable ();
4858 }
4859
4860 return ret;
4861 }
4862
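/* Illustrative sketch (function name is hypothetical): the parser above
   accepts a comma-separated attribute string mixing ISA, enum, string and
   flag options, with a "no-" prefix clearing an ISA flag, e.g.

     __attribute__((target ("arch=haswell,avx2,no-rtm,fpmath=sse")))
     void hypothetical_kernel (float *dst, const float *src, int n);

   Each token is matched against the attrs[] table and routed to
   ix86_handle_option, set_option or the string/flag handling seen above.  */
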
4863 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4864
4865 tree
4866 ix86_valid_target_attribute_tree (tree args,
4867 struct gcc_options *opts,
4868 struct gcc_options *opts_set)
4869 {
4870 const char *orig_arch_string = opts->x_ix86_arch_string;
4871 const char *orig_tune_string = opts->x_ix86_tune_string;
4872 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4873 int orig_tune_defaulted = ix86_tune_defaulted;
4874 int orig_arch_specified = ix86_arch_specified;
4875 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4876 tree t = NULL_TREE;
4877 int i;
4878 struct cl_target_option *def
4879 = TREE_TARGET_OPTION (target_option_default_node);
4880 struct gcc_options enum_opts_set;
4881
4882 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4883
4884 /* Process each of the options on the chain. */
4885 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4886 opts_set, &enum_opts_set))
4887 return error_mark_node;
4888
4889 /* If the changed options are different from the default, rerun
4890 ix86_option_override_internal, and then save the options away.
4891 The string options are attribute options, and will be undone
4892 when we copy the save structure. */
4893 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4894 || opts->x_target_flags != def->x_target_flags
4895 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4896 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4897 || enum_opts_set.x_ix86_fpmath)
4898 {
4899 /* If we are using the default tune= or arch=, undo the string assigned,
4900 and use the default. */
4901 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4902 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4903 else if (!orig_arch_specified)
4904 opts->x_ix86_arch_string = NULL;
4905
4906 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4907 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4908 else if (orig_tune_defaulted)
4909 opts->x_ix86_tune_string = NULL;
4910
4911 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4912 if (enum_opts_set.x_ix86_fpmath)
4913 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4914 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4915 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4916 {
4917 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4918 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4919 }
4920
4921 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4922 ix86_option_override_internal (false, opts, opts_set);
4923
4924 /* Add any builtin functions with the new isa if any. */
4925 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4926
4927 /* Save the current options unless we are validating options for
4928 #pragma. */
4929 t = build_target_option_node (opts);
4930
4931 opts->x_ix86_arch_string = orig_arch_string;
4932 opts->x_ix86_tune_string = orig_tune_string;
4933 opts_set->x_ix86_fpmath = orig_fpmath_set;
4934
4935 /* Free up memory allocated to hold the strings */
4936 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4937 free (option_strings[i]);
4938 }
4939
4940 return t;
4941 }
4942
4943 /* Hook to validate attribute((target("string"))). */
4944
4945 static bool
4946 ix86_valid_target_attribute_p (tree fndecl,
4947 tree ARG_UNUSED (name),
4948 tree args,
4949 int ARG_UNUSED (flags))
4950 {
4951 struct gcc_options func_options;
4952 tree new_target, new_optimize;
4953 bool ret = true;
4954
4955 /* attribute((target("default"))) does nothing, beyond
4956 affecting multi-versioning. */
4957 if (TREE_VALUE (args)
4958 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4959 && TREE_CHAIN (args) == NULL_TREE
4960 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4961 return true;
4962
4963 tree old_optimize = build_optimization_node (&global_options);
4964
4965 /* Get the optimization options of the current function. */
4966 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4967
4968 if (!func_optimize)
4969 func_optimize = old_optimize;
4970
4971 /* Init func_options. */
4972 memset (&func_options, 0, sizeof (func_options));
4973 init_options_struct (&func_options, NULL);
4974 lang_hooks.init_options_struct (&func_options);
4975
4976 cl_optimization_restore (&func_options,
4977 TREE_OPTIMIZATION (func_optimize));
4978
4979 /* Initialize func_options to the default before its target options can
4980 be set. */
4981 cl_target_option_restore (&func_options,
4982 TREE_TARGET_OPTION (target_option_default_node));
4983
4984 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4985 &global_options_set);
4986
4987 new_optimize = build_optimization_node (&func_options);
4988
4989 if (new_target == error_mark_node)
4990 ret = false;
4991
4992 else if (fndecl && new_target)
4993 {
4994 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4995
4996 if (old_optimize != new_optimize)
4997 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4998 }
4999
5000 return ret;
5001 }
5002
5003 \f
5004 /* Hook to determine if one function can safely inline another. */
5005
5006 static bool
5007 ix86_can_inline_p (tree caller, tree callee)
5008 {
5009 bool ret = false;
5010 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5011 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5012
5013 /* If callee has no option attributes, then it is ok to inline. */
5014 if (!callee_tree)
5015 ret = true;
5016
5017 /* If caller has no option attributes, but callee does then it is not ok to
5018 inline. */
5019 else if (!caller_tree)
5020 ret = false;
5021
5022 else
5023 {
5024 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5025 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5026
5027 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5028 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5029 function. */
5030 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5031 != callee_opts->x_ix86_isa_flags)
5032 ret = false;
5033
5034 /* See if we have the same non-isa options. */
5035 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5036 ret = false;
5037
5038 /* See if arch, tune, etc. are the same. */
5039 else if (caller_opts->arch != callee_opts->arch)
5040 ret = false;
5041
5042 else if (caller_opts->tune != callee_opts->tune)
5043 ret = false;
5044
5045 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5046 ret = false;
5047
5048 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5049 ret = false;
5050
5051 else
5052 ret = true;
5053 }
5054
5055 return ret;
5056 }
5057
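/* Illustrative sketch (function names are hypothetical): under the subset
   rule above, a more capable caller may inline a less capable callee, but
   not the other way around, e.g.

     __attribute__((target ("sse2")))   static int lo (int x) { return x + 1; }
     __attribute__((target ("sse4.2"))) static int hi (int x) { return lo (x); }

   hi may inline lo because lo's ISA flags are a subset of hi's, while a
   plain SSE2 function could not inline an SSE4.2 one.  */
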
5058 \f
5059 /* Remember the last target of ix86_set_current_function. */
5060 static GTY(()) tree ix86_previous_fndecl;
5061
5062 /* Set target globals to default. */
5063
5064 static void
5065 ix86_reset_to_default_globals (void)
5066 {
5067 tree old_tree = (ix86_previous_fndecl
5068 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5069 : NULL_TREE);
5070
5071 if (old_tree)
5072 {
5073 tree new_tree = target_option_current_node;
5074 cl_target_option_restore (&global_options,
5075 TREE_TARGET_OPTION (new_tree));
5076 if (TREE_TARGET_GLOBALS (new_tree))
5077 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5078 else if (new_tree == target_option_default_node)
5079 restore_target_globals (&default_target_globals);
5080 else
5081 TREE_TARGET_GLOBALS (new_tree)
5082 = save_target_globals_default_opts ();
5083 }
5084 }
5085
5086 /* Invalidate ix86_previous_fndecl cache. */
5087 void
5088 ix86_reset_previous_fndecl (void)
5089 {
5090 ix86_reset_to_default_globals ();
5091 ix86_previous_fndecl = NULL_TREE;
5092 }
5093
5094 /* Establish appropriate back-end context for processing the function
5095 FNDECL. The argument might be NULL to indicate processing at top
5096 level, outside of any function scope. */
5097 static void
5098 ix86_set_current_function (tree fndecl)
5099 {
5100 /* Only change the context if the function changes. This hook is called
5101 several times in the course of compiling a function, and we don't want to
5102 slow things down too much or call target_reinit when it isn't safe. */
5103 if (fndecl && fndecl != ix86_previous_fndecl)
5104 {
5105 tree old_tree = (ix86_previous_fndecl
5106 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5107 : NULL_TREE);
5108
5109 tree new_tree = (fndecl
5110 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5111 : NULL_TREE);
5112
5113 if (old_tree == new_tree)
5114 ;
5115
5116 else if (new_tree && new_tree != target_option_default_node)
5117 {
5118 cl_target_option_restore (&global_options,
5119 TREE_TARGET_OPTION (new_tree));
5120 if (TREE_TARGET_GLOBALS (new_tree))
5121 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5122 else
5123 TREE_TARGET_GLOBALS (new_tree)
5124 = save_target_globals_default_opts ();
5125 }
5126
5127 else if (old_tree && old_tree != target_option_default_node)
5128 ix86_reset_to_default_globals ();
5129 ix86_previous_fndecl = fndecl;
5130 }
5131 }
5132
5133 \f
5134 /* Return true if this goes in large data/bss. */
5135
5136 static bool
5137 ix86_in_large_data_p (tree exp)
5138 {
5139 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5140 return false;
5141
5142 /* Functions are never large data. */
5143 if (TREE_CODE (exp) == FUNCTION_DECL)
5144 return false;
5145
5146 /* Automatic variables are never large data. */
5147 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5148 return false;
5149
5150 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5151 {
5152 const char *section = DECL_SECTION_NAME (exp);
5153 if (strcmp (section, ".ldata") == 0
5154 || strcmp (section, ".lbss") == 0)
5155 return true;
5156 return false;
5157 }
5158 else
5159 {
5160 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5161
5162 /* If this is an incomplete type with size 0, then we can't put it
5163 in data because it might be too big when completed. Also,
5164 int_size_in_bytes returns -1 if size can vary or is larger than
5165 an integer in which case also it is safer to assume that it goes in
5166 large data. */
5167 if (size <= 0 || size > ix86_section_threshold)
5168 return true;
5169 }
5170
5171 return false;
5172 }
5173
5174 /* Switch to the appropriate section for output of DECL.
5175 DECL is either a `VAR_DECL' node or a constant of some sort.
5176 RELOC indicates whether forming the initial value of DECL requires
5177 link-time relocations. */
5178
5179 ATTRIBUTE_UNUSED static section *
5180 x86_64_elf_select_section (tree decl, int reloc,
5181 unsigned HOST_WIDE_INT align)
5182 {
5183 if (ix86_in_large_data_p (decl))
5184 {
5185 const char *sname = NULL;
5186 unsigned int flags = SECTION_WRITE;
5187 switch (categorize_decl_for_section (decl, reloc))
5188 {
5189 case SECCAT_DATA:
5190 sname = ".ldata";
5191 break;
5192 case SECCAT_DATA_REL:
5193 sname = ".ldata.rel";
5194 break;
5195 case SECCAT_DATA_REL_LOCAL:
5196 sname = ".ldata.rel.local";
5197 break;
5198 case SECCAT_DATA_REL_RO:
5199 sname = ".ldata.rel.ro";
5200 break;
5201 case SECCAT_DATA_REL_RO_LOCAL:
5202 sname = ".ldata.rel.ro.local";
5203 break;
5204 case SECCAT_BSS:
5205 sname = ".lbss";
5206 flags |= SECTION_BSS;
5207 break;
5208 case SECCAT_RODATA:
5209 case SECCAT_RODATA_MERGE_STR:
5210 case SECCAT_RODATA_MERGE_STR_INIT:
5211 case SECCAT_RODATA_MERGE_CONST:
5212 sname = ".lrodata";
5213 flags = 0;
5214 break;
5215 case SECCAT_SRODATA:
5216 case SECCAT_SDATA:
5217 case SECCAT_SBSS:
5218 gcc_unreachable ();
5219 case SECCAT_TEXT:
5220 case SECCAT_TDATA:
5221 case SECCAT_TBSS:
5222 /* We don't split these for the medium model.  Place them into
5223 default sections and hope for the best. */
5224 break;
5225 }
5226 if (sname)
5227 {
5228 /* We might get called with string constants, but get_named_section
5229 doesn't like them as they are not DECLs. Also, we need to set
5230 flags in that case. */
5231 if (!DECL_P (decl))
5232 return get_section (sname, flags, NULL);
5233 return get_named_section (decl, sname, reloc);
5234 }
5235 }
5236 return default_elf_select_section (decl, reloc, align);
5237 }
5238
5239 /* Select a set of attributes for section NAME based on the properties
5240 of DECL and whether or not RELOC indicates that DECL's initializer
5241 might contain runtime relocations. */
5242
5243 static unsigned int ATTRIBUTE_UNUSED
5244 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5245 {
5246 unsigned int flags = default_section_type_flags (decl, name, reloc);
5247
5248 if (decl == NULL_TREE
5249 && (strcmp (name, ".ldata.rel.ro") == 0
5250 || strcmp (name, ".ldata.rel.ro.local") == 0))
5251 flags |= SECTION_RELRO;
5252
5253 if (strcmp (name, ".lbss") == 0
5254 || strncmp (name, ".lbss.", 5) == 0
5255 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5256 flags |= SECTION_BSS;
5257
5258 return flags;
5259 }
5260
5261 /* Build up a unique section name, expressed as a
5262 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5263 RELOC indicates whether the initial value of EXP requires
5264 link-time relocations. */
5265
5266 static void ATTRIBUTE_UNUSED
5267 x86_64_elf_unique_section (tree decl, int reloc)
5268 {
5269 if (ix86_in_large_data_p (decl))
5270 {
5271 const char *prefix = NULL;
5272 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5273 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5274
5275 switch (categorize_decl_for_section (decl, reloc))
5276 {
5277 case SECCAT_DATA:
5278 case SECCAT_DATA_REL:
5279 case SECCAT_DATA_REL_LOCAL:
5280 case SECCAT_DATA_REL_RO:
5281 case SECCAT_DATA_REL_RO_LOCAL:
5282 prefix = one_only ? ".ld" : ".ldata";
5283 break;
5284 case SECCAT_BSS:
5285 prefix = one_only ? ".lb" : ".lbss";
5286 break;
5287 case SECCAT_RODATA:
5288 case SECCAT_RODATA_MERGE_STR:
5289 case SECCAT_RODATA_MERGE_STR_INIT:
5290 case SECCAT_RODATA_MERGE_CONST:
5291 prefix = one_only ? ".lr" : ".lrodata";
5292 break;
5293 case SECCAT_SRODATA:
5294 case SECCAT_SDATA:
5295 case SECCAT_SBSS:
5296 gcc_unreachable ();
5297 case SECCAT_TEXT:
5298 case SECCAT_TDATA:
5299 case SECCAT_TBSS:
5300 /* We don't split these for the medium model.  Place them into
5301 default sections and hope for the best. */
5302 break;
5303 }
5304 if (prefix)
5305 {
5306 const char *name, *linkonce;
5307 char *string;
5308
5309 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5310 name = targetm.strip_name_encoding (name);
5311
5312 /* If we're using one_only, then there needs to be a .gnu.linkonce
5313 prefix to the section name. */
5314 linkonce = one_only ? ".gnu.linkonce" : "";
5315
5316 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5317
5318 set_decl_section_name (decl, string);
5319 return;
5320 }
5321 }
5322 default_unique_section (decl, reloc);
5323 }
5324
5325 #ifdef COMMON_ASM_OP
5326 /* This says how to output assembler code to declare an
5327 uninitialized external linkage data object.
5328
5329 For medium model x86-64 we need to use the .largecomm directive for
5330 large objects. */
5331 void
5332 x86_elf_aligned_common (FILE *file,
5333 const char *name, unsigned HOST_WIDE_INT size,
5334 int align)
5335 {
5336 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5337 && size > (unsigned int)ix86_section_threshold)
5338 fputs ("\t.largecomm\t", file);
5339 else
5340 fputs (COMMON_ASM_OP, file);
5341 assemble_name (file, name);
5342 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5343 size, align / BITS_PER_UNIT);
5344 }
5345 #endif
5346
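/* Illustrative sketch (symbol name is hypothetical): with -mcmodel=medium
   and an object larger than -mlarge-data-threshold, e.g.

     int hypothetical_buffer[1 << 20];

   the routine above emits something like
   ".largecomm hypothetical_buffer,4194304,32" (the exact alignment operand
   depends on DATA_ALIGNMENT) instead of the usual COMMON_ASM_OP, so the
   object can be placed outside the small-data area.  */
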
5347 /* Utility function for targets to use in implementing
5348 ASM_OUTPUT_ALIGNED_BSS. */
5349
5350 void
5351 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5352 unsigned HOST_WIDE_INT size, int align)
5353 {
5354 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5355 && size > (unsigned int)ix86_section_threshold)
5356 switch_to_section (get_named_section (decl, ".lbss", 0));
5357 else
5358 switch_to_section (bss_section);
5359 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5360 #ifdef ASM_DECLARE_OBJECT_NAME
5361 last_assemble_variable_decl = decl;
5362 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5363 #else
5364 /* Standard thing is just output label for the object. */
5365 ASM_OUTPUT_LABEL (file, name);
5366 #endif /* ASM_DECLARE_OBJECT_NAME */
5367 ASM_OUTPUT_SKIP (file, size ? size : 1);
5368 }
5369 \f
5370 /* Decide whether we must probe the stack before any space allocation
5371 on this target. It's essentially TARGET_STACK_PROBE except when
5372 -fstack-check causes the stack to be already probed differently. */
5373
5374 bool
5375 ix86_target_stack_probe (void)
5376 {
5377 /* Do not probe the stack twice if static stack checking is enabled. */
5378 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5379 return false;
5380
5381 return TARGET_STACK_PROBE;
5382 }
5383 \f
5384 /* Decide whether we can make a sibling call to a function. DECL is the
5385 declaration of the function being targeted by the call and EXP is the
5386 CALL_EXPR representing the call. */
5387
5388 static bool
5389 ix86_function_ok_for_sibcall (tree decl, tree exp)
5390 {
5391 tree type, decl_or_type;
5392 rtx a, b;
5393
5394 /* If we are generating position-independent code, we cannot sibcall
5395 optimize any indirect call, or a direct call to a global function,
5396 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5397 if (!TARGET_MACHO
5398 && !TARGET_64BIT
5399 && flag_pic
5400 && (!decl || !targetm.binds_local_p (decl)))
5401 return false;
5402
5403 /* If we need to align the outgoing stack, then sibcalling would
5404 unalign the stack, which may break the called function. */
5405 if (ix86_minimum_incoming_stack_boundary (true)
5406 < PREFERRED_STACK_BOUNDARY)
5407 return false;
5408
5409 if (decl)
5410 {
5411 decl_or_type = decl;
5412 type = TREE_TYPE (decl);
5413 }
5414 else
5415 {
5416 /* We're looking at the CALL_EXPR, we need the type of the function. */
5417 type = CALL_EXPR_FN (exp); /* pointer expression */
5418 type = TREE_TYPE (type); /* pointer type */
5419 type = TREE_TYPE (type); /* function type */
5420 decl_or_type = type;
5421 }
5422
5423 /* Check that the return value locations are the same. Like
5424 if we are returning floats on the 80387 register stack, we cannot
5425 make a sibcall from a function that doesn't return a float to a
5426 function that does or, conversely, from a function that does return
5427 a float to a function that doesn't; the necessary stack adjustment
5428 would not be executed. This is also the place we notice
5429 differences in the return value ABI. Note that it is ok for one
5430 of the functions to have void return type as long as the return
5431 value of the other is passed in a register. */
5432 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5433 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5434 cfun->decl, false);
5435 if (STACK_REG_P (a) || STACK_REG_P (b))
5436 {
5437 if (!rtx_equal_p (a, b))
5438 return false;
5439 }
5440 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5441 ;
5442 else if (!rtx_equal_p (a, b))
5443 return false;
5444
5445 if (TARGET_64BIT)
5446 {
5447 /* The SYSV ABI has more call-clobbered registers;
5448 disallow sibcalls from MS to SYSV. */
5449 if (cfun->machine->call_abi == MS_ABI
5450 && ix86_function_type_abi (type) == SYSV_ABI)
5451 return false;
5452 }
5453 else
5454 {
5455 /* If this call is indirect, we'll need to be able to use a
5456 call-clobbered register for the address of the target function.
5457 Make sure that all such registers are not used for passing
5458 parameters. Note that DLLIMPORT functions are indirect. */
5459 if (!decl
5460 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5461 {
5462 if (ix86_function_regparm (type, NULL) >= 3)
5463 {
5464 /* ??? Need to count the actual number of registers to be used,
5465 not the possible number of registers. Fix later. */
5466 return false;
5467 }
5468 }
5469 }
5470
5471 /* Otherwise okay. That also includes certain types of indirect calls. */
5472 return true;
5473 }
5474
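/* Illustrative sketch (function names are hypothetical): on 32-bit x86 with
   the default x87 return convention, the return-value check above rejects,
   for instance, a tail call from a void function to a double-returning one,

     double hypothetical_compute (double);
     void hypothetical_wrap (double x) { hypothetical_compute (x); }

   because the callee would leave its result on the x87 stack with nobody to
   pop it, while two double-returning functions with matching return
   locations may be sibcalled, other checks permitting.  */
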
5475 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5476 and "sseregparm" calling convention attributes;
5477 arguments as in struct attribute_spec.handler. */
5478
5479 static tree
5480 ix86_handle_cconv_attribute (tree *node, tree name,
5481 tree args,
5482 int,
5483 bool *no_add_attrs)
5484 {
5485 if (TREE_CODE (*node) != FUNCTION_TYPE
5486 && TREE_CODE (*node) != METHOD_TYPE
5487 && TREE_CODE (*node) != FIELD_DECL
5488 && TREE_CODE (*node) != TYPE_DECL)
5489 {
5490 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5491 name);
5492 *no_add_attrs = true;
5493 return NULL_TREE;
5494 }
5495
5496 /* Can combine regparm with all attributes but fastcall and thiscall. */
5497 if (is_attribute_p ("regparm", name))
5498 {
5499 tree cst;
5500
5501 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5502 {
5503 error ("fastcall and regparm attributes are not compatible");
5504 }
5505
5506 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5507 {
5508 error ("regparm and thiscall attributes are not compatible");
5509 }
5510
5511 cst = TREE_VALUE (args);
5512 if (TREE_CODE (cst) != INTEGER_CST)
5513 {
5514 warning (OPT_Wattributes,
5515 "%qE attribute requires an integer constant argument",
5516 name);
5517 *no_add_attrs = true;
5518 }
5519 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5520 {
5521 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5522 name, REGPARM_MAX);
5523 *no_add_attrs = true;
5524 }
5525
5526 return NULL_TREE;
5527 }
5528
5529 if (TARGET_64BIT)
5530 {
5531 /* Do not warn when emulating the MS ABI. */
5532 if ((TREE_CODE (*node) != FUNCTION_TYPE
5533 && TREE_CODE (*node) != METHOD_TYPE)
5534 || ix86_function_type_abi (*node) != MS_ABI)
5535 warning (OPT_Wattributes, "%qE attribute ignored",
5536 name);
5537 *no_add_attrs = true;
5538 return NULL_TREE;
5539 }
5540
5541 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5542 if (is_attribute_p ("fastcall", name))
5543 {
5544 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5545 {
5546 error ("fastcall and cdecl attributes are not compatible");
5547 }
5548 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5549 {
5550 error ("fastcall and stdcall attributes are not compatible");
5551 }
5552 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5553 {
5554 error ("fastcall and regparm attributes are not compatible");
5555 }
5556 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5557 {
5558 error ("fastcall and thiscall attributes are not compatible");
5559 }
5560 }
5561
5562 /* Can combine stdcall with fastcall (redundant), regparm and
5563 sseregparm. */
5564 else if (is_attribute_p ("stdcall", name))
5565 {
5566 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5567 {
5568 error ("stdcall and cdecl attributes are not compatible");
5569 }
5570 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5571 {
5572 error ("stdcall and fastcall attributes are not compatible");
5573 }
5574 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5575 {
5576 error ("stdcall and thiscall attributes are not compatible");
5577 }
5578 }
5579
5580 /* Can combine cdecl with regparm and sseregparm. */
5581 else if (is_attribute_p ("cdecl", name))
5582 {
5583 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5584 {
5585 error ("stdcall and cdecl attributes are not compatible");
5586 }
5587 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5588 {
5589 error ("fastcall and cdecl attributes are not compatible");
5590 }
5591 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5592 {
5593 error ("cdecl and thiscall attributes are not compatible");
5594 }
5595 }
5596 else if (is_attribute_p ("thiscall", name))
5597 {
5598 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5599 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5600 name);
5601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5602 {
5603 error ("stdcall and thiscall attributes are not compatible");
5604 }
5605 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5606 {
5607 error ("fastcall and thiscall attributes are not compatible");
5608 }
5609 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5610 {
5611 error ("cdecl and thiscall attributes are not compatible");
5612 }
5613 }
5614
5615 /* Can combine sseregparm with all attributes. */
5616
5617 return NULL_TREE;
5618 }
5619
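/* Illustrative sketch (function names are hypothetical): on 32-bit x86 the
   handler above accepts combinations such as

     int __attribute__((regparm (3))) hypothetical_add (int, int, int);
     double __attribute__((stdcall, sseregparm)) hypothetical_mul (double, double);

   but rejects mutually exclusive ones, e.g. fastcall with regparm or
   stdcall with cdecl, with the errors shown above.  */
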
5620 /* The transactional memory builtins are implicitly regparm or fastcall
5621 depending on the ABI. Override the generic do-nothing attribute that
5622 these builtins were declared with, and replace it with one of the two
5623 attributes that we expect elsewhere. */
5624
5625 static tree
5626 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5627 int flags, bool *no_add_attrs)
5628 {
5629 tree alt;
5630
5631 /* In no case do we want to add the placeholder attribute. */
5632 *no_add_attrs = true;
5633
5634 /* The 64-bit ABI is unchanged for transactional memory. */
5635 if (TARGET_64BIT)
5636 return NULL_TREE;
5637
5638 /* ??? Is there a better way to validate 32-bit windows? We have
5639 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5640 if (CHECK_STACK_LIMIT > 0)
5641 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5642 else
5643 {
5644 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5645 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5646 }
5647 decl_attributes (node, alt, flags);
5648
5649 return NULL_TREE;
5650 }
5651
5652 /* This function determines from TYPE the calling-convention. */
5653
5654 unsigned int
5655 ix86_get_callcvt (const_tree type)
5656 {
5657 unsigned int ret = 0;
5658 bool is_stdarg;
5659 tree attrs;
5660
5661 if (TARGET_64BIT)
5662 return IX86_CALLCVT_CDECL;
5663
5664 attrs = TYPE_ATTRIBUTES (type);
5665 if (attrs != NULL_TREE)
5666 {
5667 if (lookup_attribute ("cdecl", attrs))
5668 ret |= IX86_CALLCVT_CDECL;
5669 else if (lookup_attribute ("stdcall", attrs))
5670 ret |= IX86_CALLCVT_STDCALL;
5671 else if (lookup_attribute ("fastcall", attrs))
5672 ret |= IX86_CALLCVT_FASTCALL;
5673 else if (lookup_attribute ("thiscall", attrs))
5674 ret |= IX86_CALLCVT_THISCALL;
5675
5676 /* Regparam isn't allowed for thiscall and fastcall. */
5677 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5678 {
5679 if (lookup_attribute ("regparm", attrs))
5680 ret |= IX86_CALLCVT_REGPARM;
5681 if (lookup_attribute ("sseregparm", attrs))
5682 ret |= IX86_CALLCVT_SSEREGPARM;
5683 }
5684
5685 if (IX86_BASE_CALLCVT(ret) != 0)
5686 return ret;
5687 }
5688
5689 is_stdarg = stdarg_p (type);
5690 if (TARGET_RTD && !is_stdarg)
5691 return IX86_CALLCVT_STDCALL | ret;
5692
5693 if (ret != 0
5694 || is_stdarg
5695 || TREE_CODE (type) != METHOD_TYPE
5696 || ix86_function_type_abi (type) != MS_ABI)
5697 return IX86_CALLCVT_CDECL | ret;
5698
5699 return IX86_CALLCVT_THISCALL;
5700 }
5701
5702 /* Return 0 if the attributes for two types are incompatible, 1 if they
5703 are compatible, and 2 if they are nearly compatible (which causes a
5704 warning to be generated). */
5705
5706 static int
5707 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5708 {
5709 unsigned int ccvt1, ccvt2;
5710
5711 if (TREE_CODE (type1) != FUNCTION_TYPE
5712 && TREE_CODE (type1) != METHOD_TYPE)
5713 return 1;
5714
5715 ccvt1 = ix86_get_callcvt (type1);
5716 ccvt2 = ix86_get_callcvt (type2);
5717 if (ccvt1 != ccvt2)
5718 return 0;
5719 if (ix86_function_regparm (type1, NULL)
5720 != ix86_function_regparm (type2, NULL))
5721 return 0;
5722
5723 return 1;
5724 }
5725 \f
5726 /* Return the regparm value for a function with the indicated TYPE and DECL.
5727 DECL may be NULL when calling function indirectly
5728 or considering a libcall. */
5729
5730 static int
5731 ix86_function_regparm (const_tree type, const_tree decl)
5732 {
5733 tree attr;
5734 int regparm;
5735 unsigned int ccvt;
5736
5737 if (TARGET_64BIT)
5738 return (ix86_function_type_abi (type) == SYSV_ABI
5739 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5740 ccvt = ix86_get_callcvt (type);
5741 regparm = ix86_regparm;
5742
5743 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5744 {
5745 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5746 if (attr)
5747 {
5748 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5749 return regparm;
5750 }
5751 }
5752 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5753 return 2;
5754 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5755 return 1;
5756
5757 /* Use register calling convention for local functions when possible. */
5758 if (decl
5759 && TREE_CODE (decl) == FUNCTION_DECL
5760 /* Caller and callee must agree on the calling convention, so
5761 checking just `optimize' here would mean that with
5762 __attribute__((optimize (...))) the caller could use the regparm
5763 convention and the callee not, or vice versa.  Instead look at whether the callee
5764 is optimized or not. */
5765 && opt_for_fn (decl, optimize)
5766 && !(profile_flag && !flag_fentry))
5767 {
5768 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5769 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5770 if (i && i->local && i->can_change_signature)
5771 {
5772 int local_regparm, globals = 0, regno;
5773
5774 /* Make sure no regparm register is taken by a
5775 fixed register variable. */
5776 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5777 if (fixed_regs[local_regparm])
5778 break;
5779
5780 /* We don't want to use regparm(3) for nested functions as
5781 these use a static chain pointer in the third argument. */
5782 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5783 local_regparm = 2;
5784
5785 /* In 32-bit mode save a register for the split stack. */
5786 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5787 local_regparm = 2;
5788
5789 /* Each fixed register usage increases register pressure,
5790 so fewer registers should be used for argument passing.
5791 This functionality can be overridden by an explicit
5792 regparm value. */
5793 for (regno = AX_REG; regno <= DI_REG; regno++)
5794 if (fixed_regs[regno])
5795 globals++;
5796
5797 local_regparm
5798 = globals < local_regparm ? local_regparm - globals : 0;
5799
5800 if (local_regparm > regparm)
5801 regparm = local_regparm;
5802 }
5803 }
5804
5805 return regparm;
5806 }
5807
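/* Illustrative sketch (function name is hypothetical): on 32-bit x86,
   regparm (N) passes the first N integer arguments in EAX, EDX and ECX, in
   that order, so for

     int __attribute__((regparm (2))) hypothetical_sub (int a, int b);

   a arrives in EAX and b in EDX while further arguments stay on the stack;
   fastcall similarly passes two arguments in registers but uses ECX and
   EDX.  */
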
5808 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5809 DFmode (2) arguments in SSE registers for a function with the
5810 indicated TYPE and DECL. DECL may be NULL when calling function
5811 indirectly or considering a libcall. Otherwise return 0. */
5812
5813 static int
5814 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5815 {
5816 gcc_assert (!TARGET_64BIT);
5817
5818 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5819 by the sseregparm attribute. */
5820 if (TARGET_SSEREGPARM
5821 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5822 {
5823 if (!TARGET_SSE)
5824 {
5825 if (warn)
5826 {
5827 if (decl)
5828 error ("calling %qD with attribute sseregparm without "
5829 "SSE/SSE2 enabled", decl);
5830 else
5831 error ("calling %qT with attribute sseregparm without "
5832 "SSE/SSE2 enabled", type);
5833 }
5834 return 0;
5835 }
5836
5837 return 2;
5838 }
5839
5840 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5841 (and DFmode for SSE2) arguments in SSE registers. */
5842 if (decl && TARGET_SSE_MATH && optimize
5843 && !(profile_flag && !flag_fentry))
5844 {
5845 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5846 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5847 if (i && i->local && i->can_change_signature)
5848 return TARGET_SSE2 ? 2 : 1;
5849 }
5850
5851 return 0;
5852 }
5853
5854 /* Return true if EAX is live at the start of the function. Used by
5855 ix86_expand_prologue to determine if we need special help before
5856 calling allocate_stack_worker. */
5857
5858 static bool
5859 ix86_eax_live_at_start_p (void)
5860 {
5861 /* Cheat. Don't bother working forward from ix86_function_regparm
5862 to the function type to whether an actual argument is located in
5863 eax. Instead just look at cfg info, which is still close enough
5864 to correct at this point. This gives false positives for broken
5865 functions that might use uninitialized data that happens to be
5866 allocated in eax, but who cares? */
5867 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5868 }
5869
5870 static bool
5871 ix86_keep_aggregate_return_pointer (tree fntype)
5872 {
5873 tree attr;
5874
5875 if (!TARGET_64BIT)
5876 {
5877 attr = lookup_attribute ("callee_pop_aggregate_return",
5878 TYPE_ATTRIBUTES (fntype));
5879 if (attr)
5880 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5881
5882 /* For 32-bit MS-ABI the default is to keep aggregate
5883 return pointer. */
5884 if (ix86_function_type_abi (fntype) == MS_ABI)
5885 return true;
5886 }
5887 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5888 }
5889
5890 /* Value is the number of bytes of arguments automatically
5891 popped when returning from a subroutine call.
5892 FUNDECL is the declaration node of the function (as a tree),
5893 FUNTYPE is the data type of the function (as a tree),
5894 or for a library call it is an identifier node for the subroutine name.
5895 SIZE is the number of bytes of arguments passed on the stack.
5896
5897 On the 80386, the RTD insn may be used to pop them if the number
5898 of args is fixed, but if the number is variable then the caller
5899 must pop them all. RTD can't be used for library calls now
5900 because the library is compiled with the Unix compiler.
5901 Use of RTD is a selectable option, since it is incompatible with
5902 standard Unix calling sequences. If the option is not selected,
5903 the caller must always pop the args.
5904
5905 The attribute stdcall is equivalent to RTD on a per module basis. */
5906
5907 static int
5908 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5909 {
5910 unsigned int ccvt;
5911
5912 /* None of the 64-bit ABIs pop arguments. */
5913 if (TARGET_64BIT)
5914 return 0;
5915
5916 ccvt = ix86_get_callcvt (funtype);
5917
5918 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5919 | IX86_CALLCVT_THISCALL)) != 0
5920 && ! stdarg_p (funtype))
5921 return size;
5922
5923 /* Lose any fake structure return argument if it is passed on the stack. */
5924 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5925 && !ix86_keep_aggregate_return_pointer (funtype))
5926 {
5927 int nregs = ix86_function_regparm (funtype, fundecl);
5928 if (nregs == 0)
5929 return GET_MODE_SIZE (Pmode);
5930 }
5931
5932 return 0;
5933 }
5934
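/* Illustrative sketch (function name is hypothetical): for a 32-bit stdcall
   function with fixed arguments the value returned above equals the size of
   the argument block, so

     void __attribute__((stdcall)) hypothetical_fn (int, int);

   pops its 8 bytes of stack arguments itself (typically via "ret $8"),
   whereas a cdecl or stdarg function returns 0 here and leaves the pop to
   the caller.  */
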
5935 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5936
5937 static bool
5938 ix86_legitimate_combined_insn (rtx_insn *insn)
5939 {
5940 /* Check operand constraints in case hard registers were propagated
5941 into insn pattern. This check prevents combine pass from
5942 generating insn patterns with invalid hard register operands.
5943 These invalid insns can eventually confuse reload to error out
5944 with a spill failure. See also PRs 46829 and 46843. */
5945 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5946 {
5947 int i;
5948
5949 extract_insn (insn);
5950 preprocess_constraints (insn);
5951
5952 int n_operands = recog_data.n_operands;
5953 int n_alternatives = recog_data.n_alternatives;
5954 for (i = 0; i < n_operands; i++)
5955 {
5956 rtx op = recog_data.operand[i];
5957 machine_mode mode = GET_MODE (op);
5958 const operand_alternative *op_alt;
5959 int offset = 0;
5960 bool win;
5961 int j;
5962
5963 /* For pre-AVX disallow unaligned loads/stores where the
5964 instructions don't support it. */
5965 if (!TARGET_AVX
5966 && VECTOR_MODE_P (GET_MODE (op))
5967 && misaligned_operand (op, GET_MODE (op)))
5968 {
5969 int min_align = get_attr_ssememalign (insn);
5970 if (min_align == 0)
5971 return false;
5972 }
5973
5974 /* A unary operator may be accepted by the predicate, but it
5975 is irrelevant for matching constraints. */
5976 if (UNARY_P (op))
5977 op = XEXP (op, 0);
5978
5979 if (GET_CODE (op) == SUBREG)
5980 {
5981 if (REG_P (SUBREG_REG (op))
5982 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5983 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5984 GET_MODE (SUBREG_REG (op)),
5985 SUBREG_BYTE (op),
5986 GET_MODE (op));
5987 op = SUBREG_REG (op);
5988 }
5989
5990 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5991 continue;
5992
5993 op_alt = recog_op_alt;
5994
5995 /* Operand has no constraints, anything is OK. */
5996 win = !n_alternatives;
5997
5998 alternative_mask preferred = get_preferred_alternatives (insn);
5999 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6000 {
6001 if (!TEST_BIT (preferred, j))
6002 continue;
6003 if (op_alt[i].anything_ok
6004 || (op_alt[i].matches != -1
6005 && operands_match_p
6006 (recog_data.operand[i],
6007 recog_data.operand[op_alt[i].matches]))
6008 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6009 {
6010 win = true;
6011 break;
6012 }
6013 }
6014
6015 if (!win)
6016 return false;
6017 }
6018 }
6019
6020 return true;
6021 }
6022 \f
6023 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6024
6025 static unsigned HOST_WIDE_INT
6026 ix86_asan_shadow_offset (void)
6027 {
6028 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6029 : HOST_WIDE_INT_C (0x7fff8000))
6030 : (HOST_WIDE_INT_1 << 29);
6031 }
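
/* Illustrative sketch: AddressSanitizer locates the shadow byte for an
   application address as (addr >> 3) + shadow_offset.  With the LP64
   non-Mach-O offset returned above, a hypothetical address 0x602010 maps to
   shadow byte 0x7fff8000 + (0x602010 >> 3) = 0x800b8402.  */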
6032 \f
6033 /* Argument support functions. */
6034
6035 /* Return true when register may be used to pass function parameters. */
6036 bool
6037 ix86_function_arg_regno_p (int regno)
6038 {
6039 int i;
6040 const int *parm_regs;
6041
6042 if (!TARGET_64BIT)
6043 {
6044 if (TARGET_MACHO)
6045 return (regno < REGPARM_MAX
6046 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6047 else
6048 return (regno < REGPARM_MAX
6049 || (TARGET_MMX && MMX_REGNO_P (regno)
6050 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6051 || (TARGET_SSE && SSE_REGNO_P (regno)
6052 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6053 }
6054
6055 if (TARGET_SSE && SSE_REGNO_P (regno)
6056 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6057 return true;
6058
6059 /* TODO: The function should depend on current function ABI but
6060 builtins.c would need updating then. Therefore we use the
6061 default ABI. */
6062
6063 /* RAX is used as hidden argument to va_arg functions. */
6064 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6065 return true;
6066
6067 if (ix86_abi == MS_ABI)
6068 parm_regs = x86_64_ms_abi_int_parameter_registers;
6069 else
6070 parm_regs = x86_64_int_parameter_registers;
6071 for (i = 0; i < (ix86_abi == MS_ABI
6072 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6073 if (regno == parm_regs[i])
6074 return true;
6075 return false;
6076 }
6077
6078 /* Return true if we do not know how to pass TYPE solely in registers. */
6079
6080 static bool
6081 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6082 {
6083 if (must_pass_in_stack_var_size_or_pad (mode, type))
6084 return true;
6085
6086 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6087 The layout_type routine is crafty and tries to trick us into passing
6088 currently unsupported vector types on the stack by using TImode. */
6089 return (!TARGET_64BIT && mode == TImode
6090 && type && TREE_CODE (type) != VECTOR_TYPE);
6091 }
6092
6093 /* Return the size, in bytes, of the area reserved for arguments passed
6094 in registers for the function represented by FNDECL, which depends on the
6095 ABI used. */
6096 int
6097 ix86_reg_parm_stack_space (const_tree fndecl)
6098 {
6099 enum calling_abi call_abi = SYSV_ABI;
6100 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6101 call_abi = ix86_function_abi (fndecl);
6102 else
6103 call_abi = ix86_function_type_abi (fndecl);
6104 if (TARGET_64BIT && call_abi == MS_ABI)
6105 return 32;
6106 return 0;
6107 }
6108
6109 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6110 call abi used. */
6111 enum calling_abi
6112 ix86_function_type_abi (const_tree fntype)
6113 {
6114 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6115 {
6116 enum calling_abi abi = ix86_abi;
6117 if (abi == SYSV_ABI)
6118 {
6119 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6120 abi = MS_ABI;
6121 }
6122 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6123 abi = SYSV_ABI;
6124 return abi;
6125 }
6126 return ix86_abi;
6127 }
6128
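/* Illustrative sketch (function name is hypothetical): the per-type lookup
   above is what lets a declaration such as

     void __attribute__((ms_abi)) hypothetical_win64_callback (void *);

   use the Microsoft x64 calling convention (integer arguments in RCX, RDX,
   R8 and R9 plus 32 bytes of shadow space) even when ix86_abi is SYSV_ABI,
   and sysv_abi the other way around.  */
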
6129 /* We add this as a workaround in order to use libc_has_function
6130 hook in i386.md. */
6131 bool
6132 ix86_libc_has_function (enum function_class fn_class)
6133 {
6134 return targetm.libc_has_function (fn_class);
6135 }
6136
6137 static bool
6138 ix86_function_ms_hook_prologue (const_tree fn)
6139 {
6140 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6141 {
6142 if (decl_function_context (fn) != NULL_TREE)
6143 error_at (DECL_SOURCE_LOCATION (fn),
6144 "ms_hook_prologue is not compatible with nested function");
6145 else
6146 return true;
6147 }
6148 return false;
6149 }
6150
6151 static enum calling_abi
6152 ix86_function_abi (const_tree fndecl)
6153 {
6154 if (! fndecl)
6155 return ix86_abi;
6156 return ix86_function_type_abi (TREE_TYPE (fndecl));
6157 }
6158
6159 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the call
6160    ABI used.  */
6161 enum calling_abi
6162 ix86_cfun_abi (void)
6163 {
6164 if (! cfun)
6165 return ix86_abi;
6166 return cfun->machine->call_abi;
6167 }
6168
6169 /* Write the extra assembler code needed to declare a function properly. */
6170
6171 void
6172 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6173 tree decl)
6174 {
6175 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6176
6177 if (is_ms_hook)
6178 {
6179 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6180 unsigned int filler_cc = 0xcccccccc;
6181
6182 for (i = 0; i < filler_count; i += 4)
6183 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6184 }
6185
6186 #ifdef SUBTARGET_ASM_UNWIND_INIT
6187 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6188 #endif
6189
6190 ASM_OUTPUT_LABEL (asm_out_file, fname);
6191
6192 /* Output magic byte marker, if hot-patch attribute is set. */
6193 if (is_ms_hook)
6194 {
6195 if (TARGET_64BIT)
6196 {
6197 /* leaq [%rsp + 0], %rsp */
6198 asm_fprintf (asm_out_file, ASM_BYTE
6199 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6200 }
6201 else
6202 {
6203 /* movl.s %edi, %edi
6204 push %ebp
6205 movl.s %esp, %ebp */
6206 asm_fprintf (asm_out_file, ASM_BYTE
6207 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6208 }
6209 }
6210 }
6211
6212 /* regclass.c */
6213 extern void init_regs (void);
6214
6215 /* Implementation of the call ABI switching target hook.  Sets the
6216    call register set specific to FNDECL.  See also
6217    ix86_conditional_register_usage for more details.  */
6218 void
6219 ix86_call_abi_override (const_tree fndecl)
6220 {
6221 if (fndecl == NULL_TREE)
6222 cfun->machine->call_abi = ix86_abi;
6223 else
6224 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6225 }
6226
6227 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6228    Avoid expensive re-initialization of init_regs each time we switch function
6229    context, since this is needed only during RTL expansion.  */
6230 static void
6231 ix86_maybe_switch_abi (void)
6232 {
6233 if (TARGET_64BIT &&
6234 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6235 reinit_regs ();
6236 }
6237
6238 /* Return true if a pseudo register should be created and used to hold
6239    the GOT address for PIC code.  */
6240 static bool
6241 ix86_use_pseudo_pic_reg (void)
6242 {
6243 if ((TARGET_64BIT
6244 && (ix86_cmodel == CM_SMALL_PIC
6245 || TARGET_PECOFF))
6246 || !flag_pic)
6247 return false;
6248 return true;
6249 }
6250
6251 /* Initialize large model PIC register. */
6252
6253 static void
6254 ix86_init_large_pic_reg (unsigned int tmp_regno)
6255 {
6256 rtx_code_label *label;
6257 rtx tmp_reg;
6258
6259 gcc_assert (Pmode == DImode);
6260 label = gen_label_rtx ();
6261 emit_label (label);
6262 LABEL_PRESERVE_P (label) = 1;
6263 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6264 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6265 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6266 label));
6267 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6268 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6269 pic_offset_table_rtx, tmp_reg));
6270 }
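/* The sequence emitted above is roughly (an illustrative sketch only; the
   actual register choices depend on allocation):

     .L1:
        lea     .L1(%rip), %pic                      # set_rip_rex64
        movabs  $_GLOBAL_OFFSET_TABLE_-.L1, %tmp     # set_got_offset_rex64
        add     %tmp, %pic                           # ix86_gen_add3
*/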
6271
6272 /* Create and initialize PIC register if required. */
6273 static void
6274 ix86_init_pic_reg (void)
6275 {
6276 edge entry_edge;
6277 rtx_insn *seq;
6278
6279 if (!ix86_use_pseudo_pic_reg ())
6280 return;
6281
6282 start_sequence ();
6283
6284 if (TARGET_64BIT)
6285 {
6286 if (ix86_cmodel == CM_LARGE_PIC)
6287 ix86_init_large_pic_reg (R11_REG);
6288 else
6289 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6290 }
6291 else
6292 {
6293 /* If the function will contain an mcount call, it is more profitable
6294    to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6295 rtx reg = crtl->profile
6296 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6297 : pic_offset_table_rtx;
6298 rtx insn = emit_insn (gen_set_got (reg));
6299 RTX_FRAME_RELATED_P (insn) = 1;
6300 if (crtl->profile)
6301 emit_move_insn (pic_offset_table_rtx, reg);
6302 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6303 }
6304
6305 seq = get_insns ();
6306 end_sequence ();
6307
6308 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6309 insert_insn_on_edge (seq, entry_edge);
6310 commit_one_edge_insertion (entry_edge);
6311 }
6312
6313 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6314 for a call to a function whose data type is FNTYPE.
6315 For a library call, FNTYPE is 0. */
6316
6317 void
6318 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6319 tree fntype, /* tree ptr for function decl */
6320 rtx libname, /* SYMBOL_REF of library name or 0 */
6321 tree fndecl,
6322 int caller)
6323 {
6324 struct cgraph_local_info *i;
6325
6326 memset (cum, 0, sizeof (*cum));
6327
6328 if (fndecl)
6329 {
6330 i = cgraph_node::local_info (fndecl);
6331 cum->call_abi = ix86_function_abi (fndecl);
6332 }
6333 else
6334 {
6335 i = NULL;
6336 cum->call_abi = ix86_function_type_abi (fntype);
6337 }
6338
6339 cum->caller = caller;
6340
6341 /* Set up the number of registers to use for passing arguments. */
6342 cum->nregs = ix86_regparm;
6343 if (TARGET_64BIT)
6344 {
6345 cum->nregs = (cum->call_abi == SYSV_ABI
6346 ? X86_64_REGPARM_MAX
6347 : X86_64_MS_REGPARM_MAX);
6348 }
6349 if (TARGET_SSE)
6350 {
6351 cum->sse_nregs = SSE_REGPARM_MAX;
6352 if (TARGET_64BIT)
6353 {
6354 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6355 ? X86_64_SSE_REGPARM_MAX
6356 : X86_64_MS_SSE_REGPARM_MAX);
6357 }
6358 }
6359 if (TARGET_MMX)
6360 cum->mmx_nregs = MMX_REGPARM_MAX;
6361 cum->warn_avx512f = true;
6362 cum->warn_avx = true;
6363 cum->warn_sse = true;
6364 cum->warn_mmx = true;
6365
6366 /* Because the type might mismatch between caller and callee, we need to
6367    use the actual type of the function for local calls.
6368    FIXME: cgraph_analyze can be told to actually record whether a function
6369    uses va_start, so for local functions maybe_vaarg can be made more
6370    aggressive, helping K&R code.
6371    FIXME: once the type system is fixed, we won't need this code anymore.  */
6372 if (i && i->local && i->can_change_signature)
6373 fntype = TREE_TYPE (fndecl);
6374 cum->stdarg = stdarg_p (fntype);
6375 cum->maybe_vaarg = (fntype
6376 ? (!prototype_p (fntype) || stdarg_p (fntype))
6377 : !libname);
6378
6379 cum->bnd_regno = FIRST_BND_REG;
6380 cum->bnds_in_bt = 0;
6381 cum->force_bnd_pass = 0;
6382
6383 if (!TARGET_64BIT)
6384 {
6385 /* If there are variable arguments, then we won't pass anything
6386 in registers in 32-bit mode. */
6387 if (stdarg_p (fntype))
6388 {
6389 cum->nregs = 0;
6390 cum->sse_nregs = 0;
6391 cum->mmx_nregs = 0;
6392 cum->warn_avx512f = false;
6393 cum->warn_avx = false;
6394 cum->warn_sse = false;
6395 cum->warn_mmx = false;
6396 return;
6397 }
6398
6399 /* Use ecx and edx registers if function has fastcall attribute,
6400 else look for regparm information. */
6401 if (fntype)
6402 {
6403 unsigned int ccvt = ix86_get_callcvt (fntype);
6404 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6405 {
6406 cum->nregs = 1;
6407 cum->fastcall = 1; /* Same first register as in fastcall. */
6408 }
6409 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6410 {
6411 cum->nregs = 2;
6412 cum->fastcall = 1;
6413 }
6414 else
6415 cum->nregs = ix86_function_regparm (fntype, fndecl);
6416 }
6417
6418 /* Set up the number of SSE registers used for passing SFmode
6419 and DFmode arguments. Warn for mismatching ABI. */
6420 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6421 }
6422 }
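/* For example (illustrative numbers only): for a prototyped SysV x86-64
   callee the code above leaves cum->nregs == 6 (RDI, RSI, RDX, RCX, R8, R9)
   and cum->sse_nregs == 8 (XMM0-XMM7), whereas an MS-ABI callee gets
   cum->nregs == 4 and cum->sse_nregs == 4.  */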
6423
6424 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6425 But in the case of vector types, it is some vector mode.
6426
6427 When we have only some of our vector isa extensions enabled, then there
6428 are some modes for which vector_mode_supported_p is false. For these
6429 modes, the generic vector support in gcc will choose some non-vector mode
6430 in order to implement the type. By computing the natural mode, we'll
6431 select the proper ABI location for the operand and not depend on whatever
6432 the middle-end decides to do with these vector types.
6433
6434    The middle-end can't deal with vector types > 16 bytes.  In this
6435    case, we return the original mode and warn about the ABI change if
6436    CUM isn't NULL.
6437
6438 If INT_RETURN is true, warn ABI change if the vector mode isn't
6439 available for function return value. */
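/* For example (illustrative only; "v8sf" is a hypothetical user type):
   with -mno-avx an argument declared as

     typedef float v8sf __attribute__ ((vector_size (32)));

   has the natural mode V8SFmode, but because AVX is disabled the function
   below returns the original TYPE_MODE and issues the -Wpsabi warning that
   the ABI for such arguments changes.  */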
6440
6441 static machine_mode
6442 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6443 bool in_return)
6444 {
6445 machine_mode mode = TYPE_MODE (type);
6446
6447 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6448 {
6449 HOST_WIDE_INT size = int_size_in_bytes (type);
6450 if ((size == 8 || size == 16 || size == 32 || size == 64)
6451 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6452 && TYPE_VECTOR_SUBPARTS (type) > 1)
6453 {
6454 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6455
6456 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6457 mode = MIN_MODE_VECTOR_FLOAT;
6458 else
6459 mode = MIN_MODE_VECTOR_INT;
6460
6461 /* Get the mode which has this inner mode and number of units. */
6462 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6463 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6464 && GET_MODE_INNER (mode) == innermode)
6465 {
6466 if (size == 64 && !TARGET_AVX512F)
6467 {
6468 static bool warnedavx512f;
6469 static bool warnedavx512f_ret;
6470
6471 if (cum && cum->warn_avx512f && !warnedavx512f)
6472 {
6473 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6474 "without AVX512F enabled changes the ABI"))
6475 warnedavx512f = true;
6476 }
6477 else if (in_return && !warnedavx512f_ret)
6478 {
6479 if (warning (OPT_Wpsabi, "AVX512F vector return "
6480 "without AVX512F enabled changes the ABI"))
6481 warnedavx512f_ret = true;
6482 }
6483
6484 return TYPE_MODE (type);
6485 }
6486 else if (size == 32 && !TARGET_AVX)
6487 {
6488 static bool warnedavx;
6489 static bool warnedavx_ret;
6490
6491 if (cum && cum->warn_avx && !warnedavx)
6492 {
6493 if (warning (OPT_Wpsabi, "AVX vector argument "
6494 "without AVX enabled changes the ABI"))
6495 warnedavx = true;
6496 }
6497 else if (in_return && !warnedavx_ret)
6498 {
6499 if (warning (OPT_Wpsabi, "AVX vector return "
6500 "without AVX enabled changes the ABI"))
6501 warnedavx_ret = true;
6502 }
6503
6504 return TYPE_MODE (type);
6505 }
6506 else if (((size == 8 && TARGET_64BIT) || size == 16)
6507 && !TARGET_SSE)
6508 {
6509 static bool warnedsse;
6510 static bool warnedsse_ret;
6511
6512 if (cum && cum->warn_sse && !warnedsse)
6513 {
6514 if (warning (OPT_Wpsabi, "SSE vector argument "
6515 "without SSE enabled changes the ABI"))
6516 warnedsse = true;
6517 }
6518 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6519 {
6520 if (warning (OPT_Wpsabi, "SSE vector return "
6521 "without SSE enabled changes the ABI"))
6522 warnedsse_ret = true;
6523 }
6524 }
6525 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6526 {
6527 static bool warnedmmx;
6528 static bool warnedmmx_ret;
6529
6530 if (cum && cum->warn_mmx && !warnedmmx)
6531 {
6532 if (warning (OPT_Wpsabi, "MMX vector argument "
6533 "without MMX enabled changes the ABI"))
6534 warnedmmx = true;
6535 }
6536 else if (in_return && !warnedmmx_ret)
6537 {
6538 if (warning (OPT_Wpsabi, "MMX vector return "
6539 "without MMX enabled changes the ABI"))
6540 warnedmmx_ret = true;
6541 }
6542 }
6543 return mode;
6544 }
6545
6546 gcc_unreachable ();
6547 }
6548 }
6549
6550 return mode;
6551 }
6552
6553 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6554 this may not agree with the mode that the type system has chosen for the
6555 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6556 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6557
6558 static rtx
6559 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6560 unsigned int regno)
6561 {
6562 rtx tmp;
6563
6564 if (orig_mode != BLKmode)
6565 tmp = gen_rtx_REG (orig_mode, regno);
6566 else
6567 {
6568 tmp = gen_rtx_REG (mode, regno);
6569 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6570 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6571 }
6572
6573 return tmp;
6574 }
6575
6576 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
6577    The goal of this code is to classify each eightbyte of an incoming argument
6578    by register class and assign registers accordingly.  */
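/* For example (illustrative only; "struct s" is a hypothetical user type):

     struct s { double d; long l; };

   occupies two eightbytes classified as X86_64_SSEDF_CLASS and
   X86_64_INTEGER_CLASS, so when passed as the first argument the double
   travels in %xmm0 and the long in %rdi.  */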
6579
6580 /* Return the union class of CLASS1 and CLASS2.
6581 See the x86-64 PS ABI for details. */
6582
6583 static enum x86_64_reg_class
6584 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6585 {
6586 /* Rule #1: If both classes are equal, this is the resulting class. */
6587 if (class1 == class2)
6588 return class1;
6589
6590 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6591 the other class. */
6592 if (class1 == X86_64_NO_CLASS)
6593 return class2;
6594 if (class2 == X86_64_NO_CLASS)
6595 return class1;
6596
6597 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6598 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6599 return X86_64_MEMORY_CLASS;
6600
6601 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6602 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6603 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6604 return X86_64_INTEGERSI_CLASS;
6605 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6606 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6607 return X86_64_INTEGER_CLASS;
6608
6609 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6610 MEMORY is used. */
6611 if (class1 == X86_64_X87_CLASS
6612 || class1 == X86_64_X87UP_CLASS
6613 || class1 == X86_64_COMPLEX_X87_CLASS
6614 || class2 == X86_64_X87_CLASS
6615 || class2 == X86_64_X87UP_CLASS
6616 || class2 == X86_64_COMPLEX_X87_CLASS)
6617 return X86_64_MEMORY_CLASS;
6618
6619 /* Rule #6: Otherwise class SSE is used. */
6620 return X86_64_SSE_CLASS;
6621 }
6622
6623 /* Classify the argument of type TYPE and mode MODE.
6624 CLASSES will be filled by the register class used to pass each word
6625 of the operand. The number of words is returned. In case the parameter
6626 should be passed in memory, 0 is returned. As a special case for zero
6627 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6628
6629    BIT_OFFSET is used internally for handling records and specifies the
6630    offset in bits modulo 512 to avoid overflow cases.
6631
6632 See the x86-64 PS ABI for details.
6633 */
6634
6635 static int
6636 classify_argument (machine_mode mode, const_tree type,
6637 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6638 {
6639 HOST_WIDE_INT bytes =
6640 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6641 int words
6642 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6643
6644 /* Variable sized entities are always passed/returned in memory. */
6645 if (bytes < 0)
6646 return 0;
6647
6648 if (mode != VOIDmode
6649 && targetm.calls.must_pass_in_stack (mode, type))
6650 return 0;
6651
6652 if (type && AGGREGATE_TYPE_P (type))
6653 {
6654 int i;
6655 tree field;
6656 enum x86_64_reg_class subclasses[MAX_CLASSES];
6657
6658 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6659 if (bytes > 64)
6660 return 0;
6661
6662 for (i = 0; i < words; i++)
6663 classes[i] = X86_64_NO_CLASS;
6664
6665       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6666 	 signal the memory class, so handle this as a special case.  */
6667 if (!words)
6668 {
6669 classes[0] = X86_64_NO_CLASS;
6670 return 1;
6671 }
6672
6673 /* Classify each field of record and merge classes. */
6674 switch (TREE_CODE (type))
6675 {
6676 case RECORD_TYPE:
6677 /* And now merge the fields of structure. */
6678 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6679 {
6680 if (TREE_CODE (field) == FIELD_DECL)
6681 {
6682 int num;
6683
6684 if (TREE_TYPE (field) == error_mark_node)
6685 continue;
6686
6687 /* Bitfields are always classified as integer. Handle them
6688 early, since later code would consider them to be
6689 misaligned integers. */
6690 if (DECL_BIT_FIELD (field))
6691 {
6692 for (i = (int_bit_position (field)
6693 + (bit_offset % 64)) / 8 / 8;
6694 i < ((int_bit_position (field) + (bit_offset % 64))
6695 + tree_to_shwi (DECL_SIZE (field))
6696 + 63) / 8 / 8; i++)
6697 classes[i] =
6698 merge_classes (X86_64_INTEGER_CLASS,
6699 classes[i]);
6700 }
6701 else
6702 {
6703 int pos;
6704
6705 type = TREE_TYPE (field);
6706
6707 /* Flexible array member is ignored. */
6708 if (TYPE_MODE (type) == BLKmode
6709 && TREE_CODE (type) == ARRAY_TYPE
6710 && TYPE_SIZE (type) == NULL_TREE
6711 && TYPE_DOMAIN (type) != NULL_TREE
6712 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6713 == NULL_TREE))
6714 {
6715 static bool warned;
6716
6717 if (!warned && warn_psabi)
6718 {
6719 warned = true;
6720 inform (input_location,
6721 "the ABI of passing struct with"
6722 " a flexible array member has"
6723 " changed in GCC 4.4");
6724 }
6725 continue;
6726 }
6727 num = classify_argument (TYPE_MODE (type), type,
6728 subclasses,
6729 (int_bit_position (field)
6730 + bit_offset) % 512);
6731 if (!num)
6732 return 0;
6733 pos = (int_bit_position (field)
6734 + (bit_offset % 64)) / 8 / 8;
6735 for (i = 0; i < num && (i + pos) < words; i++)
6736 classes[i + pos] =
6737 merge_classes (subclasses[i], classes[i + pos]);
6738 }
6739 }
6740 }
6741 break;
6742
6743 case ARRAY_TYPE:
6744 /* Arrays are handled as small records. */
6745 {
6746 int num;
6747 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6748 TREE_TYPE (type), subclasses, bit_offset);
6749 if (!num)
6750 return 0;
6751
6752 /* The partial classes are now full classes. */
6753 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6754 subclasses[0] = X86_64_SSE_CLASS;
6755 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6756 && !((bit_offset % 64) == 0 && bytes == 4))
6757 subclasses[0] = X86_64_INTEGER_CLASS;
6758
6759 for (i = 0; i < words; i++)
6760 classes[i] = subclasses[i % num];
6761
6762 break;
6763 }
6764 case UNION_TYPE:
6765 case QUAL_UNION_TYPE:
6766 /* Unions are similar to RECORD_TYPE but offset is always 0.
6767 */
6768 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6769 {
6770 if (TREE_CODE (field) == FIELD_DECL)
6771 {
6772 int num;
6773
6774 if (TREE_TYPE (field) == error_mark_node)
6775 continue;
6776
6777 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6778 TREE_TYPE (field), subclasses,
6779 bit_offset);
6780 if (!num)
6781 return 0;
6782 for (i = 0; i < num && i < words; i++)
6783 classes[i] = merge_classes (subclasses[i], classes[i]);
6784 }
6785 }
6786 break;
6787
6788 default:
6789 gcc_unreachable ();
6790 }
6791
6792 if (words > 2)
6793 {
6794 	  /* When the size is > 16 bytes, if the first class isn't
6795 	     X86_64_SSE_CLASS or any of the other classes aren't
6796 	     X86_64_SSEUP_CLASS, everything should be passed in
6797 	     memory.  */
6798 if (classes[0] != X86_64_SSE_CLASS)
6799 return 0;
6800
6801 for (i = 1; i < words; i++)
6802 if (classes[i] != X86_64_SSEUP_CLASS)
6803 return 0;
6804 }
6805
6806 /* Final merger cleanup. */
6807 for (i = 0; i < words; i++)
6808 {
6809 /* If one class is MEMORY, everything should be passed in
6810 memory. */
6811 if (classes[i] == X86_64_MEMORY_CLASS)
6812 return 0;
6813
6814 	  /* X86_64_SSEUP_CLASS should always be preceded by
6815 	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6816 if (classes[i] == X86_64_SSEUP_CLASS
6817 && classes[i - 1] != X86_64_SSE_CLASS
6818 && classes[i - 1] != X86_64_SSEUP_CLASS)
6819 {
6820 /* The first one should never be X86_64_SSEUP_CLASS. */
6821 gcc_assert (i != 0);
6822 classes[i] = X86_64_SSE_CLASS;
6823 }
6824
6825 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6826 everything should be passed in memory. */
6827 if (classes[i] == X86_64_X87UP_CLASS
6828 && (classes[i - 1] != X86_64_X87_CLASS))
6829 {
6830 static bool warned;
6831
6832 /* The first one should never be X86_64_X87UP_CLASS. */
6833 gcc_assert (i != 0);
6834 if (!warned && warn_psabi)
6835 {
6836 warned = true;
6837 inform (input_location,
6838 "the ABI of passing union with long double"
6839 " has changed in GCC 4.4");
6840 }
6841 return 0;
6842 }
6843 }
6844 return words;
6845 }
6846
6847   /* Compute the alignment needed.  We align all types to their natural
6848      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
6849 if (mode != VOIDmode && mode != BLKmode)
6850 {
6851 int mode_alignment = GET_MODE_BITSIZE (mode);
6852
6853 if (mode == XFmode)
6854 mode_alignment = 128;
6855 else if (mode == XCmode)
6856 mode_alignment = 256;
6857 if (COMPLEX_MODE_P (mode))
6858 mode_alignment /= 2;
6859 /* Misaligned fields are always returned in memory. */
6860 if (bit_offset % mode_alignment)
6861 return 0;
6862 }
6863
6864   /* For V1xx modes, just use the base mode.  */
6865 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6866 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6867 mode = GET_MODE_INNER (mode);
6868
6869 /* Classification of atomic types. */
6870 switch (mode)
6871 {
6872 case SDmode:
6873 case DDmode:
6874 classes[0] = X86_64_SSE_CLASS;
6875 return 1;
6876 case TDmode:
6877 classes[0] = X86_64_SSE_CLASS;
6878 classes[1] = X86_64_SSEUP_CLASS;
6879 return 2;
6880 case DImode:
6881 case SImode:
6882 case HImode:
6883 case QImode:
6884 case CSImode:
6885 case CHImode:
6886 case CQImode:
6887 {
6888 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6889
6890 /* Analyze last 128 bits only. */
6891 size = (size - 1) & 0x7f;
6892
6893 if (size < 32)
6894 {
6895 classes[0] = X86_64_INTEGERSI_CLASS;
6896 return 1;
6897 }
6898 else if (size < 64)
6899 {
6900 classes[0] = X86_64_INTEGER_CLASS;
6901 return 1;
6902 }
6903 else if (size < 64+32)
6904 {
6905 classes[0] = X86_64_INTEGER_CLASS;
6906 classes[1] = X86_64_INTEGERSI_CLASS;
6907 return 2;
6908 }
6909 else if (size < 64+64)
6910 {
6911 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6912 return 2;
6913 }
6914 else
6915 gcc_unreachable ();
6916 }
6917 case CDImode:
6918 case TImode:
6919 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6920 return 2;
6921 case COImode:
6922 case OImode:
6923 /* OImode shouldn't be used directly. */
6924 gcc_unreachable ();
6925 case CTImode:
6926 return 0;
6927 case SFmode:
6928 if (!(bit_offset % 64))
6929 classes[0] = X86_64_SSESF_CLASS;
6930 else
6931 classes[0] = X86_64_SSE_CLASS;
6932 return 1;
6933 case DFmode:
6934 classes[0] = X86_64_SSEDF_CLASS;
6935 return 1;
6936 case XFmode:
6937 classes[0] = X86_64_X87_CLASS;
6938 classes[1] = X86_64_X87UP_CLASS;
6939 return 2;
6940 case TFmode:
6941 classes[0] = X86_64_SSE_CLASS;
6942 classes[1] = X86_64_SSEUP_CLASS;
6943 return 2;
6944 case SCmode:
6945 classes[0] = X86_64_SSE_CLASS;
6946 if (!(bit_offset % 64))
6947 return 1;
6948 else
6949 {
6950 static bool warned;
6951
6952 if (!warned && warn_psabi)
6953 {
6954 warned = true;
6955 inform (input_location,
6956 "the ABI of passing structure with complex float"
6957 " member has changed in GCC 4.4");
6958 }
6959 classes[1] = X86_64_SSESF_CLASS;
6960 return 2;
6961 }
6962 case DCmode:
6963 classes[0] = X86_64_SSEDF_CLASS;
6964 classes[1] = X86_64_SSEDF_CLASS;
6965 return 2;
6966 case XCmode:
6967 classes[0] = X86_64_COMPLEX_X87_CLASS;
6968 return 1;
6969 case TCmode:
6970       /* This mode is larger than 16 bytes.  */
6971 return 0;
6972 case V8SFmode:
6973 case V8SImode:
6974 case V32QImode:
6975 case V16HImode:
6976 case V4DFmode:
6977 case V4DImode:
6978 classes[0] = X86_64_SSE_CLASS;
6979 classes[1] = X86_64_SSEUP_CLASS;
6980 classes[2] = X86_64_SSEUP_CLASS;
6981 classes[3] = X86_64_SSEUP_CLASS;
6982 return 4;
6983 case V8DFmode:
6984 case V16SFmode:
6985 case V8DImode:
6986 case V16SImode:
6987 case V32HImode:
6988 case V64QImode:
6989 classes[0] = X86_64_SSE_CLASS;
6990 classes[1] = X86_64_SSEUP_CLASS;
6991 classes[2] = X86_64_SSEUP_CLASS;
6992 classes[3] = X86_64_SSEUP_CLASS;
6993 classes[4] = X86_64_SSEUP_CLASS;
6994 classes[5] = X86_64_SSEUP_CLASS;
6995 classes[6] = X86_64_SSEUP_CLASS;
6996 classes[7] = X86_64_SSEUP_CLASS;
6997 return 8;
6998 case V4SFmode:
6999 case V4SImode:
7000 case V16QImode:
7001 case V8HImode:
7002 case V2DFmode:
7003 case V2DImode:
7004 classes[0] = X86_64_SSE_CLASS;
7005 classes[1] = X86_64_SSEUP_CLASS;
7006 return 2;
7007 case V1TImode:
7008 case V1DImode:
7009 case V2SFmode:
7010 case V2SImode:
7011 case V4HImode:
7012 case V8QImode:
7013 classes[0] = X86_64_SSE_CLASS;
7014 return 1;
7015 case BLKmode:
7016 case VOIDmode:
7017 return 0;
7018 default:
7019 gcc_assert (VECTOR_MODE_P (mode));
7020
7021 if (bytes > 16)
7022 return 0;
7023
7024 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7025
7026 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7027 classes[0] = X86_64_INTEGERSI_CLASS;
7028 else
7029 classes[0] = X86_64_INTEGER_CLASS;
7030 classes[1] = X86_64_INTEGER_CLASS;
7031 return 1 + (bytes > 8);
7032 }
7033 }
7034
7035 /* Examine the argument and set the number of registers required in each
7036    class.  Return true iff the parameter should be passed in memory.  */
7037
7038 static bool
7039 examine_argument (machine_mode mode, const_tree type, int in_return,
7040 int *int_nregs, int *sse_nregs)
7041 {
7042 enum x86_64_reg_class regclass[MAX_CLASSES];
7043 int n = classify_argument (mode, type, regclass, 0);
7044
7045 *int_nregs = 0;
7046 *sse_nregs = 0;
7047
7048 if (!n)
7049 return true;
7050 for (n--; n >= 0; n--)
7051 switch (regclass[n])
7052 {
7053 case X86_64_INTEGER_CLASS:
7054 case X86_64_INTEGERSI_CLASS:
7055 (*int_nregs)++;
7056 break;
7057 case X86_64_SSE_CLASS:
7058 case X86_64_SSESF_CLASS:
7059 case X86_64_SSEDF_CLASS:
7060 (*sse_nregs)++;
7061 break;
7062 case X86_64_NO_CLASS:
7063 case X86_64_SSEUP_CLASS:
7064 break;
7065 case X86_64_X87_CLASS:
7066 case X86_64_X87UP_CLASS:
7067 case X86_64_COMPLEX_X87_CLASS:
7068 if (!in_return)
7069 return true;
7070 break;
7071 case X86_64_MEMORY_CLASS:
7072 gcc_unreachable ();
7073 }
7074
7075 return false;
7076 }
7077
7078 /* Construct container for the argument used by GCC interface. See
7079 FUNCTION_ARG for the detailed description. */
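/* For instance (an illustrative RTL sketch, reusing the hypothetical
   struct s { double d; long l; } from the classification comment above),
   the container built for it as a first argument looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double at offset 0 goes in %xmm0 and the long at offset 8
   in %rdi.  */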
7080
7081 static rtx
7082 construct_container (machine_mode mode, machine_mode orig_mode,
7083 const_tree type, int in_return, int nintregs, int nsseregs,
7084 const int *intreg, int sse_regno)
7085 {
7086 /* The following variables hold the static issued_error state. */
7087 static bool issued_sse_arg_error;
7088 static bool issued_sse_ret_error;
7089 static bool issued_x87_ret_error;
7090
7091 machine_mode tmpmode;
7092 int bytes =
7093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7094 enum x86_64_reg_class regclass[MAX_CLASSES];
7095 int n;
7096 int i;
7097 int nexps = 0;
7098 int needed_sseregs, needed_intregs;
7099 rtx exp[MAX_CLASSES];
7100 rtx ret;
7101
7102 n = classify_argument (mode, type, regclass, 0);
7103 if (!n)
7104 return NULL;
7105 if (examine_argument (mode, type, in_return, &needed_intregs,
7106 &needed_sseregs))
7107 return NULL;
7108 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7109 return NULL;
7110
7111 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7112 some less clueful developer tries to use floating-point anyway. */
7113 if (needed_sseregs && !TARGET_SSE)
7114 {
7115 if (in_return)
7116 {
7117 if (!issued_sse_ret_error)
7118 {
7119 error ("SSE register return with SSE disabled");
7120 issued_sse_ret_error = true;
7121 }
7122 }
7123 else if (!issued_sse_arg_error)
7124 {
7125 error ("SSE register argument with SSE disabled");
7126 issued_sse_arg_error = true;
7127 }
7128 return NULL;
7129 }
7130
7131 /* Likewise, error if the ABI requires us to return values in the
7132 x87 registers and the user specified -mno-80387. */
7133 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7134 for (i = 0; i < n; i++)
7135 if (regclass[i] == X86_64_X87_CLASS
7136 || regclass[i] == X86_64_X87UP_CLASS
7137 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7138 {
7139 if (!issued_x87_ret_error)
7140 {
7141 error ("x87 register return with x87 disabled");
7142 issued_x87_ret_error = true;
7143 }
7144 return NULL;
7145 }
7146
7147   /* First construct the simple cases.  Avoid SCmode, since we want to use
7148      a single register to pass this type.  */
7149 if (n == 1 && mode != SCmode)
7150 switch (regclass[0])
7151 {
7152 case X86_64_INTEGER_CLASS:
7153 case X86_64_INTEGERSI_CLASS:
7154 return gen_rtx_REG (mode, intreg[0]);
7155 case X86_64_SSE_CLASS:
7156 case X86_64_SSESF_CLASS:
7157 case X86_64_SSEDF_CLASS:
7158 if (mode != BLKmode)
7159 return gen_reg_or_parallel (mode, orig_mode,
7160 SSE_REGNO (sse_regno));
7161 break;
7162 case X86_64_X87_CLASS:
7163 case X86_64_COMPLEX_X87_CLASS:
7164 return gen_rtx_REG (mode, FIRST_STACK_REG);
7165 case X86_64_NO_CLASS:
7166 /* Zero sized array, struct or class. */
7167 return NULL;
7168 default:
7169 gcc_unreachable ();
7170 }
7171 if (n == 2
7172 && regclass[0] == X86_64_SSE_CLASS
7173 && regclass[1] == X86_64_SSEUP_CLASS
7174 && mode != BLKmode)
7175 return gen_reg_or_parallel (mode, orig_mode,
7176 SSE_REGNO (sse_regno));
7177 if (n == 4
7178 && regclass[0] == X86_64_SSE_CLASS
7179 && regclass[1] == X86_64_SSEUP_CLASS
7180 && regclass[2] == X86_64_SSEUP_CLASS
7181 && regclass[3] == X86_64_SSEUP_CLASS
7182 && mode != BLKmode)
7183 return gen_reg_or_parallel (mode, orig_mode,
7184 SSE_REGNO (sse_regno));
7185 if (n == 8
7186 && regclass[0] == X86_64_SSE_CLASS
7187 && regclass[1] == X86_64_SSEUP_CLASS
7188 && regclass[2] == X86_64_SSEUP_CLASS
7189 && regclass[3] == X86_64_SSEUP_CLASS
7190 && regclass[4] == X86_64_SSEUP_CLASS
7191 && regclass[5] == X86_64_SSEUP_CLASS
7192 && regclass[6] == X86_64_SSEUP_CLASS
7193 && regclass[7] == X86_64_SSEUP_CLASS
7194 && mode != BLKmode)
7195 return gen_reg_or_parallel (mode, orig_mode,
7196 SSE_REGNO (sse_regno));
7197 if (n == 2
7198 && regclass[0] == X86_64_X87_CLASS
7199 && regclass[1] == X86_64_X87UP_CLASS)
7200 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7201
7202 if (n == 2
7203 && regclass[0] == X86_64_INTEGER_CLASS
7204 && regclass[1] == X86_64_INTEGER_CLASS
7205 && (mode == CDImode || mode == TImode)
7206 && intreg[0] + 1 == intreg[1])
7207 return gen_rtx_REG (mode, intreg[0]);
7208
7209 /* Otherwise figure out the entries of the PARALLEL. */
7210 for (i = 0; i < n; i++)
7211 {
7212 int pos;
7213
7214 switch (regclass[i])
7215 {
7216 case X86_64_NO_CLASS:
7217 break;
7218 case X86_64_INTEGER_CLASS:
7219 case X86_64_INTEGERSI_CLASS:
7220 /* Merge TImodes on aligned occasions here too. */
7221 if (i * 8 + 8 > bytes)
7222 tmpmode
7223 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7224 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7225 tmpmode = SImode;
7226 else
7227 tmpmode = DImode;
7228 	/* We've requested 24 bytes for which we
7229 	   don't have a mode.  Use DImode.  */
7230 if (tmpmode == BLKmode)
7231 tmpmode = DImode;
7232 exp [nexps++]
7233 = gen_rtx_EXPR_LIST (VOIDmode,
7234 gen_rtx_REG (tmpmode, *intreg),
7235 GEN_INT (i*8));
7236 intreg++;
7237 break;
7238 case X86_64_SSESF_CLASS:
7239 exp [nexps++]
7240 = gen_rtx_EXPR_LIST (VOIDmode,
7241 gen_rtx_REG (SFmode,
7242 SSE_REGNO (sse_regno)),
7243 GEN_INT (i*8));
7244 sse_regno++;
7245 break;
7246 case X86_64_SSEDF_CLASS:
7247 exp [nexps++]
7248 = gen_rtx_EXPR_LIST (VOIDmode,
7249 gen_rtx_REG (DFmode,
7250 SSE_REGNO (sse_regno)),
7251 GEN_INT (i*8));
7252 sse_regno++;
7253 break;
7254 case X86_64_SSE_CLASS:
7255 pos = i;
7256 switch (n)
7257 {
7258 case 1:
7259 tmpmode = DImode;
7260 break;
7261 case 2:
7262 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7263 {
7264 tmpmode = TImode;
7265 i++;
7266 }
7267 else
7268 tmpmode = DImode;
7269 break;
7270 case 4:
7271 gcc_assert (i == 0
7272 && regclass[1] == X86_64_SSEUP_CLASS
7273 && regclass[2] == X86_64_SSEUP_CLASS
7274 && regclass[3] == X86_64_SSEUP_CLASS);
7275 tmpmode = OImode;
7276 i += 3;
7277 break;
7278 case 8:
7279 gcc_assert (i == 0
7280 && regclass[1] == X86_64_SSEUP_CLASS
7281 && regclass[2] == X86_64_SSEUP_CLASS
7282 && regclass[3] == X86_64_SSEUP_CLASS
7283 && regclass[4] == X86_64_SSEUP_CLASS
7284 && regclass[5] == X86_64_SSEUP_CLASS
7285 && regclass[6] == X86_64_SSEUP_CLASS
7286 && regclass[7] == X86_64_SSEUP_CLASS);
7287 tmpmode = XImode;
7288 i += 7;
7289 break;
7290 default:
7291 gcc_unreachable ();
7292 }
7293 exp [nexps++]
7294 = gen_rtx_EXPR_LIST (VOIDmode,
7295 gen_rtx_REG (tmpmode,
7296 SSE_REGNO (sse_regno)),
7297 GEN_INT (pos*8));
7298 sse_regno++;
7299 break;
7300 default:
7301 gcc_unreachable ();
7302 }
7303 }
7304
7305 /* Empty aligned struct, union or class. */
7306 if (nexps == 0)
7307 return NULL;
7308
7309 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7310 for (i = 0; i < nexps; i++)
7311 XVECEXP (ret, 0, i) = exp [i];
7312 return ret;
7313 }
7314
7315 /* Update the data in CUM to advance over an argument of mode MODE
7316 and data type TYPE. (TYPE is null for libcalls where that information
7317 may not be available.)
7318
7319    Return the number of integer registers advanced over.  */
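/* For example (illustrative only): in 32-bit mode with regparm (3), a named
   int argument consumes one register below: cum->nregs drops from 3 to 2,
   cum->regno advances by one, and the return value is 1.  */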
7320
7321 static int
7322 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7323 const_tree type, HOST_WIDE_INT bytes,
7324 HOST_WIDE_INT words)
7325 {
7326 int res = 0;
7327
7328 switch (mode)
7329 {
7330 default:
7331 break;
7332
7333 case BLKmode:
7334 if (bytes < 0)
7335 break;
7336 /* FALLTHRU */
7337
7338 case DImode:
7339 case SImode:
7340 case HImode:
7341 case QImode:
7342 cum->words += words;
7343 cum->nregs -= words;
7344 cum->regno += words;
7345 if (cum->nregs >= 0)
7346 res = words;
7347 if (cum->nregs <= 0)
7348 {
7349 cum->nregs = 0;
7350 cum->regno = 0;
7351 }
7352 break;
7353
7354 case OImode:
7355 /* OImode shouldn't be used directly. */
7356 gcc_unreachable ();
7357
7358 case DFmode:
7359 if (cum->float_in_sse < 2)
7360 break;
7361 case SFmode:
7362 if (cum->float_in_sse < 1)
7363 break;
7364 /* FALLTHRU */
7365
7366 case V8SFmode:
7367 case V8SImode:
7368 case V64QImode:
7369 case V32HImode:
7370 case V16SImode:
7371 case V8DImode:
7372 case V16SFmode:
7373 case V8DFmode:
7374 case V32QImode:
7375 case V16HImode:
7376 case V4DFmode:
7377 case V4DImode:
7378 case TImode:
7379 case V16QImode:
7380 case V8HImode:
7381 case V4SImode:
7382 case V2DImode:
7383 case V4SFmode:
7384 case V2DFmode:
7385 if (!type || !AGGREGATE_TYPE_P (type))
7386 {
7387 cum->sse_words += words;
7388 cum->sse_nregs -= 1;
7389 cum->sse_regno += 1;
7390 if (cum->sse_nregs <= 0)
7391 {
7392 cum->sse_nregs = 0;
7393 cum->sse_regno = 0;
7394 }
7395 }
7396 break;
7397
7398 case V8QImode:
7399 case V4HImode:
7400 case V2SImode:
7401 case V2SFmode:
7402 case V1TImode:
7403 case V1DImode:
7404 if (!type || !AGGREGATE_TYPE_P (type))
7405 {
7406 cum->mmx_words += words;
7407 cum->mmx_nregs -= 1;
7408 cum->mmx_regno += 1;
7409 if (cum->mmx_nregs <= 0)
7410 {
7411 cum->mmx_nregs = 0;
7412 cum->mmx_regno = 0;
7413 }
7414 }
7415 break;
7416 }
7417
7418 return res;
7419 }
7420
7421 static int
7422 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7423 const_tree type, HOST_WIDE_INT words, bool named)
7424 {
7425 int int_nregs, sse_nregs;
7426
7427   /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack.  */
7428 if (!named && (VALID_AVX512F_REG_MODE (mode)
7429 || VALID_AVX256_REG_MODE (mode)))
7430 return 0;
7431
7432 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7433 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7434 {
7435 cum->nregs -= int_nregs;
7436 cum->sse_nregs -= sse_nregs;
7437 cum->regno += int_nregs;
7438 cum->sse_regno += sse_nregs;
7439 return int_nregs;
7440 }
7441 else
7442 {
7443 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7444 cum->words = (cum->words + align - 1) & ~(align - 1);
7445 cum->words += words;
7446 return 0;
7447 }
7448 }
7449
7450 static int
7451 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7452 HOST_WIDE_INT words)
7453 {
7454   /* Larger arguments are passed indirectly, so only these sizes occur here.  */
7455 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7456
7457 cum->words += words;
7458 if (cum->nregs > 0)
7459 {
7460 cum->nregs -= 1;
7461 cum->regno += 1;
7462 return 1;
7463 }
7464 return 0;
7465 }
7466
7467 /* Update the data in CUM to advance over an argument of mode MODE and
7468 data type TYPE. (TYPE is null for libcalls where that information
7469 may not be available.) */
7470
7471 static void
7472 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7473 const_tree type, bool named)
7474 {
7475 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7476 HOST_WIDE_INT bytes, words;
7477 int nregs;
7478
7479 if (mode == BLKmode)
7480 bytes = int_size_in_bytes (type);
7481 else
7482 bytes = GET_MODE_SIZE (mode);
7483 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7484
7485 if (type)
7486 mode = type_natural_mode (type, NULL, false);
7487
7488 if ((type && POINTER_BOUNDS_TYPE_P (type))
7489 || POINTER_BOUNDS_MODE_P (mode))
7490 {
7491       /* If we pass bounds in the Bounds Table, just update the remaining bounds count.  */
7492 if (cum->bnds_in_bt)
7493 {
7494 cum->bnds_in_bt--;
7495 return;
7496 }
7497
7498       /* Update the remaining number of bounds to force.  */
7499 if (cum->force_bnd_pass)
7500 cum->force_bnd_pass--;
7501
7502 cum->bnd_regno++;
7503
7504 return;
7505 }
7506
7507 /* The first arg not going to Bounds Tables resets this counter. */
7508 cum->bnds_in_bt = 0;
7509   /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7510      the passed and received types do not match.  If bounds do not follow an
7511      unnamed arg, still pretend the required number of bounds were passed.  */
7512 if (cum->force_bnd_pass)
7513 {
7514 cum->bnd_regno += cum->force_bnd_pass;
7515 cum->force_bnd_pass = 0;
7516 }
7517
7518 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7519 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7520 else if (TARGET_64BIT)
7521 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7522 else
7523 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7524
7525 /* For stdarg we expect bounds to be passed for each value passed
7526 in register. */
7527 if (cum->stdarg)
7528 cum->force_bnd_pass = nregs;
7529 /* For pointers passed in memory we expect bounds passed in Bounds
7530 Table. */
7531 if (!nregs)
7532 cum->bnds_in_bt = chkp_type_bounds_count (type);
7533 }
7534
7535 /* Define where to put the arguments to a function.
7536 Value is zero to push the argument on the stack,
7537 or a hard register in which to store the argument.
7538
7539 MODE is the argument's machine mode.
7540 TYPE is the data type of the argument (as a tree).
7541 This is null for libcalls where that information may
7542 not be available.
7543 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7544 the preceding args and about the function being called.
7545 NAMED is nonzero if this argument is a named parameter
7546 (otherwise it is an extra parameter matching an ellipsis). */
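/* For example (illustrative only; "f" below is a hypothetical declaration):

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   passes A in %ecx and B in %edx in 32-bit mode, while C goes on the stack;
   aggregate and DImode arguments never use the fastcall registers (see the
   BLKmode/DImode check below).  */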
7547
7548 static rtx
7549 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7550 machine_mode orig_mode, const_tree type,
7551 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7552 {
7553 /* Avoid the AL settings for the Unix64 ABI. */
7554 if (mode == VOIDmode)
7555 return constm1_rtx;
7556
7557 switch (mode)
7558 {
7559 default:
7560 break;
7561
7562 case BLKmode:
7563 if (bytes < 0)
7564 break;
7565 /* FALLTHRU */
7566 case DImode:
7567 case SImode:
7568 case HImode:
7569 case QImode:
7570 if (words <= cum->nregs)
7571 {
7572 int regno = cum->regno;
7573
7574 	  /* Fastcall allocates the first two DWORD (SImode) or
7575 	     smaller arguments to ECX and EDX if they aren't
7576 	     aggregate types.  */
7577 if (cum->fastcall)
7578 {
7579 if (mode == BLKmode
7580 || mode == DImode
7581 || (type && AGGREGATE_TYPE_P (type)))
7582 break;
7583
7584 	      /* ECX, not EAX, is the first allocated register.  */
7585 if (regno == AX_REG)
7586 regno = CX_REG;
7587 }
7588 return gen_rtx_REG (mode, regno);
7589 }
7590 break;
7591
7592 case DFmode:
7593 if (cum->float_in_sse < 2)
7594 break;
7595 case SFmode:
7596 if (cum->float_in_sse < 1)
7597 break;
7598 /* FALLTHRU */
7599 case TImode:
7600     /* In 32-bit mode, we pass TImode in xmm registers.  */
7601 case V16QImode:
7602 case V8HImode:
7603 case V4SImode:
7604 case V2DImode:
7605 case V4SFmode:
7606 case V2DFmode:
7607 if (!type || !AGGREGATE_TYPE_P (type))
7608 {
7609 if (cum->sse_nregs)
7610 return gen_reg_or_parallel (mode, orig_mode,
7611 cum->sse_regno + FIRST_SSE_REG);
7612 }
7613 break;
7614
7615 case OImode:
7616 case XImode:
7617 /* OImode and XImode shouldn't be used directly. */
7618 gcc_unreachable ();
7619
7620 case V64QImode:
7621 case V32HImode:
7622 case V16SImode:
7623 case V8DImode:
7624 case V16SFmode:
7625 case V8DFmode:
7626 case V8SFmode:
7627 case V8SImode:
7628 case V32QImode:
7629 case V16HImode:
7630 case V4DFmode:
7631 case V4DImode:
7632 if (!type || !AGGREGATE_TYPE_P (type))
7633 {
7634 if (cum->sse_nregs)
7635 return gen_reg_or_parallel (mode, orig_mode,
7636 cum->sse_regno + FIRST_SSE_REG);
7637 }
7638 break;
7639
7640 case V8QImode:
7641 case V4HImode:
7642 case V2SImode:
7643 case V2SFmode:
7644 case V1TImode:
7645 case V1DImode:
7646 if (!type || !AGGREGATE_TYPE_P (type))
7647 {
7648 if (cum->mmx_nregs)
7649 return gen_reg_or_parallel (mode, orig_mode,
7650 cum->mmx_regno + FIRST_MMX_REG);
7651 }
7652 break;
7653 }
7654
7655 return NULL_RTX;
7656 }
7657
7658 static rtx
7659 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7660 machine_mode orig_mode, const_tree type, bool named)
7661 {
7662 /* Handle a hidden AL argument containing number of registers
7663 for varargs x86-64 functions. */
7664 if (mode == VOIDmode)
7665 return GEN_INT (cum->maybe_vaarg
7666 ? (cum->sse_nregs < 0
7667 ? X86_64_SSE_REGPARM_MAX
7668 : cum->sse_regno)
7669 : -1);
7670
7671 switch (mode)
7672 {
7673 default:
7674 break;
7675
7676 case V8SFmode:
7677 case V8SImode:
7678 case V32QImode:
7679 case V16HImode:
7680 case V4DFmode:
7681 case V4DImode:
7682 case V16SFmode:
7683 case V16SImode:
7684 case V64QImode:
7685 case V32HImode:
7686 case V8DFmode:
7687 case V8DImode:
7688       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
7689 if (!named)
7690 return NULL;
7691 break;
7692 }
7693
7694 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7695 cum->sse_nregs,
7696 &x86_64_int_parameter_registers [cum->regno],
7697 cum->sse_regno);
7698 }
7699
7700 static rtx
7701 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7702 machine_mode orig_mode, bool named,
7703 HOST_WIDE_INT bytes)
7704 {
7705 unsigned int regno;
7706
7707   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7708      We use the value -2 to specify that the current function call is MS ABI.  */
7709 if (mode == VOIDmode)
7710 return GEN_INT (-2);
7711
7712 /* If we've run out of registers, it goes on the stack. */
7713 if (cum->nregs == 0)
7714 return NULL_RTX;
7715
7716 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7717
7718 /* Only floating point modes are passed in anything but integer regs. */
7719 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7720 {
7721 if (named)
7722 regno = cum->regno + FIRST_SSE_REG;
7723 else
7724 {
7725 rtx t1, t2;
7726
7727 /* Unnamed floating parameters are passed in both the
7728 SSE and integer registers. */
7729 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7730 t2 = gen_rtx_REG (mode, regno);
7731 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7732 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7733 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7734 }
7735 }
7736   /* Handle aggregate types passed in registers.  */
7737 if (orig_mode == BLKmode)
7738 {
7739 if (bytes > 0 && bytes <= 8)
7740 mode = (bytes > 4 ? DImode : SImode);
7741 if (mode == BLKmode)
7742 mode = DImode;
7743 }
7744
7745 return gen_reg_or_parallel (mode, orig_mode, regno);
7746 }
7747
7748 /* Return where to put the arguments to a function.
7749 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7750
7751 MODE is the argument's machine mode. TYPE is the data type of the
7752 argument. It is null for libcalls where that information may not be
7753 available. CUM gives information about the preceding args and about
7754 the function being called. NAMED is nonzero if this argument is a
7755 named parameter (otherwise it is an extra parameter matching an
7756 ellipsis). */
7757
7758 static rtx
7759 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7760 const_tree type, bool named)
7761 {
7762 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7763 machine_mode mode = omode;
7764 HOST_WIDE_INT bytes, words;
7765 rtx arg;
7766
7767   /* All pointer bounds arguments are handled separately here.  */
7768 if ((type && POINTER_BOUNDS_TYPE_P (type))
7769 || POINTER_BOUNDS_MODE_P (mode))
7770 {
7771 /* Return NULL if bounds are forced to go in Bounds Table. */
7772 if (cum->bnds_in_bt)
7773 arg = NULL;
7774 /* Return the next available bound reg if any. */
7775 else if (cum->bnd_regno <= LAST_BND_REG)
7776 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7777 /* Return the next special slot number otherwise. */
7778 else
7779 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7780
7781 return arg;
7782 }
7783
7784 if (mode == BLKmode)
7785 bytes = int_size_in_bytes (type);
7786 else
7787 bytes = GET_MODE_SIZE (mode);
7788 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7789
7790 /* To simplify the code below, represent vector types with a vector mode
7791 even if MMX/SSE are not active. */
7792 if (type && TREE_CODE (type) == VECTOR_TYPE)
7793 mode = type_natural_mode (type, cum, false);
7794
7795 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7796 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7797 else if (TARGET_64BIT)
7798 arg = function_arg_64 (cum, mode, omode, type, named);
7799 else
7800 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7801
7802 return arg;
7803 }
7804
7805 /* A C expression that indicates when an argument must be passed by
7806 reference. If nonzero for an argument, a copy of that argument is
7807 made in memory and a pointer to the argument is passed instead of
7808 the argument itself. The pointer is passed in whatever way is
7809 appropriate for passing a pointer to that type. */
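/* For example (illustrative only): under the 64-bit MS ABI a hypothetical

     struct s3 { char c[3]; };

   is passed by reference because its size (3 bytes) is not 1, 2, 4 or 8,
   and __m128 values are passed by reference for the same reason, while an
   8-byte struct is passed by value in a register.  */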
7810
7811 static bool
7812 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7813 const_tree type, bool)
7814 {
7815 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7816
7817 /* See Windows x64 Software Convention. */
7818 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7819 {
7820 int msize = (int) GET_MODE_SIZE (mode);
7821 if (type)
7822 {
7823 /* Arrays are passed by reference. */
7824 if (TREE_CODE (type) == ARRAY_TYPE)
7825 return true;
7826
7827 if (AGGREGATE_TYPE_P (type))
7828 {
7829 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7830 are passed by reference. */
7831 msize = int_size_in_bytes (type);
7832 }
7833 }
7834
7835 /* __m128 is passed by reference. */
7836 switch (msize) {
7837 case 1: case 2: case 4: case 8:
7838 break;
7839 default:
7840 return true;
7841 }
7842 }
7843 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7844     return true;
7845
7846   return false;
7847 }
7848
7849 /* Return true when TYPE should be 128bit aligned for 32bit argument
7850 passing ABI. XXX: This function is obsolete and is only used for
7851 checking psABI compatibility with previous versions of GCC. */
7852
7853 static bool
7854 ix86_compat_aligned_value_p (const_tree type)
7855 {
7856 machine_mode mode = TYPE_MODE (type);
7857 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7858 || mode == TDmode
7859 || mode == TFmode
7860 || mode == TCmode)
7861 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7862 return true;
7863 if (TYPE_ALIGN (type) < 128)
7864 return false;
7865
7866 if (AGGREGATE_TYPE_P (type))
7867 {
7868 /* Walk the aggregates recursively. */
7869 switch (TREE_CODE (type))
7870 {
7871 case RECORD_TYPE:
7872 case UNION_TYPE:
7873 case QUAL_UNION_TYPE:
7874 {
7875 tree field;
7876
7877 /* Walk all the structure fields. */
7878 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7879 {
7880 if (TREE_CODE (field) == FIELD_DECL
7881 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7882 return true;
7883 }
7884 break;
7885 }
7886
7887 case ARRAY_TYPE:
7888 	/* Just for use if some languages pass arrays by value.  */
7889 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7890 return true;
7891 break;
7892
7893 default:
7894 gcc_unreachable ();
7895 }
7896 }
7897 return false;
7898 }
7899
7900 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7901 XXX: This function is obsolete and is only used for checking psABI
7902 compatibility with previous versions of GCC. */
7903
7904 static unsigned int
7905 ix86_compat_function_arg_boundary (machine_mode mode,
7906 const_tree type, unsigned int align)
7907 {
7908 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7909 natural boundaries. */
7910 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7911 {
7912 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7913 make an exception for SSE modes since these require 128bit
7914 alignment.
7915
7916 The handling here differs from field_alignment. ICC aligns MMX
7917 arguments to 4 byte boundaries, while structure fields are aligned
7918 to 8 byte boundaries. */
7919 if (!type)
7920 {
7921 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7922 align = PARM_BOUNDARY;
7923 }
7924 else
7925 {
7926 if (!ix86_compat_aligned_value_p (type))
7927 align = PARM_BOUNDARY;
7928 }
7929 }
7930 if (align > BIGGEST_ALIGNMENT)
7931 align = BIGGEST_ALIGNMENT;
7932 return align;
7933 }
7934
7935 /* Return true when TYPE should be 128bit aligned for 32bit argument
7936 passing ABI. */
7937
7938 static bool
7939 ix86_contains_aligned_value_p (const_tree type)
7940 {
7941 machine_mode mode = TYPE_MODE (type);
7942
7943 if (mode == XFmode || mode == XCmode)
7944 return false;
7945
7946 if (TYPE_ALIGN (type) < 128)
7947 return false;
7948
7949 if (AGGREGATE_TYPE_P (type))
7950 {
7951 /* Walk the aggregates recursively. */
7952 switch (TREE_CODE (type))
7953 {
7954 case RECORD_TYPE:
7955 case UNION_TYPE:
7956 case QUAL_UNION_TYPE:
7957 {
7958 tree field;
7959
7960 /* Walk all the structure fields. */
7961 for (field = TYPE_FIELDS (type);
7962 field;
7963 field = DECL_CHAIN (field))
7964 {
7965 if (TREE_CODE (field) == FIELD_DECL
7966 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7967 return true;
7968 }
7969 break;
7970 }
7971
7972 case ARRAY_TYPE:
7973 	/* Just for use if some languages pass arrays by value.  */
7974 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7975 return true;
7976 break;
7977
7978 default:
7979 gcc_unreachable ();
7980 }
7981 }
7982 else
7983 return TYPE_ALIGN (type) >= 128;
7984
7985 return false;
7986 }
7987
7988 /* Gives the alignment boundary, in bits, of an argument with the
7989 specified mode and type. */
7990
7991 static unsigned int
7992 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7993 {
7994 unsigned int align;
7995 if (type)
7996 {
7997       /* Since the main variant type is used for the call, convert the type
7998 	 to its main variant.  */
7999 type = TYPE_MAIN_VARIANT (type);
8000 align = TYPE_ALIGN (type);
8001 }
8002 else
8003 align = GET_MODE_ALIGNMENT (mode);
8004 if (align < PARM_BOUNDARY)
8005 align = PARM_BOUNDARY;
8006 else
8007 {
8008 static bool warned;
8009 unsigned int saved_align = align;
8010
8011 if (!TARGET_64BIT)
8012 {
8013 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8014 if (!type)
8015 {
8016 if (mode == XFmode || mode == XCmode)
8017 align = PARM_BOUNDARY;
8018 }
8019 else if (!ix86_contains_aligned_value_p (type))
8020 align = PARM_BOUNDARY;
8021
8022 if (align < 128)
8023 align = PARM_BOUNDARY;
8024 }
8025
8026 if (warn_psabi
8027 && !warned
8028 && align != ix86_compat_function_arg_boundary (mode, type,
8029 saved_align))
8030 {
8031 warned = true;
8032 inform (input_location,
8033 "The ABI for passing parameters with %d-byte"
8034 " alignment has changed in GCC 4.6",
8035 align / BITS_PER_UNIT);
8036 }
8037 }
8038
8039 return align;
8040 }
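/* For example (illustrative only): in 32-bit mode a plain int argument gets
   PARM_BOUNDARY (32 bits) from the code above, while an __m128 argument
   keeps its 128-bit boundary; the -Wpsabi note fires when the result differs
   from the pre-GCC-4.6 computation.  */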
8041
8042 /* Return true if N is a possible register number of function value. */
8043
8044 static bool
8045 ix86_function_value_regno_p (const unsigned int regno)
8046 {
8047 switch (regno)
8048 {
8049 case AX_REG:
8050 return true;
8051 case DX_REG:
8052 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8053 case DI_REG:
8054 case SI_REG:
8055 return TARGET_64BIT && ix86_abi != MS_ABI;
8056
8057 case FIRST_BND_REG:
8058 return chkp_function_instrumented_p (current_function_decl);
8059
8060 /* Complex values are returned in %st(0)/%st(1) pair. */
8061 case ST0_REG:
8062 case ST1_REG:
8063 /* TODO: The function should depend on current function ABI but
8064 builtins.c would need updating then. Therefore we use the
8065 default ABI. */
8066 if (TARGET_64BIT && ix86_abi == MS_ABI)
8067 return false;
8068 return TARGET_FLOAT_RETURNS_IN_80387;
8069
8070 /* Complex values are returned in %xmm0/%xmm1 pair. */
8071 case XMM0_REG:
8072 case XMM1_REG:
8073 return TARGET_SSE;
8074
8075 case MM0_REG:
8076 if (TARGET_MACHO || TARGET_64BIT)
8077 return false;
8078 return TARGET_MMX;
8079 }
8080
8081 return false;
8082 }
8083
8084 /* Define how to find the value returned by a function.
8085 VALTYPE is the data type of the value (as a tree).
8086 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8087 otherwise, FUNC is 0. */
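/* For example (illustrative only): in 32-bit mode a double result normally
   comes back in %st(0), but for local functions compiled with SSE math or
   for functions with the sseregparm attribute the override below moves it
   to %xmm0; most integer results go in %eax.  */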
8088
8089 static rtx
8090 function_value_32 (machine_mode orig_mode, machine_mode mode,
8091 const_tree fntype, const_tree fn)
8092 {
8093 unsigned int regno;
8094
8095 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8096 we normally prevent this case when mmx is not available. However
8097 some ABIs may require the result to be returned like DImode. */
8098 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8099 regno = FIRST_MMX_REG;
8100
8101 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8102 we prevent this case when sse is not available. However some ABIs
8103 may require the result to be returned like integer TImode. */
8104 else if (mode == TImode
8105 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8106 regno = FIRST_SSE_REG;
8107
8108 /* 32-byte vector modes in %ymm0. */
8109 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8110 regno = FIRST_SSE_REG;
8111
8112 /* 64-byte vector modes in %zmm0. */
8113 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8114 regno = FIRST_SSE_REG;
8115
8116 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8117 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8118 regno = FIRST_FLOAT_REG;
8119 else
8120 /* Most things go in %eax. */
8121 regno = AX_REG;
8122
8123 /* Override FP return register with %xmm0 for local functions when
8124 SSE math is enabled or for functions with sseregparm attribute. */
8125 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8126 {
8127 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8128 if ((sse_level >= 1 && mode == SFmode)
8129 || (sse_level == 2 && mode == DFmode))
8130 regno = FIRST_SSE_REG;
8131 }
8132
8133 /* OImode shouldn't be used directly. */
8134 gcc_assert (mode != OImode);
8135
8136 return gen_rtx_REG (orig_mode, regno);
8137 }
8138
8139 static rtx
8140 function_value_64 (machine_mode orig_mode, machine_mode mode,
8141 const_tree valtype)
8142 {
8143 rtx ret;
8144
8145 /* Handle libcalls, which don't provide a type node. */
8146 if (valtype == NULL)
8147 {
8148 unsigned int regno;
8149
8150 switch (mode)
8151 {
8152 case SFmode:
8153 case SCmode:
8154 case DFmode:
8155 case DCmode:
8156 case TFmode:
8157 case SDmode:
8158 case DDmode:
8159 case TDmode:
8160 regno = FIRST_SSE_REG;
8161 break;
8162 case XFmode:
8163 case XCmode:
8164 regno = FIRST_FLOAT_REG;
8165 break;
8166 case TCmode:
8167 return NULL;
8168 default:
8169 regno = AX_REG;
8170 }
8171
8172 return gen_rtx_REG (mode, regno);
8173 }
8174 else if (POINTER_TYPE_P (valtype))
8175 {
8176 /* Pointers are always returned in word_mode. */
8177 mode = word_mode;
8178 }
8179
8180 ret = construct_container (mode, orig_mode, valtype, 1,
8181 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8182 x86_64_int_return_registers, 0);
8183
8184 /* For zero-sized structures, construct_container returns NULL, but we
8185 need to keep the rest of the compiler happy by returning a meaningful value. */
8186 if (!ret)
8187 ret = gen_rtx_REG (orig_mode, AX_REG);
8188
8189 return ret;
8190 }
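/* For SysV x86-64 libcalls (no type information), the switch above maps
   e.g. SFmode/DFmode/TFmode results to %xmm0 and XFmode (long double) to
   %st(0), everything else falling back to %rax; TCmode is the one case
   that is not given a register here.  This is only a summary of the cases
   handled above; the complex and decimal float modes follow the same
   pattern as their scalar counterparts.  */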
8191
8192 static rtx
8193 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8194 const_tree valtype)
8195 {
8196 unsigned int regno = AX_REG;
8197
8198 if (TARGET_SSE)
8199 {
8200 switch (GET_MODE_SIZE (mode))
8201 {
8202 case 16:
8203 if (valtype != NULL_TREE
8204 && !VECTOR_INTEGER_TYPE_P (valtype)
8206 && !INTEGRAL_TYPE_P (valtype)
8207 && !VECTOR_FLOAT_TYPE_P (valtype))
8208 break;
8209 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8210 && !COMPLEX_MODE_P (mode))
8211 regno = FIRST_SSE_REG;
8212 break;
8213 case 8:
8214 case 4:
8215 if (mode == SFmode || mode == DFmode)
8216 regno = FIRST_SSE_REG;
8217 break;
8218 default:
8219 break;
8220 }
8221 }
8222 return gen_rtx_REG (orig_mode, regno);
8223 }
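/* Under the Microsoft x64 ABI sketched above: 16-byte vector results such
   as __m128 come back in %xmm0, scalar float and double also in %xmm0,
   and other scalars of size 1/2/4/8 in %rax.  (Illustrative summary of
   the cases handled by the switch above.)  */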
8224
8225 static rtx
8226 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8227 machine_mode orig_mode, machine_mode mode)
8228 {
8229 const_tree fn, fntype;
8230
8231 fn = NULL_TREE;
8232 if (fntype_or_decl && DECL_P (fntype_or_decl))
8233 fn = fntype_or_decl;
8234 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8235
8236 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8237 || POINTER_BOUNDS_MODE_P (mode))
8238 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8239 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8240 return function_value_ms_64 (orig_mode, mode, valtype);
8241 else if (TARGET_64BIT)
8242 return function_value_64 (orig_mode, mode, valtype);
8243 else
8244 return function_value_32 (orig_mode, mode, fntype, fn);
8245 }
8246
8247 static rtx
8248 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8249 {
8250 machine_mode mode, orig_mode;
8251
8252 orig_mode = TYPE_MODE (valtype);
8253 mode = type_natural_mode (valtype, NULL, true);
8254 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8255 }
8256
8257 /* Return an RTX representing a place where a function returns
8258 or receives pointer bounds or NULL if no bounds are returned.
8259
8260 VALTYPE is a data type of a value returned by the function.
8261
8262 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8263 or FUNCTION_TYPE of the function.
8264
8265 If OUTGOING is false, return a place in which the caller will
8266 see the return value. Otherwise, return a place where a
8267 function returns a value. */
8268
8269 static rtx
8270 ix86_function_value_bounds (const_tree valtype,
8271 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8272 bool outgoing ATTRIBUTE_UNUSED)
8273 {
8274 rtx res = NULL_RTX;
8275
8276 if (BOUNDED_TYPE_P (valtype))
8277 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8278 else if (chkp_type_has_pointer (valtype))
8279 {
8280 bitmap slots;
8281 rtx bounds[2];
8282 bitmap_iterator bi;
8283 unsigned i, bnd_no = 0;
8284
8285 bitmap_obstack_initialize (NULL);
8286 slots = BITMAP_ALLOC (NULL);
8287 chkp_find_bound_slots (valtype, slots);
8288
8289 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8290 {
8291 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8292 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8293 gcc_assert (bnd_no < 2);
8294 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8295 }
8296
8297 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8298
8299 BITMAP_FREE (slots);
8300 bitmap_obstack_release (NULL);
8301 }
8302 else
8303 res = NULL_RTX;
8304
8305 return res;
8306 }
8307
8308 /* Pointer function arguments and return values are promoted to
8309 word_mode. */
8310
8311 static machine_mode
8312 ix86_promote_function_mode (const_tree type, machine_mode mode,
8313 int *punsignedp, const_tree fntype,
8314 int for_return)
8315 {
8316 if (type != NULL_TREE && POINTER_TYPE_P (type))
8317 {
8318 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8319 return word_mode;
8320 }
8321 return default_promote_function_mode (type, mode, punsignedp, fntype,
8322 for_return);
8323 }
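/* For instance, with -mx32 a 32-bit pointer argument or return value is
   widened here to DImode (word_mode) with zero extension, since
   POINTERS_EXTEND_UNSIGNED is nonzero on x86-64.  A sketch of the effect
   of the hook above.  */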
8324
8325 /* Return true if a structure, union or array with MODE containing FIELD
8326 should be accessed using BLKmode. */
8327
8328 static bool
8329 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8330 {
8331 /* Union with XFmode must be in BLKmode. */
8332 return (mode == XFmode
8333 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8334 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8335 }
8336
8337 rtx
8338 ix86_libcall_value (machine_mode mode)
8339 {
8340 return ix86_function_value_1 (NULL, NULL, mode, mode);
8341 }
8342
8343 /* Return true iff type is returned in memory. */
8344
8345 static bool
8346 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8347 {
8348 #ifdef SUBTARGET_RETURN_IN_MEMORY
8349 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8350 #else
8351 const machine_mode mode = type_natural_mode (type, NULL, true);
8352 HOST_WIDE_INT size;
8353
8354 if (POINTER_BOUNDS_TYPE_P (type))
8355 return false;
8356
8357 if (TARGET_64BIT)
8358 {
8359 if (ix86_function_type_abi (fntype) == MS_ABI)
8360 {
8361 size = int_size_in_bytes (type);
8362
8363 /* __m128 is returned in xmm0. */
8364 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8365 || INTEGRAL_TYPE_P (type)
8366 || VECTOR_FLOAT_TYPE_P (type))
8367 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8368 && !COMPLEX_MODE_P (mode)
8369 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8370 return false;
8371
8372 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
8373 return size != 1 && size != 2 && size != 4 && size != 8;
8374 }
8375 else
8376 {
8377 int needed_intregs, needed_sseregs;
8378
8379 return examine_argument (mode, type, 1,
8380 &needed_intregs, &needed_sseregs);
8381 }
8382 }
8383 else
8384 {
8385 if (mode == BLKmode)
8386 return true;
8387
8388 size = int_size_in_bytes (type);
8389
8390 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8391 return false;
8392
8393 if (VECTOR_MODE_P (mode) || mode == TImode)
8394 {
8395 /* User-created vectors small enough to fit in EAX. */
8396 if (size < 8)
8397 return false;
8398
8399 /* Unless the ABI prescribes otherwise,
8400 MMX/3dNow values are returned in MM0 if available. */
8401
8402 if (size == 8)
8403 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8404
8405 /* SSE values are returned in XMM0 if available. */
8406 if (size == 16)
8407 return !TARGET_SSE;
8408
8409 /* AVX values are returned in YMM0 if available. */
8410 if (size == 32)
8411 return !TARGET_AVX;
8412
8413 /* AVX512F values are returned in ZMM0 if available. */
8414 if (size == 64)
8415 return !TARGET_AVX512F;
8416 }
8417
8418 if (mode == XFmode)
8419 return false;
8420
8421 if (size > 12)
8422 return true;
8423
8424 /* OImode shouldn't be used directly. */
8425 gcc_assert (mode != OImode);
8426
8427 return false;
8428 }
8429 #endif
8430 }
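/* A few illustrative consequences of the rules above (assuming the default
   subtarget): on the 64-bit MS ABI a 16-byte struct of two doubles is
   returned in memory while __m128 is not; on 32-bit, long double (XFmode)
   stays in %st(0) by default, a 16-byte __m128 needs memory only when SSE
   is disabled, and any other aggregate larger than 12 bytes goes to
   memory.  */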
8431
8432 \f
8433 /* Create the va_list data type. */
8434
8435 /* Returns the calling convention specific va_list data type.
8436 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8437
8438 static tree
8439 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8440 {
8441 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8442
8443 /* For i386 we use plain pointer to argument area. */
8444 if (!TARGET_64BIT || abi == MS_ABI)
8445 return build_pointer_type (char_type_node);
8446
8447 record = lang_hooks.types.make_type (RECORD_TYPE);
8448 type_decl = build_decl (BUILTINS_LOCATION,
8449 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8450
8451 f_gpr = build_decl (BUILTINS_LOCATION,
8452 FIELD_DECL, get_identifier ("gp_offset"),
8453 unsigned_type_node);
8454 f_fpr = build_decl (BUILTINS_LOCATION,
8455 FIELD_DECL, get_identifier ("fp_offset"),
8456 unsigned_type_node);
8457 f_ovf = build_decl (BUILTINS_LOCATION,
8458 FIELD_DECL, get_identifier ("overflow_arg_area"),
8459 ptr_type_node);
8460 f_sav = build_decl (BUILTINS_LOCATION,
8461 FIELD_DECL, get_identifier ("reg_save_area"),
8462 ptr_type_node);
8463
8464 va_list_gpr_counter_field = f_gpr;
8465 va_list_fpr_counter_field = f_fpr;
8466
8467 DECL_FIELD_CONTEXT (f_gpr) = record;
8468 DECL_FIELD_CONTEXT (f_fpr) = record;
8469 DECL_FIELD_CONTEXT (f_ovf) = record;
8470 DECL_FIELD_CONTEXT (f_sav) = record;
8471
8472 TYPE_STUB_DECL (record) = type_decl;
8473 TYPE_NAME (record) = type_decl;
8474 TYPE_FIELDS (record) = f_gpr;
8475 DECL_CHAIN (f_gpr) = f_fpr;
8476 DECL_CHAIN (f_fpr) = f_ovf;
8477 DECL_CHAIN (f_ovf) = f_sav;
8478
8479 layout_type (record);
8480
8481 /* The correct type is an array type of one element. */
8482 return build_array_type (record, build_index_type (size_zero_node));
8483 }
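/* The record built above corresponds to the layout mandated by the
   SysV x86-64 psABI, roughly:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   (Shown only as an illustration of the fields created above.)  */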
8484
8485 /* Setup the builtin va_list data type and for 64-bit the additional
8486 calling convention specific va_list data types. */
8487
8488 static tree
8489 ix86_build_builtin_va_list (void)
8490 {
8491 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8492
8493 /* Initialize abi specific va_list builtin types. */
8494 if (TARGET_64BIT)
8495 {
8496 tree t;
8497 if (ix86_abi == MS_ABI)
8498 {
8499 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8500 if (TREE_CODE (t) != RECORD_TYPE)
8501 t = build_variant_type_copy (t);
8502 sysv_va_list_type_node = t;
8503 }
8504 else
8505 {
8506 t = ret;
8507 if (TREE_CODE (t) != RECORD_TYPE)
8508 t = build_variant_type_copy (t);
8509 sysv_va_list_type_node = t;
8510 }
8511 if (ix86_abi != MS_ABI)
8512 {
8513 t = ix86_build_builtin_va_list_abi (MS_ABI);
8514 if (TREE_CODE (t) != RECORD_TYPE)
8515 t = build_variant_type_copy (t);
8516 ms_va_list_type_node = t;
8517 }
8518 else
8519 {
8520 t = ret;
8521 if (TREE_CODE (t) != RECORD_TYPE)
8522 t = build_variant_type_copy (t);
8523 ms_va_list_type_node = t;
8524 }
8525 }
8526
8527 return ret;
8528 }
8529
8530 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8531
8532 static void
8533 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8534 {
8535 rtx save_area, mem;
8536 alias_set_type set;
8537 int i, max;
8538
8539 /* GPR size of varargs save area. */
8540 if (cfun->va_list_gpr_size)
8541 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8542 else
8543 ix86_varargs_gpr_size = 0;
8544
8545 /* FPR size of varargs save area. We don't need it if we don't pass
8546 anything in SSE registers. */
8547 if (TARGET_SSE && cfun->va_list_fpr_size)
8548 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8549 else
8550 ix86_varargs_fpr_size = 0;
8551
8552 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8553 return;
8554
8555 save_area = frame_pointer_rtx;
8556 set = get_varargs_alias_set ();
8557
8558 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8559 if (max > X86_64_REGPARM_MAX)
8560 max = X86_64_REGPARM_MAX;
8561
8562 for (i = cum->regno; i < max; i++)
8563 {
8564 mem = gen_rtx_MEM (word_mode,
8565 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8566 MEM_NOTRAP_P (mem) = 1;
8567 set_mem_alias_set (mem, set);
8568 emit_move_insn (mem,
8569 gen_rtx_REG (word_mode,
8570 x86_64_int_parameter_registers[i]));
8571 }
8572
8573 if (ix86_varargs_fpr_size)
8574 {
8575 machine_mode smode;
8576 rtx_code_label *label;
8577 rtx test;
8578
8579 /* Now emit code to save SSE registers. The AX parameter contains the number
8580 of SSE parameter registers used to call this function, though all we
8581 actually check here is the zero/non-zero status. */
8582
8583 label = gen_label_rtx ();
8584 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8585 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8586 label));
8587
8588 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8589 we used movdqa (i.e. TImode) instead? Perhaps even better would
8590 be if we could determine the real mode of the data, via a hook
8591 into pass_stdarg. Ignore all that for now. */
8592 smode = V4SFmode;
8593 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8594 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8595
8596 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8597 if (max > X86_64_SSE_REGPARM_MAX)
8598 max = X86_64_SSE_REGPARM_MAX;
8599
8600 for (i = cum->sse_regno; i < max; ++i)
8601 {
8602 mem = plus_constant (Pmode, save_area,
8603 i * 16 + ix86_varargs_gpr_size);
8604 mem = gen_rtx_MEM (smode, mem);
8605 MEM_NOTRAP_P (mem) = 1;
8606 set_mem_alias_set (mem, set);
8607 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8608
8609 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8610 }
8611
8612 emit_label (label);
8613 }
8614 }
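/* Sketch of the register save area laid out above for the SysV ABI:
   the six integer argument registers (rdi, rsi, rdx, rcx, r8, r9) are
   spilled into 8-byte slots at offsets 0..47, followed by up to eight
   16-byte slots for xmm0-xmm7 starting at ix86_varargs_gpr_size.  The
   SSE spills are skipped at run time when %al is zero.  */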
8615
8616 static void
8617 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8618 {
8619 alias_set_type set = get_varargs_alias_set ();
8620 int i;
8621
8622 /* Reset to zero, as there might be a sysv va_arg used
8623 before. */
8624 ix86_varargs_gpr_size = 0;
8625 ix86_varargs_fpr_size = 0;
8626
8627 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8628 {
8629 rtx reg, mem;
8630
8631 mem = gen_rtx_MEM (Pmode,
8632 plus_constant (Pmode, virtual_incoming_args_rtx,
8633 i * UNITS_PER_WORD));
8634 MEM_NOTRAP_P (mem) = 1;
8635 set_mem_alias_set (mem, set);
8636
8637 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8638 emit_move_insn (mem, reg);
8639 }
8640 }
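/* For the MS ABI the loop above simply homes the (up to four) register
   arguments rcx, rdx, r8 and r9 into the caller-allocated shadow space
   at incoming-args + 0, 8, 16 and 24, so va_arg can walk the stack
   linearly.  (Illustrative summary; the register order comes from
   x86_64_ms_abi_int_parameter_registers.)  */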
8641
8642 static void
8643 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8644 tree type, int *, int no_rtl)
8645 {
8646 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8647 CUMULATIVE_ARGS next_cum;
8648 tree fntype;
8649
8650 /* This argument doesn't appear to be used anymore. Which is good,
8651 because the old code here didn't suppress rtl generation. */
8652 gcc_assert (!no_rtl);
8653
8654 if (!TARGET_64BIT)
8655 return;
8656
8657 fntype = TREE_TYPE (current_function_decl);
8658
8659 /* For varargs, we do not want to skip the dummy va_dcl argument.
8660 For stdargs, we do want to skip the last named argument. */
8661 next_cum = *cum;
8662 if (stdarg_p (fntype))
8663 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8664 true);
8665
8666 if (cum->call_abi == MS_ABI)
8667 setup_incoming_varargs_ms_64 (&next_cum);
8668 else
8669 setup_incoming_varargs_64 (&next_cum);
8670 }
8671
8672 static void
8673 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8674 enum machine_mode mode,
8675 tree type,
8676 int *pretend_size ATTRIBUTE_UNUSED,
8677 int no_rtl)
8678 {
8679 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8680 CUMULATIVE_ARGS next_cum;
8681 tree fntype;
8682 rtx save_area;
8683 int bnd_reg, i, max;
8684
8685 gcc_assert (!no_rtl);
8686
8687 /* Do nothing if we use plain pointer to argument area. */
8688 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8689 return;
8690
8691 fntype = TREE_TYPE (current_function_decl);
8692
8693 /* For varargs, we do not want to skip the dummy va_dcl argument.
8694 For stdargs, we do want to skip the last named argument. */
8695 next_cum = *cum;
8696 if (stdarg_p (fntype))
8697 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8698 true);
8699 save_area = frame_pointer_rtx;
8700
8701 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8702 if (max > X86_64_REGPARM_MAX)
8703 max = X86_64_REGPARM_MAX;
8704
8705 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8706 if (chkp_function_instrumented_p (current_function_decl))
8707 for (i = cum->regno; i < max; i++)
8708 {
8709 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8710 rtx reg = gen_rtx_REG (DImode,
8711 x86_64_int_parameter_registers[i]);
8712 rtx ptr = reg;
8713 rtx bounds;
8714
8715 if (bnd_reg <= LAST_BND_REG)
8716 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8717 else
8718 {
8719 rtx ldx_addr =
8720 plus_constant (Pmode, arg_pointer_rtx,
8721 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8722 bounds = gen_reg_rtx (BNDmode);
8723 emit_insn (BNDmode == BND64mode
8724 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8725 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8726 }
8727
8728 emit_insn (BNDmode == BND64mode
8729 ? gen_bnd64_stx (addr, ptr, bounds)
8730 : gen_bnd32_stx (addr, ptr, bounds));
8731
8732 bnd_reg++;
8733 }
8734 }
8735
8736
8737 /* Check whether TYPE is a va_list of kind char *. */
8738
8739 static bool
8740 is_va_list_char_pointer (tree type)
8741 {
8742 tree canonic;
8743
8744 /* For 32-bit it is always true. */
8745 if (!TARGET_64BIT)
8746 return true;
8747 canonic = ix86_canonical_va_list_type (type);
8748 return (canonic == ms_va_list_type_node
8749 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8750 }
8751
8752 /* Implement va_start. */
8753
8754 static void
8755 ix86_va_start (tree valist, rtx nextarg)
8756 {
8757 HOST_WIDE_INT words, n_gpr, n_fpr;
8758 tree f_gpr, f_fpr, f_ovf, f_sav;
8759 tree gpr, fpr, ovf, sav, t;
8760 tree type;
8761 rtx ovf_rtx;
8762
8763 if (flag_split_stack
8764 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8765 {
8766 unsigned int scratch_regno;
8767
8768 /* When we are splitting the stack, we can't refer to the stack
8769 arguments using internal_arg_pointer, because they may be on
8770 the old stack. The split stack prologue will arrange to
8771 leave a pointer to the old stack arguments in a scratch
8772 register, which we here copy to a pseudo-register. The split
8773 stack prologue can't set the pseudo-register directly because
8774 it (the prologue) runs before any registers have been saved. */
8775
8776 scratch_regno = split_stack_prologue_scratch_regno ();
8777 if (scratch_regno != INVALID_REGNUM)
8778 {
8779 rtx reg;
8780 rtx_insn *seq;
8781
8782 reg = gen_reg_rtx (Pmode);
8783 cfun->machine->split_stack_varargs_pointer = reg;
8784
8785 start_sequence ();
8786 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8787 seq = get_insns ();
8788 end_sequence ();
8789
8790 push_topmost_sequence ();
8791 emit_insn_after (seq, entry_of_function ());
8792 pop_topmost_sequence ();
8793 }
8794 }
8795
8796 /* Only 64bit target needs something special. */
8797 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8798 {
8799 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8800 std_expand_builtin_va_start (valist, nextarg);
8801 else
8802 {
8803 rtx va_r, next;
8804
8805 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8806 next = expand_binop (ptr_mode, add_optab,
8807 cfun->machine->split_stack_varargs_pointer,
8808 crtl->args.arg_offset_rtx,
8809 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8810 convert_move (va_r, next, 0);
8811
8812 /* Store zero bounds for va_list. */
8813 if (chkp_function_instrumented_p (current_function_decl))
8814 chkp_expand_bounds_reset_for_mem (valist,
8815 make_tree (TREE_TYPE (valist),
8816 next));
8817
8818 }
8819 return;
8820 }
8821
8822 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8823 f_fpr = DECL_CHAIN (f_gpr);
8824 f_ovf = DECL_CHAIN (f_fpr);
8825 f_sav = DECL_CHAIN (f_ovf);
8826
8827 valist = build_simple_mem_ref (valist);
8828 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8829 /* The following should be folded into the MEM_REF offset. */
8830 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8831 f_gpr, NULL_TREE);
8832 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8833 f_fpr, NULL_TREE);
8834 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8835 f_ovf, NULL_TREE);
8836 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8837 f_sav, NULL_TREE);
8838
8839 /* Count number of gp and fp argument registers used. */
8840 words = crtl->args.info.words;
8841 n_gpr = crtl->args.info.regno;
8842 n_fpr = crtl->args.info.sse_regno;
8843
8844 if (cfun->va_list_gpr_size)
8845 {
8846 type = TREE_TYPE (gpr);
8847 t = build2 (MODIFY_EXPR, type,
8848 gpr, build_int_cst (type, n_gpr * 8));
8849 TREE_SIDE_EFFECTS (t) = 1;
8850 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8851 }
8852
8853 if (TARGET_SSE && cfun->va_list_fpr_size)
8854 {
8855 type = TREE_TYPE (fpr);
8856 t = build2 (MODIFY_EXPR, type, fpr,
8857 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8858 TREE_SIDE_EFFECTS (t) = 1;
8859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8860 }
8861
8862 /* Find the overflow area. */
8863 type = TREE_TYPE (ovf);
8864 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8865 ovf_rtx = crtl->args.internal_arg_pointer;
8866 else
8867 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8868 t = make_tree (type, ovf_rtx);
8869 if (words != 0)
8870 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8871
8872 /* Store zero bounds for overflow area pointer. */
8873 if (chkp_function_instrumented_p (current_function_decl))
8874 chkp_expand_bounds_reset_for_mem (ovf, t);
8875
8876 t = build2 (MODIFY_EXPR, type, ovf, t);
8877 TREE_SIDE_EFFECTS (t) = 1;
8878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8879
8880 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8881 {
8882 /* Find the register save area.
8883 The function prologue saves it right above the stack frame. */
8884 type = TREE_TYPE (sav);
8885 t = make_tree (type, frame_pointer_rtx);
8886 if (!ix86_varargs_gpr_size)
8887 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8888
8889 /* Store zero bounds for save area pointer. */
8890 if (chkp_function_instrumented_p (current_function_decl))
8891 chkp_expand_bounds_reset_for_mem (sav, t);
8892
8893 t = build2 (MODIFY_EXPR, type, sav, t);
8894 TREE_SIDE_EFFECTS (t) = 1;
8895 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8896 }
8897 }
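/* Example of the initialization performed above: for a SysV function such
   as  int f (int a, ...)  the expanded va_start roughly sets
   gp_offset = 8 (one GP register consumed by A), fp_offset = 48
   (8 * X86_64_REGPARM_MAX, no SSE registers consumed), overflow_arg_area
   to the first stack-passed argument, and reg_save_area to the block
   stored by setup_incoming_varargs_64.  A sketch, assuming the default
   SysV varargs layout.  */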
8898
8899 /* Implement va_arg. */
8900
8901 static tree
8902 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8903 gimple_seq *post_p)
8904 {
8905 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8906 tree f_gpr, f_fpr, f_ovf, f_sav;
8907 tree gpr, fpr, ovf, sav, t;
8908 int size, rsize;
8909 tree lab_false, lab_over = NULL_TREE;
8910 tree addr, t2;
8911 rtx container;
8912 int indirect_p = 0;
8913 tree ptrtype;
8914 machine_mode nat_mode;
8915 unsigned int arg_boundary;
8916
8917 /* Only 64bit target needs something special. */
8918 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8919 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8920
8921 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8922 f_fpr = DECL_CHAIN (f_gpr);
8923 f_ovf = DECL_CHAIN (f_fpr);
8924 f_sav = DECL_CHAIN (f_ovf);
8925
8926 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8927 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8928 valist = build_va_arg_indirect_ref (valist);
8929 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8930 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8931 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8932
8933 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8934 if (indirect_p)
8935 type = build_pointer_type (type);
8936 size = int_size_in_bytes (type);
8937 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8938
8939 nat_mode = type_natural_mode (type, NULL, false);
8940 switch (nat_mode)
8941 {
8942 case V8SFmode:
8943 case V8SImode:
8944 case V32QImode:
8945 case V16HImode:
8946 case V4DFmode:
8947 case V4DImode:
8948 case V16SFmode:
8949 case V16SImode:
8950 case V64QImode:
8951 case V32HImode:
8952 case V8DFmode:
8953 case V8DImode:
8954 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8955 if (!TARGET_64BIT_MS_ABI)
8956 {
8957 container = NULL;
8958 break;
8959 }
8960
8961 default:
8962 container = construct_container (nat_mode, TYPE_MODE (type),
8963 type, 0, X86_64_REGPARM_MAX,
8964 X86_64_SSE_REGPARM_MAX, intreg,
8965 0);
8966 break;
8967 }
8968
8969 /* Pull the value out of the saved registers. */
8970
8971 addr = create_tmp_var (ptr_type_node, "addr");
8972
8973 if (container)
8974 {
8975 int needed_intregs, needed_sseregs;
8976 bool need_temp;
8977 tree int_addr, sse_addr;
8978
8979 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8980 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8981
8982 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8983
8984 need_temp = (!REG_P (container)
8985 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8986 || TYPE_ALIGN (type) > 128));
8987
8988 /* In case we are passing a structure, verify that it is a consecutive block
8989 on the register save area. If not, we need to do moves. */
8990 if (!need_temp && !REG_P (container))
8991 {
8992 /* Verify that all registers are strictly consecutive */
8993 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8994 {
8995 int i;
8996
8997 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8998 {
8999 rtx slot = XVECEXP (container, 0, i);
9000 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9001 || INTVAL (XEXP (slot, 1)) != i * 16)
9002 need_temp = 1;
9003 }
9004 }
9005 else
9006 {
9007 int i;
9008
9009 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9010 {
9011 rtx slot = XVECEXP (container, 0, i);
9012 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9013 || INTVAL (XEXP (slot, 1)) != i * 8)
9014 need_temp = 1;
9015 }
9016 }
9017 }
9018 if (!need_temp)
9019 {
9020 int_addr = addr;
9021 sse_addr = addr;
9022 }
9023 else
9024 {
9025 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9026 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9027 }
9028
9029 /* First ensure that we fit completely in registers. */
9030 if (needed_intregs)
9031 {
9032 t = build_int_cst (TREE_TYPE (gpr),
9033 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9034 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9035 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9036 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9037 gimplify_and_add (t, pre_p);
9038 }
9039 if (needed_sseregs)
9040 {
9041 t = build_int_cst (TREE_TYPE (fpr),
9042 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9043 + X86_64_REGPARM_MAX * 8);
9044 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9045 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9046 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9047 gimplify_and_add (t, pre_p);
9048 }
9049
9050 /* Compute index to start of area used for integer regs. */
9051 if (needed_intregs)
9052 {
9053 /* int_addr = gpr + sav; */
9054 t = fold_build_pointer_plus (sav, gpr);
9055 gimplify_assign (int_addr, t, pre_p);
9056 }
9057 if (needed_sseregs)
9058 {
9059 /* sse_addr = fpr + sav; */
9060 t = fold_build_pointer_plus (sav, fpr);
9061 gimplify_assign (sse_addr, t, pre_p);
9062 }
9063 if (need_temp)
9064 {
9065 int i, prev_size = 0;
9066 tree temp = create_tmp_var (type, "va_arg_tmp");
9067
9068 /* addr = &temp; */
9069 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9070 gimplify_assign (addr, t, pre_p);
9071
9072 for (i = 0; i < XVECLEN (container, 0); i++)
9073 {
9074 rtx slot = XVECEXP (container, 0, i);
9075 rtx reg = XEXP (slot, 0);
9076 machine_mode mode = GET_MODE (reg);
9077 tree piece_type;
9078 tree addr_type;
9079 tree daddr_type;
9080 tree src_addr, src;
9081 int src_offset;
9082 tree dest_addr, dest;
9083 int cur_size = GET_MODE_SIZE (mode);
9084
9085 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9086 prev_size = INTVAL (XEXP (slot, 1));
9087 if (prev_size + cur_size > size)
9088 {
9089 cur_size = size - prev_size;
9090 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9091 if (mode == BLKmode)
9092 mode = QImode;
9093 }
9094 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9095 if (mode == GET_MODE (reg))
9096 addr_type = build_pointer_type (piece_type);
9097 else
9098 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9099 true);
9100 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9101 true);
9102
9103 if (SSE_REGNO_P (REGNO (reg)))
9104 {
9105 src_addr = sse_addr;
9106 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9107 }
9108 else
9109 {
9110 src_addr = int_addr;
9111 src_offset = REGNO (reg) * 8;
9112 }
9113 src_addr = fold_convert (addr_type, src_addr);
9114 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9115
9116 dest_addr = fold_convert (daddr_type, addr);
9117 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9118 if (cur_size == GET_MODE_SIZE (mode))
9119 {
9120 src = build_va_arg_indirect_ref (src_addr);
9121 dest = build_va_arg_indirect_ref (dest_addr);
9122
9123 gimplify_assign (dest, src, pre_p);
9124 }
9125 else
9126 {
9127 tree copy
9128 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9129 3, dest_addr, src_addr,
9130 size_int (cur_size));
9131 gimplify_and_add (copy, pre_p);
9132 }
9133 prev_size += cur_size;
9134 }
9135 }
9136
9137 if (needed_intregs)
9138 {
9139 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9140 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9141 gimplify_assign (gpr, t, pre_p);
9142 }
9143
9144 if (needed_sseregs)
9145 {
9146 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9147 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9148 gimplify_assign (fpr, t, pre_p);
9149 }
9150
9151 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9152
9153 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9154 }
9155
9156 /* ... otherwise out of the overflow area. */
9157
9158 /* When we align a parameter on the stack for the caller, if its
9159 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9160 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with
9161 the caller here. */
9162 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9163 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9164 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9165
9166 /* Care for on-stack alignment if needed. */
9167 if (arg_boundary <= 64 || size == 0)
9168 t = ovf;
9169 else
9170 {
9171 HOST_WIDE_INT align = arg_boundary / 8;
9172 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9173 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9174 build_int_cst (TREE_TYPE (t), -align));
9175 }
9176
9177 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9178 gimplify_assign (addr, t, pre_p);
9179
9180 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9181 gimplify_assign (unshare_expr (ovf), t, pre_p);
9182
9183 if (container)
9184 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9185
9186 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9187 addr = fold_convert (ptrtype, addr);
9188
9189 if (indirect_p)
9190 addr = build_va_arg_indirect_ref (addr);
9191 return build_va_arg_indirect_ref (addr);
9192 }
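/* The code above expands to something equivalent to the classic psABI
   sequence; for an int argument it is roughly (a sketch that ignores the
   temporary-copy and alignment paths):

     if (ap->gp_offset < 48)
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     result = *(int *) addr;                                        */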
9193 \f
9194 /* Return true if OPNUM's MEM should be matched
9195 in movabs* patterns. */
9196
9197 bool
9198 ix86_check_movabs (rtx insn, int opnum)
9199 {
9200 rtx set, mem;
9201
9202 set = PATTERN (insn);
9203 if (GET_CODE (set) == PARALLEL)
9204 set = XVECEXP (set, 0, 0);
9205 gcc_assert (GET_CODE (set) == SET);
9206 mem = XEXP (set, opnum);
9207 while (GET_CODE (mem) == SUBREG)
9208 mem = SUBREG_REG (mem);
9209 gcc_assert (MEM_P (mem));
9210 return volatile_ok || !MEM_VOLATILE_P (mem);
9211 }
9212 \f
9213 /* Initialize the table of extra 80387 mathematical constants. */
9214
9215 static void
9216 init_ext_80387_constants (void)
9217 {
9218 static const char * cst[5] =
9219 {
9220 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9221 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9222 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9223 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9224 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9225 };
9226 int i;
9227
9228 for (i = 0; i < 5; i++)
9229 {
9230 real_from_string (&ext_80387_constants_table[i], cst[i]);
9231 /* Ensure each constant is rounded to XFmode precision. */
9232 real_convert (&ext_80387_constants_table[i],
9233 XFmode, &ext_80387_constants_table[i]);
9234 }
9235
9236 ext_80387_constants_init = 1;
9237 }
9238
9239 /* Return non-zero if the constant is something that
9240 can be loaded with a special instruction. */
9241
9242 int
9243 standard_80387_constant_p (rtx x)
9244 {
9245 machine_mode mode = GET_MODE (x);
9246
9247 REAL_VALUE_TYPE r;
9248
9249 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9250 return -1;
9251
9252 if (x == CONST0_RTX (mode))
9253 return 1;
9254 if (x == CONST1_RTX (mode))
9255 return 2;
9256
9257 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9258
9259 /* For XFmode constants, try to find a special 80387 instruction when
9260 optimizing for size or on those CPUs that benefit from them. */
9261 if (mode == XFmode
9262 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9263 {
9264 int i;
9265
9266 if (! ext_80387_constants_init)
9267 init_ext_80387_constants ();
9268
9269 for (i = 0; i < 5; i++)
9270 if (real_identical (&r, &ext_80387_constants_table[i]))
9271 return i + 3;
9272 }
9273
9274 /* A load of the constant -0.0 or -1.0 will be split into an
9275 fldz;fchs or fld1;fchs sequence. */
9276 if (real_isnegzero (&r))
9277 return 8;
9278 if (real_identical (&r, &dconstm1))
9279 return 9;
9280
9281 return 0;
9282 }
9283
9284 /* Return the opcode of the special instruction to be used to load
9285 the constant X. */
9286
9287 const char *
9288 standard_80387_constant_opcode (rtx x)
9289 {
9290 switch (standard_80387_constant_p (x))
9291 {
9292 case 1:
9293 return "fldz";
9294 case 2:
9295 return "fld1";
9296 case 3:
9297 return "fldlg2";
9298 case 4:
9299 return "fldln2";
9300 case 5:
9301 return "fldl2e";
9302 case 6:
9303 return "fldl2t";
9304 case 7:
9305 return "fldpi";
9306 case 8:
9307 case 9:
9308 return "#";
9309 default:
9310 gcc_unreachable ();
9311 }
9312 }
9313
9314 /* Return the CONST_DOUBLE representing the 80387 constant that is
9315 loaded by the specified special instruction. The argument IDX
9316 matches the return value from standard_80387_constant_p. */
9317
9318 rtx
9319 standard_80387_constant_rtx (int idx)
9320 {
9321 int i;
9322
9323 if (! ext_80387_constants_init)
9324 init_ext_80387_constants ();
9325
9326 switch (idx)
9327 {
9328 case 3:
9329 case 4:
9330 case 5:
9331 case 6:
9332 case 7:
9333 i = idx - 3;
9334 break;
9335
9336 default:
9337 gcc_unreachable ();
9338 }
9339
9340 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9341 XFmode);
9342 }
9343
9344 /* Return 1 if X is all 0s and 2 if X is all 1s
9345 in a supported SSE/AVX vector mode. */
9346
9347 int
9348 standard_sse_constant_p (rtx x)
9349 {
9350 machine_mode mode = GET_MODE (x);
9351
9352 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9353 return 1;
9354 if (vector_all_ones_operand (x, mode))
9355 switch (mode)
9356 {
9357 case V16QImode:
9358 case V8HImode:
9359 case V4SImode:
9360 case V2DImode:
9361 if (TARGET_SSE2)
9362 return 2;
9363 case V32QImode:
9364 case V16HImode:
9365 case V8SImode:
9366 case V4DImode:
9367 if (TARGET_AVX2)
9368 return 2;
9369 case V64QImode:
9370 case V32HImode:
9371 case V16SImode:
9372 case V8DImode:
9373 if (TARGET_AVX512F)
9374 return 2;
9375 default:
9376 break;
9377 }
9378
9379 return 0;
9380 }
9381
9382 /* Return the opcode of the special instruction to be used to load
9383 the constant X. */
9384
9385 const char *
9386 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9387 {
9388 switch (standard_sse_constant_p (x))
9389 {
9390 case 1:
9391 switch (get_attr_mode (insn))
9392 {
9393 case MODE_XI:
9394 return "vpxord\t%g0, %g0, %g0";
9395 case MODE_V16SF:
9396 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9397 : "vpxord\t%g0, %g0, %g0";
9398 case MODE_V8DF:
9399 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9400 : "vpxorq\t%g0, %g0, %g0";
9401 case MODE_TI:
9402 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9403 : "%vpxor\t%0, %d0";
9404 case MODE_V2DF:
9405 return "%vxorpd\t%0, %d0";
9406 case MODE_V4SF:
9407 return "%vxorps\t%0, %d0";
9408
9409 case MODE_OI:
9410 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9411 : "vpxor\t%x0, %x0, %x0";
9412 case MODE_V4DF:
9413 return "vxorpd\t%x0, %x0, %x0";
9414 case MODE_V8SF:
9415 return "vxorps\t%x0, %x0, %x0";
9416
9417 default:
9418 break;
9419 }
9420
9421 case 2:
9422 if (TARGET_AVX512VL
9423 || get_attr_mode (insn) == MODE_XI
9424 || get_attr_mode (insn) == MODE_V8DF
9425 || get_attr_mode (insn) == MODE_V16SF)
9426 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9427 if (TARGET_AVX)
9428 return "vpcmpeqd\t%0, %0, %0";
9429 else
9430 return "pcmpeqd\t%0, %0";
9431
9432 default:
9433 break;
9434 }
9435 gcc_unreachable ();
9436 }
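/* For example, an all-zero V4SF constant is emitted above as
   "xorps %xmm0, %xmm0" (or its VEX/EVEX forms), and an all-ones V4SI
   constant as "pcmpeqd %xmm0, %xmm0", or "vpcmpeqd" under AVX.
   (Illustration only; the exact mnemonic depends on get_attr_mode.)  */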
9437
9438 /* Returns true if OP contains a symbol reference */
9439
9440 bool
9441 symbolic_reference_mentioned_p (rtx op)
9442 {
9443 const char *fmt;
9444 int i;
9445
9446 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9447 return true;
9448
9449 fmt = GET_RTX_FORMAT (GET_CODE (op));
9450 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9451 {
9452 if (fmt[i] == 'E')
9453 {
9454 int j;
9455
9456 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9457 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9458 return true;
9459 }
9460
9461 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9462 return true;
9463 }
9464
9465 return false;
9466 }
9467
9468 /* Return true if it is appropriate to emit `ret' instructions in the
9469 body of a function. Do this only if the epilogue is simple, needing a
9470 couple of insns. Prior to reloading, we can't tell how many registers
9471 must be saved, so return false then. Return false if there is no frame
9472 marker to de-allocate. */
9473
9474 bool
9475 ix86_can_use_return_insn_p (void)
9476 {
9477 struct ix86_frame frame;
9478
9479 if (! reload_completed || frame_pointer_needed)
9480 return 0;
9481
9482 /* Don't allow more than 32k pop, since that's all we can do
9483 with one instruction. */
9484 if (crtl->args.pops_args && crtl->args.size >= 32768)
9485 return 0;
9486
9487 ix86_compute_frame_layout (&frame);
9488 return (frame.stack_pointer_offset == UNITS_PER_WORD
9489 && (frame.nregs + frame.nsseregs) == 0);
9490 }
9491 \f
9492 /* Value should be nonzero if functions must have frame pointers.
9493 Zero means the frame pointer need not be set up (and parms may
9494 be accessed via the stack pointer) in functions that seem suitable. */
9495
9496 static bool
9497 ix86_frame_pointer_required (void)
9498 {
9499 /* If we accessed previous frames, then the generated code expects
9500 to be able to access the saved ebp value in our frame. */
9501 if (cfun->machine->accesses_prev_frame)
9502 return true;
9503
9504 /* Several x86 OSes need a frame pointer for other reasons,
9505 usually pertaining to setjmp. */
9506 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9507 return true;
9508
9509 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9510 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9511 return true;
9512
9513 /* For Win64 SEH, very large frames need a frame pointer as the maximum
9514 stack allocation is 4GB. */
9515 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9516 return true;
9517
9518 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9519 turns off the frame pointer by default. Turn it back on now if
9520 we've not got a leaf function. */
9521 if (TARGET_OMIT_LEAF_FRAME_POINTER
9522 && (!crtl->is_leaf
9523 || ix86_current_function_calls_tls_descriptor))
9524 return true;
9525
9526 if (crtl->profile && !flag_fentry)
9527 return true;
9528
9529 return false;
9530 }
9531
9532 /* Record that the current function accesses previous call frames. */
9533
9534 void
9535 ix86_setup_frame_addresses (void)
9536 {
9537 cfun->machine->accesses_prev_frame = 1;
9538 }
9539 \f
9540 #ifndef USE_HIDDEN_LINKONCE
9541 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9542 # define USE_HIDDEN_LINKONCE 1
9543 # else
9544 # define USE_HIDDEN_LINKONCE 0
9545 # endif
9546 #endif
9547
9548 static int pic_labels_used;
9549
9550 /* Fills in the label name that should be used for a pc thunk for
9551 the given register. */
9552
9553 static void
9554 get_pc_thunk_name (char name[32], unsigned int regno)
9555 {
9556 gcc_assert (!TARGET_64BIT);
9557
9558 if (USE_HIDDEN_LINKONCE)
9559 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9560 else
9561 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9562 }
9563
9564
9565 /* This function generates code for -fpic that loads %ebx with
9566 the return address of the caller and then returns. */
9567
9568 static void
9569 ix86_code_end (void)
9570 {
9571 rtx xops[2];
9572 int regno;
9573
9574 for (regno = AX_REG; regno <= SP_REG; regno++)
9575 {
9576 char name[32];
9577 tree decl;
9578
9579 if (!(pic_labels_used & (1 << regno)))
9580 continue;
9581
9582 get_pc_thunk_name (name, regno);
9583
9584 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9585 get_identifier (name),
9586 build_function_type_list (void_type_node, NULL_TREE));
9587 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9588 NULL_TREE, void_type_node);
9589 TREE_PUBLIC (decl) = 1;
9590 TREE_STATIC (decl) = 1;
9591 DECL_IGNORED_P (decl) = 1;
9592
9593 #if TARGET_MACHO
9594 if (TARGET_MACHO)
9595 {
9596 switch_to_section (darwin_sections[text_coal_section]);
9597 fputs ("\t.weak_definition\t", asm_out_file);
9598 assemble_name (asm_out_file, name);
9599 fputs ("\n\t.private_extern\t", asm_out_file);
9600 assemble_name (asm_out_file, name);
9601 putc ('\n', asm_out_file);
9602 ASM_OUTPUT_LABEL (asm_out_file, name);
9603 DECL_WEAK (decl) = 1;
9604 }
9605 else
9606 #endif
9607 if (USE_HIDDEN_LINKONCE)
9608 {
9609 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9610
9611 targetm.asm_out.unique_section (decl, 0);
9612 switch_to_section (get_named_section (decl, NULL, 0));
9613
9614 targetm.asm_out.globalize_label (asm_out_file, name);
9615 fputs ("\t.hidden\t", asm_out_file);
9616 assemble_name (asm_out_file, name);
9617 putc ('\n', asm_out_file);
9618 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9619 }
9620 else
9621 {
9622 switch_to_section (text_section);
9623 ASM_OUTPUT_LABEL (asm_out_file, name);
9624 }
9625
9626 DECL_INITIAL (decl) = make_node (BLOCK);
9627 current_function_decl = decl;
9628 init_function_start (decl);
9629 first_function_block_is_cold = false;
9630 /* Make sure unwind info is emitted for the thunk if needed. */
9631 final_start_function (emit_barrier (), asm_out_file, 1);
9632
9633 /* Pad stack IP move with 4 instructions (two NOPs count
9634 as one instruction). */
9635 if (TARGET_PAD_SHORT_FUNCTION)
9636 {
9637 int i = 8;
9638
9639 while (i--)
9640 fputs ("\tnop\n", asm_out_file);
9641 }
9642
9643 xops[0] = gen_rtx_REG (Pmode, regno);
9644 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9645 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9646 output_asm_insn ("%!ret", NULL);
9647 final_end_function ();
9648 init_insn_lengths ();
9649 free_after_compilation (cfun);
9650 set_cfun (NULL);
9651 current_function_decl = NULL;
9652 }
9653
9654 if (flag_split_stack)
9655 file_end_indicate_split_stack ();
9656 }
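/* Each emitted thunk is just a load of the return address into the chosen
   register followed by a return, e.g. for %ebx (a sketch of the output,
   AT&T syntax):

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret                                                    */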
9657
9658 /* Emit code for the SET_GOT patterns. */
9659
9660 const char *
9661 output_set_got (rtx dest, rtx label)
9662 {
9663 rtx xops[3];
9664
9665 xops[0] = dest;
9666
9667 if (TARGET_VXWORKS_RTP && flag_pic)
9668 {
9669 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9670 xops[2] = gen_rtx_MEM (Pmode,
9671 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9672 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9673
9674 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9675 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9676 an unadorned address. */
9677 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9678 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9679 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9680 return "";
9681 }
9682
9683 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9684
9685 if (!flag_pic)
9686 {
9687 if (TARGET_MACHO)
9688 /* We don't need a pic base, we're not producing pic. */
9689 gcc_unreachable ();
9690
9691 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9692 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9693 targetm.asm_out.internal_label (asm_out_file, "L",
9694 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9695 }
9696 else
9697 {
9698 char name[32];
9699 get_pc_thunk_name (name, REGNO (dest));
9700 pic_labels_used |= 1 << REGNO (dest);
9701
9702 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9703 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9704 output_asm_insn ("%!call\t%X2", xops);
9705
9706 #if TARGET_MACHO
9707 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9708 This is what will be referenced by the Mach-O PIC subsystem. */
9709 if (machopic_should_output_picbase_label () || !label)
9710 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9711
9712 /* When we are restoring the pic base at the site of a nonlocal label,
9713 and we decided to emit the pic base above, we will still output a
9714 local label used for calculating the correction offset (even though
9715 the offset will be 0 in that case). */
9716 if (label)
9717 targetm.asm_out.internal_label (asm_out_file, "L",
9718 CODE_LABEL_NUMBER (label));
9719 #endif
9720 }
9721
9722 if (!TARGET_MACHO)
9723 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9724
9725 return "";
9726 }
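/* In the usual non-Mach-O PIC case the sequence produced above looks
   like (a sketch, AT&T syntax, with %ebx as the GOT register):

             call    __x86.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx                   */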
9727
9728 /* Generate a "push" pattern for input ARG. */
9729
9730 static rtx
9731 gen_push (rtx arg)
9732 {
9733 struct machine_function *m = cfun->machine;
9734
9735 if (m->fs.cfa_reg == stack_pointer_rtx)
9736 m->fs.cfa_offset += UNITS_PER_WORD;
9737 m->fs.sp_offset += UNITS_PER_WORD;
9738
9739 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9740 arg = gen_rtx_REG (word_mode, REGNO (arg));
9741
9742 return gen_rtx_SET (VOIDmode,
9743 gen_rtx_MEM (word_mode,
9744 gen_rtx_PRE_DEC (Pmode,
9745 stack_pointer_rtx)),
9746 arg);
9747 }
9748
9749 /* Generate a "pop" pattern for input ARG. */
9750
9751 static rtx
9752 gen_pop (rtx arg)
9753 {
9754 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9755 arg = gen_rtx_REG (word_mode, REGNO (arg));
9756
9757 return gen_rtx_SET (VOIDmode,
9758 arg,
9759 gen_rtx_MEM (word_mode,
9760 gen_rtx_POST_INC (Pmode,
9761 stack_pointer_rtx)));
9762 }
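/* gen_push and gen_pop build the raw RTL rather than going through the
   named expanders; e.g. on 64-bit gen_push (di) yields roughly
     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
   and gen_pop the mirror image with post_inc.  A sketch of the RTL
   shape only.  */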
9763
9764 /* Return >= 0 if there is an unused call-clobbered register available
9765 for the entire function. */
9766
9767 static unsigned int
9768 ix86_select_alt_pic_regnum (void)
9769 {
9770 if (ix86_use_pseudo_pic_reg ())
9771 return INVALID_REGNUM;
9772
9773 if (crtl->is_leaf
9774 && !crtl->profile
9775 && !ix86_current_function_calls_tls_descriptor)
9776 {
9777 int i, drap;
9778 /* Can't use the same register for both PIC and DRAP. */
9779 if (crtl->drap_reg)
9780 drap = REGNO (crtl->drap_reg);
9781 else
9782 drap = -1;
9783 for (i = 2; i >= 0; --i)
9784 if (i != drap && !df_regs_ever_live_p (i))
9785 return i;
9786 }
9787
9788 return INVALID_REGNUM;
9789 }
9790
9791 /* Return TRUE if we need to save REGNO. */
9792
9793 static bool
9794 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9795 {
9796 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9797 && pic_offset_table_rtx)
9798 {
9799 if (ix86_use_pseudo_pic_reg ())
9800 {
9801 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9802 _mcount in prologue. */
9803 if (!TARGET_64BIT && flag_pic && crtl->profile)
9804 return true;
9805 }
9806 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9807 || crtl->profile
9808 || crtl->calls_eh_return
9809 || crtl->uses_const_pool
9810 || cfun->has_nonlocal_label)
9811 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9812 }
9813
9814 if (crtl->calls_eh_return && maybe_eh_return)
9815 {
9816 unsigned i;
9817 for (i = 0; ; i++)
9818 {
9819 unsigned test = EH_RETURN_DATA_REGNO (i);
9820 if (test == INVALID_REGNUM)
9821 break;
9822 if (test == regno)
9823 return true;
9824 }
9825 }
9826
9827 if (crtl->drap_reg
9828 && regno == REGNO (crtl->drap_reg)
9829 && !cfun->machine->no_drap_save_restore)
9830 return true;
9831
9832 return (df_regs_ever_live_p (regno)
9833 && !call_used_regs[regno]
9834 && !fixed_regs[regno]
9835 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9836 }
9837
9838 /* Return number of saved general purpose registers. */
9839
9840 static int
9841 ix86_nsaved_regs (void)
9842 {
9843 int nregs = 0;
9844 int regno;
9845
9846 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9847 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9848 nregs ++;
9849 return nregs;
9850 }
9851
9852 /* Return number of saved SSE registers. */
9853
9854 static int
9855 ix86_nsaved_sseregs (void)
9856 {
9857 int nregs = 0;
9858 int regno;
9859
9860 if (!TARGET_64BIT_MS_ABI)
9861 return 0;
9862 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9863 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9864 nregs ++;
9865 return nregs;
9866 }
9867
9868 /* Given FROM and TO register numbers, say whether this elimination is
9869 allowed. If stack alignment is needed, we can only replace argument
9870 pointer with hard frame pointer, or replace frame pointer with stack
9871 pointer. Otherwise, frame pointer elimination is automatically
9872 handled and all other eliminations are valid. */
9873
9874 static bool
9875 ix86_can_eliminate (const int from, const int to)
9876 {
9877 if (stack_realign_fp)
9878 return ((from == ARG_POINTER_REGNUM
9879 && to == HARD_FRAME_POINTER_REGNUM)
9880 || (from == FRAME_POINTER_REGNUM
9881 && to == STACK_POINTER_REGNUM));
9882 else
9883 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9884 }
9885
9886 /* Return the offset between two registers, one to be eliminated, and the other
9887 its replacement, at the start of a routine. */
9888
9889 HOST_WIDE_INT
9890 ix86_initial_elimination_offset (int from, int to)
9891 {
9892 struct ix86_frame frame;
9893 ix86_compute_frame_layout (&frame);
9894
9895 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9896 return frame.hard_frame_pointer_offset;
9897 else if (from == FRAME_POINTER_REGNUM
9898 && to == HARD_FRAME_POINTER_REGNUM)
9899 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9900 else
9901 {
9902 gcc_assert (to == STACK_POINTER_REGNUM);
9903
9904 if (from == ARG_POINTER_REGNUM)
9905 return frame.stack_pointer_offset;
9906
9907 gcc_assert (from == FRAME_POINTER_REGNUM);
9908 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9909 }
9910 }
9911
9912 /* In a dynamically-aligned function, we can't know the offset from
9913 stack pointer to frame pointer, so we must ensure that setjmp
9914 eliminates fp against the hard fp (%ebp) rather than trying to
9915 index from %esp up to the top of the frame across a gap that is
9916 of unknown (at compile-time) size. */
9917 static rtx
9918 ix86_builtin_setjmp_frame_value (void)
9919 {
9920 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9921 }
9922
9923 /* When using -fsplit-stack, the allocation routines set a field in
9924 the TCB to the bottom of the stack plus this much space, measured
9925 in bytes. */
9926
9927 #define SPLIT_STACK_AVAILABLE 256
9928
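/* A sketch of the frame layout computed below, from higher to lower
   addresses: return address, optional pushed static chain, optional saved
   frame pointer, the general-register save area, padding plus the SSE
   register save area (Win64 only), the va_arg register save area, local
   variables, and finally the outgoing argument area.  The various
   *_offset fields of struct ix86_frame record the boundaries between
   these regions.  */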
9929 /* Fill structure ix86_frame describing the frame of the currently compiled function. */
9930
9931 static void
9932 ix86_compute_frame_layout (struct ix86_frame *frame)
9933 {
9934 unsigned HOST_WIDE_INT stack_alignment_needed;
9935 HOST_WIDE_INT offset;
9936 unsigned HOST_WIDE_INT preferred_alignment;
9937 HOST_WIDE_INT size = get_frame_size ();
9938 HOST_WIDE_INT to_allocate;
9939
9940 frame->nregs = ix86_nsaved_regs ();
9941 frame->nsseregs = ix86_nsaved_sseregs ();
9942
9943 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9944 except for function prologues and leaf functions. */
9945 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9946 && (!crtl->is_leaf || cfun->calls_alloca != 0
9947 || ix86_current_function_calls_tls_descriptor))
9948 {
9949 crtl->preferred_stack_boundary = 128;
9950 crtl->stack_alignment_needed = 128;
9951 }
9952 /* preferred_stack_boundary is never updated for calls
9953 expanded from tls descriptors. Update it here. We don't update it in
9954 the expand stage because, according to the comments before
9955 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9956 away. */
9957 else if (ix86_current_function_calls_tls_descriptor
9958 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9959 {
9960 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9961 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9962 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9963 }
9964
9965 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9966 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9967
9968 gcc_assert (!size || stack_alignment_needed);
9969 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9970 gcc_assert (preferred_alignment <= stack_alignment_needed);
9971
9972 /* For SEH we have to limit the amount of code movement into the prologue.
9973 At present we do this via a BLOCKAGE, at which point there's very little
9974 scheduling that can be done, which means that there's very little point
9975 in doing anything except PUSHs. */
9976 if (TARGET_SEH)
9977 cfun->machine->use_fast_prologue_epilogue = false;
9978
9979 /* During reload iterations the number of saved registers can change.
9980 Recompute the value as needed. Do not recompute when the number of registers
9981 didn't change, as reload does multiple calls to the function and does not
9982 expect the decision to change within a single iteration. */
9983 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9984 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9985 {
9986 int count = frame->nregs;
9987 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9988
9989 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9990
9991 /* The fast prologue uses move instead of push to save registers. This
9992 is significantly longer, but also executes faster as modern hardware
9993 can execute the moves in parallel, but can't do that for push/pop.
9994
9995 Be careful about choosing what prologue to emit: when the function takes
9996 many instructions to execute, we may use the slow version, as well as when
9997 the function is known to be outside a hot spot (this is known with
9998 feedback only). Weight the size of the function by the number of registers
9999 to save, as it is cheap to use one or two push instructions but very
10000 slow to use many of them. */
10001 if (count)
10002 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10003 if (node->frequency < NODE_FREQUENCY_NORMAL
10004 || (flag_branch_probabilities
10005 && node->frequency < NODE_FREQUENCY_HOT))
10006 cfun->machine->use_fast_prologue_epilogue = false;
10007 else
10008 cfun->machine->use_fast_prologue_epilogue
10009 = !expensive_function_p (count);
10010 }
10011
10012 frame->save_regs_using_mov
10013 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10014 /* If static stack checking is enabled and done with probes,
10015 the registers need to be saved before allocating the frame. */
10016 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10017
10018 /* Skip return address. */
10019 offset = UNITS_PER_WORD;
10020
10021 /* Skip pushed static chain. */
10022 if (ix86_static_chain_on_stack)
10023 offset += UNITS_PER_WORD;
10024
10025 /* Skip saved base pointer. */
10026 if (frame_pointer_needed)
10027 offset += UNITS_PER_WORD;
10028 frame->hfp_save_offset = offset;
10029
10030 /* The traditional frame pointer location is at the top of the frame. */
10031 frame->hard_frame_pointer_offset = offset;
10032
10033 /* Register save area */
10034 offset += frame->nregs * UNITS_PER_WORD;
10035 frame->reg_save_offset = offset;
10036
10037 /* On SEH target, registers are pushed just before the frame pointer
10038 location. */
10039 if (TARGET_SEH)
10040 frame->hard_frame_pointer_offset = offset;
10041
10042 /* Align and set SSE register save area. */
10043 if (frame->nsseregs)
10044 {
10045 /* The only ABI that has saved SSE registers (Win64) also has a
10046 16-byte aligned default stack, and thus we don't need to be
10047 within the re-aligned local stack frame to save them. */
10048 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10049 offset = (offset + 16 - 1) & -16;
10050 offset += frame->nsseregs * 16;
10051 }
10052 frame->sse_reg_save_offset = offset;
10053
10054 /* The re-aligned stack starts here. Values before this point are not
10055 directly comparable with values below this point. In order to make
10056 sure that no value happens to be the same before and after, force
10057 the alignment computation below to add a non-zero value. */
10058 if (stack_realign_fp)
10059 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10060
10061 /* Va-arg area */
10062 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10063 offset += frame->va_arg_size;
10064
10065 /* Align start of frame for local function. */
10066 if (stack_realign_fp
10067 || offset != frame->sse_reg_save_offset
10068 || size != 0
10069 || !crtl->is_leaf
10070 || cfun->calls_alloca
10071 || ix86_current_function_calls_tls_descriptor)
10072 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10073
10074 /* Frame pointer points here. */
10075 frame->frame_pointer_offset = offset;
10076
10077 offset += size;
10078
10079 /* Add the outgoing arguments area. It can be skipped if we eliminated
10080 all the function calls as dead code.
10081 Skipping is however impossible when the function calls alloca. The alloca
10082 expander assumes that the last crtl->outgoing_args_size bytes
10083 of the stack frame are unused. */
10084 if (ACCUMULATE_OUTGOING_ARGS
10085 && (!crtl->is_leaf || cfun->calls_alloca
10086 || ix86_current_function_calls_tls_descriptor))
10087 {
10088 offset += crtl->outgoing_args_size;
10089 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10090 }
10091 else
10092 frame->outgoing_arguments_size = 0;
10093
10094 /* Align stack boundary. Only needed if we're calling another function
10095 or using alloca. */
10096 if (!crtl->is_leaf || cfun->calls_alloca
10097 || ix86_current_function_calls_tls_descriptor)
10098 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10099
10100 /* We've reached end of stack frame. */
10101 frame->stack_pointer_offset = offset;
10102
10103 /* Size prologue needs to allocate. */
10104 to_allocate = offset - frame->sse_reg_save_offset;
10105
10106 if ((!to_allocate && frame->nregs <= 1)
10107 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10108 frame->save_regs_using_mov = false;
10109
10110 if (ix86_using_red_zone ()
10111 && crtl->sp_is_unchanging
10112 && crtl->is_leaf
10113 && !ix86_current_function_calls_tls_descriptor)
10114 {
10115 frame->red_zone_size = to_allocate;
10116 if (frame->save_regs_using_mov)
10117 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10118 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10119 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10120 }
10121 else
10122 frame->red_zone_size = 0;
10123 frame->stack_pointer_offset -= frame->red_zone_size;
10124
10125 /* The SEH frame pointer location is near the bottom of the frame.
10126 This is enforced by the fact that the difference between the
10127 stack pointer and the frame pointer is limited to 240 bytes in
10128 the unwind data structure. */
10129 if (TARGET_SEH)
10130 {
10131 HOST_WIDE_INT diff;
10132
10133 /* If we can leave the frame pointer where it is, do so. Also, this returns
10134 the establisher frame for __builtin_frame_address (0). */
10135 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10136 if (diff <= SEH_MAX_FRAME_SIZE
10137 && (diff > 240 || (diff & 15) != 0)
10138 && !crtl->accesses_prior_frames)
10139 {
10140 /* Ideally we'd determine what portion of the local stack frame
10141 (within the constraint of the lowest 240) is most heavily used.
10142 But without that complication, simply bias the frame pointer
10143 by 128 bytes so as to maximize the amount of the local stack
10144 frame that is addressable with 8-bit offsets. */
10145 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10146 }
10147 }
10148 }
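/* Illustrative sketch, not part of the compiler: the round-up-to-alignment
   idiom `(offset + align - 1) & -align' used repeatedly above assumes ALIGN
   is a power of two, so -ALIGN is a mask of the high bits.  For example,
   rounding offset 41 up to a 16-byte boundary gives (41 + 15) & -16 == 48.
   The hypothetical helper below is only a standalone restatement of that
   arithmetic.  */
#if 0
static inline HOST_WIDE_INT
example_align_up (HOST_WIDE_INT offset, HOST_WIDE_INT align)
{
  /* ALIGN must be a power of two.  */
  return (offset + align - 1) & -align;
}
#endif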
10149
10150 /* This is semi-inlined memory_address_length, but simplified
10151 since we know that we're always dealing with reg+offset, and
10152 to avoid having to create and discard all that rtl. */
10153
10154 static inline int
10155 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10156 {
10157 int len = 4;
10158
10159 if (offset == 0)
10160 {
10161 /* EBP and R13 cannot be encoded without an offset. */
10162 len = (regno == BP_REG || regno == R13_REG);
10163 }
10164 else if (IN_RANGE (offset, -128, 127))
10165 len = 1;
10166
10167 /* ESP and R12 must be encoded with a SIB byte. */
10168 if (regno == SP_REG || regno == R12_REG)
10169 len++;
10170
10171 return len;
10172 }
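/* Worked example (illustrative only): with the rules above, (%rbx) with a
   zero offset needs no displacement (length 0), (%rbp) needs a zero disp8
   (length 1), -72(%rsp) needs a disp8 plus a SIB byte (length 2), and
   512(%rbx) needs a full disp32 (length 4).  */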
10173
10174 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10175 The valid base registers are taken from CFUN->MACHINE->FS. */
10176
10177 static rtx
10178 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10179 {
10180 const struct machine_function *m = cfun->machine;
10181 rtx base_reg = NULL;
10182 HOST_WIDE_INT base_offset = 0;
10183
10184 if (m->use_fast_prologue_epilogue)
10185 {
10186 /* Choose the base register most likely to allow the most scheduling
10187 opportunities. Generally FP is valid throughout the function,
10188 while DRAP must be reloaded within the epilogue. But prefer either
10189 of them over SP, whose encoding is larger (it requires a SIB byte). */
10190
10191 if (m->fs.fp_valid)
10192 {
10193 base_reg = hard_frame_pointer_rtx;
10194 base_offset = m->fs.fp_offset - cfa_offset;
10195 }
10196 else if (m->fs.drap_valid)
10197 {
10198 base_reg = crtl->drap_reg;
10199 base_offset = 0 - cfa_offset;
10200 }
10201 else if (m->fs.sp_valid)
10202 {
10203 base_reg = stack_pointer_rtx;
10204 base_offset = m->fs.sp_offset - cfa_offset;
10205 }
10206 }
10207 else
10208 {
10209 HOST_WIDE_INT toffset;
10210 int len = 16, tlen;
10211
10212 /* Choose the base register with the smallest address encoding.
10213 With a tie, choose FP > DRAP > SP. */
10214 if (m->fs.sp_valid)
10215 {
10216 base_reg = stack_pointer_rtx;
10217 base_offset = m->fs.sp_offset - cfa_offset;
10218 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10219 }
10220 if (m->fs.drap_valid)
10221 {
10222 toffset = 0 - cfa_offset;
10223 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10224 if (tlen <= len)
10225 {
10226 base_reg = crtl->drap_reg;
10227 base_offset = toffset;
10228 len = tlen;
10229 }
10230 }
10231 if (m->fs.fp_valid)
10232 {
10233 toffset = m->fs.fp_offset - cfa_offset;
10234 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10235 if (tlen <= len)
10236 {
10237 base_reg = hard_frame_pointer_rtx;
10238 base_offset = toffset;
10239 len = tlen;
10240 }
10241 }
10242 }
10243 gcc_assert (base_reg != NULL);
10244
10245 return plus_constant (Pmode, base_reg, base_offset);
10246 }
10247
10248 /* Emit code to save registers in the prologue. */
10249
10250 static void
10251 ix86_emit_save_regs (void)
10252 {
10253 unsigned int regno;
10254 rtx insn;
10255
10256 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10257 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10258 {
10259 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10260 RTX_FRAME_RELATED_P (insn) = 1;
10261 }
10262 }
10263
10264 /* Emit a single register save at CFA - CFA_OFFSET. */
10265
10266 static void
10267 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10268 HOST_WIDE_INT cfa_offset)
10269 {
10270 struct machine_function *m = cfun->machine;
10271 rtx reg = gen_rtx_REG (mode, regno);
10272 rtx mem, addr, base, insn;
10273
10274 addr = choose_baseaddr (cfa_offset);
10275 mem = gen_frame_mem (mode, addr);
10276
10277 /* For SSE saves, we need to indicate the 128-bit alignment. */
10278 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10279
10280 insn = emit_move_insn (mem, reg);
10281 RTX_FRAME_RELATED_P (insn) = 1;
10282
10283 base = addr;
10284 if (GET_CODE (base) == PLUS)
10285 base = XEXP (base, 0);
10286 gcc_checking_assert (REG_P (base));
10287
10288 /* When saving registers into a re-aligned local stack frame, avoid
10289 any tricky guessing by dwarf2out. */
10290 if (m->fs.realigned)
10291 {
10292 gcc_checking_assert (stack_realign_drap);
10293
10294 if (regno == REGNO (crtl->drap_reg))
10295 {
10296 /* A bit of a hack. We force the DRAP register to be saved in
10297 the re-aligned stack frame, which provides us with a copy
10298 of the CFA that will last past the prologue. Install it. */
10299 gcc_checking_assert (cfun->machine->fs.fp_valid);
10300 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10301 cfun->machine->fs.fp_offset - cfa_offset);
10302 mem = gen_rtx_MEM (mode, addr);
10303 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10304 }
10305 else
10306 {
10307 /* The frame pointer is a stable reference within the
10308 aligned frame. Use it. */
10309 gcc_checking_assert (cfun->machine->fs.fp_valid);
10310 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10311 cfun->machine->fs.fp_offset - cfa_offset);
10312 mem = gen_rtx_MEM (mode, addr);
10313 add_reg_note (insn, REG_CFA_EXPRESSION,
10314 gen_rtx_SET (VOIDmode, mem, reg));
10315 }
10316 }
10317
10318 /* The memory may not be relative to the current CFA register,
10319 which means that we may need to generate a new pattern for
10320 use by the unwind info. */
10321 else if (base != m->fs.cfa_reg)
10322 {
10323 addr = plus_constant (Pmode, m->fs.cfa_reg,
10324 m->fs.cfa_offset - cfa_offset);
10325 mem = gen_rtx_MEM (mode, addr);
10326 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10327 }
10328 }
10329
10330 /* Emit code to save registers using MOV insns.
10331 First register is stored at CFA - CFA_OFFSET. */
10332 static void
10333 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10334 {
10335 unsigned int regno;
10336
10337 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10338 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10339 {
10340 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10341 cfa_offset -= UNITS_PER_WORD;
10342 }
10343 }
10344
10345 /* Emit code to save SSE registers using MOV insns.
10346 First register is stored at CFA - CFA_OFFSET. */
10347 static void
10348 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10349 {
10350 unsigned int regno;
10351
10352 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10353 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10354 {
10355 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10356 cfa_offset -= 16;
10357 }
10358 }
10359
10360 static GTY(()) rtx queued_cfa_restores;
10361
10362 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
10363 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10364 Don't add the note if the previously saved value will be left untouched
10365 within the stack red zone until return, as unwinders can find the same
10366 value in the register and on the stack. */
10367
10368 static void
10369 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10370 {
10371 if (!crtl->shrink_wrapped
10372 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10373 return;
10374
10375 if (insn)
10376 {
10377 add_reg_note (insn, REG_CFA_RESTORE, reg);
10378 RTX_FRAME_RELATED_P (insn) = 1;
10379 }
10380 else
10381 queued_cfa_restores
10382 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10383 }
10384
10385 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10386
10387 static void
10388 ix86_add_queued_cfa_restore_notes (rtx insn)
10389 {
10390 rtx last;
10391 if (!queued_cfa_restores)
10392 return;
10393 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10394 ;
10395 XEXP (last, 1) = REG_NOTES (insn);
10396 REG_NOTES (insn) = queued_cfa_restores;
10397 queued_cfa_restores = NULL_RTX;
10398 RTX_FRAME_RELATED_P (insn) = 1;
10399 }
10400
10401 /* Expand prologue or epilogue stack adjustment.
10402 The pattern exists to put a dependency on all ebp-based memory accesses.
10403 STYLE should be negative if instructions should be marked as frame related,
10404 zero if the %r11 register is live and cannot be freely used, and positive
10405 otherwise. */
10406
10407 static void
10408 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10409 int style, bool set_cfa)
10410 {
10411 struct machine_function *m = cfun->machine;
10412 rtx insn;
10413 bool add_frame_related_expr = false;
10414
10415 if (Pmode == SImode)
10416 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10417 else if (x86_64_immediate_operand (offset, DImode))
10418 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10419 else
10420 {
10421 rtx tmp;
10422 /* r11 is also used by the indirect sibcall return path: it is set before
10423 the epilogue and used after it. */
10424 if (style)
10425 tmp = gen_rtx_REG (DImode, R11_REG);
10426 else
10427 {
10428 gcc_assert (src != hard_frame_pointer_rtx
10429 && dest != hard_frame_pointer_rtx);
10430 tmp = hard_frame_pointer_rtx;
10431 }
10432 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10433 if (style < 0)
10434 add_frame_related_expr = true;
10435
10436 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10437 }
10438
10439 insn = emit_insn (insn);
10440 if (style >= 0)
10441 ix86_add_queued_cfa_restore_notes (insn);
10442
10443 if (set_cfa)
10444 {
10445 rtx r;
10446
10447 gcc_assert (m->fs.cfa_reg == src);
10448 m->fs.cfa_offset += INTVAL (offset);
10449 m->fs.cfa_reg = dest;
10450
10451 r = gen_rtx_PLUS (Pmode, src, offset);
10452 r = gen_rtx_SET (VOIDmode, dest, r);
10453 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10454 RTX_FRAME_RELATED_P (insn) = 1;
10455 }
10456 else if (style < 0)
10457 {
10458 RTX_FRAME_RELATED_P (insn) = 1;
10459 if (add_frame_related_expr)
10460 {
10461 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10462 r = gen_rtx_SET (VOIDmode, dest, r);
10463 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10464 }
10465 }
10466
10467 if (dest == stack_pointer_rtx)
10468 {
10469 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10470 bool valid = m->fs.sp_valid;
10471
10472 if (src == hard_frame_pointer_rtx)
10473 {
10474 valid = m->fs.fp_valid;
10475 ooffset = m->fs.fp_offset;
10476 }
10477 else if (src == crtl->drap_reg)
10478 {
10479 valid = m->fs.drap_valid;
10480 ooffset = 0;
10481 }
10482 else
10483 {
10484 /* Otherwise there are two possibilities: SP itself, which we set
10485 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10486 taken care of by hand along the eh_return path. */
10487 gcc_checking_assert (src == stack_pointer_rtx
10488 || offset == const0_rtx);
10489 }
10490
10491 m->fs.sp_offset = ooffset - INTVAL (offset);
10492 m->fs.sp_valid = valid;
10493 }
10494 }
10495
10496 /* Find an available register to be used as the dynamic realign argument
10497 pointer register. Such a register will be written in the prologue and
10498 used at the beginning of the body, so it must not be
10499 1. a parameter passing register.
10500 2. the GOT pointer.
10501 We reuse the static-chain register if it is available. Otherwise, we
10502 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10503 shorter encoding.
10504
10505 Return: the regno of the chosen register. */
10506
10507 static unsigned int
10508 find_drap_reg (void)
10509 {
10510 tree decl = cfun->decl;
10511
10512 if (TARGET_64BIT)
10513 {
10514 /* Use R13 for a nested function or a function that needs a static
10515 chain. Since a function with a tail call may use any caller-saved
10516 register in the epilogue, DRAP must not use a caller-saved
10517 register in that case. */
10518 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10519 return R13_REG;
10520
10521 return R10_REG;
10522 }
10523 else
10524 {
10525 /* Use DI for a nested function or a function that needs a static
10526 chain. Since a function with a tail call may use any caller-saved
10527 register in the epilogue, DRAP must not use a caller-saved
10528 register in that case. */
10529 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10530 return DI_REG;
10531
10532 /* Reuse static chain register if it isn't used for parameter
10533 passing. */
10534 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10535 {
10536 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10537 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10538 return CX_REG;
10539 }
10540 return DI_REG;
10541 }
10542 }
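/* For instance (illustrative): a 32-bit cdecl function with at most two
   register parameters and neither fastcall nor thiscall gets %ecx as the
   DRAP register, while a nested function (one with a static chain) falls
   back to %edi; in 64-bit mode the default is %r10, or %r13 when a static
   chain or an emitted tail call is involved.  */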
10543
10544 /* Return minimum incoming stack alignment. */
10545
10546 static unsigned int
10547 ix86_minimum_incoming_stack_boundary (bool sibcall)
10548 {
10549 unsigned int incoming_stack_boundary;
10550
10551 /* Prefer the one specified at command line. */
10552 if (ix86_user_incoming_stack_boundary)
10553 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10554 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10555 if -mstackrealign is used, this is not a sibcall check, and the
10556 estimated stack alignment is 128 bits. */
10557 else if (!sibcall
10558 && !TARGET_64BIT
10559 && ix86_force_align_arg_pointer
10560 && crtl->stack_alignment_estimated == 128)
10561 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10562 else
10563 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10564
10565 /* Incoming stack alignment can be changed on individual functions
10566 via force_align_arg_pointer attribute. We use the smallest
10567 incoming stack boundary. */
10568 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10569 && lookup_attribute (ix86_force_align_arg_pointer_string,
10570 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10571 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10572
10573 /* The incoming stack frame has to be aligned at least at
10574 parm_stack_boundary. */
10575 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10576 incoming_stack_boundary = crtl->parm_stack_boundary;
10577
10578 /* Stack at entrance of main is aligned by runtime. We use the
10579 smallest incoming stack boundary. */
10580 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10581 && DECL_NAME (current_function_decl)
10582 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10583 && DECL_FILE_SCOPE_P (current_function_decl))
10584 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10585
10586 return incoming_stack_boundary;
10587 }
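/* Illustrative user-level example (hypothetical code, not part of this
   file): the attribute consulted above can be applied per function, e.g.

       void __attribute__ ((force_align_arg_pointer))
       callback (void);

   which makes that function assume only MIN_STACK_BOUNDARY on entry and
   realign the stack itself instead of trusting the caller's alignment.  */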
10588
10589 /* Update incoming stack boundary and estimated stack alignment. */
10590
10591 static void
10592 ix86_update_stack_boundary (void)
10593 {
10594 ix86_incoming_stack_boundary
10595 = ix86_minimum_incoming_stack_boundary (false);
10596
10597 /* x86_64 varargs need 16-byte stack alignment for the register save
10598 area. */
10599 if (TARGET_64BIT
10600 && cfun->stdarg
10601 && crtl->stack_alignment_estimated < 128)
10602 crtl->stack_alignment_estimated = 128;
10603 }
10604
10605 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10606 needed or an rtx for DRAP otherwise. */
10607
10608 static rtx
10609 ix86_get_drap_rtx (void)
10610 {
10611 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10612 crtl->need_drap = true;
10613
10614 if (stack_realign_drap)
10615 {
10616 /* Assign DRAP to vDRAP and return vDRAP. */
10617 unsigned int regno = find_drap_reg ();
10618 rtx drap_vreg;
10619 rtx arg_ptr;
10620 rtx_insn *seq, *insn;
10621
10622 arg_ptr = gen_rtx_REG (Pmode, regno);
10623 crtl->drap_reg = arg_ptr;
10624
10625 start_sequence ();
10626 drap_vreg = copy_to_reg (arg_ptr);
10627 seq = get_insns ();
10628 end_sequence ();
10629
10630 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10631 if (!optimize)
10632 {
10633 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10634 RTX_FRAME_RELATED_P (insn) = 1;
10635 }
10636 return drap_vreg;
10637 }
10638 else
10639 return NULL;
10640 }
10641
10642 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10643
10644 static rtx
10645 ix86_internal_arg_pointer (void)
10646 {
10647 return virtual_incoming_args_rtx;
10648 }
10649
10650 struct scratch_reg {
10651 rtx reg;
10652 bool saved;
10653 };
10654
10655 /* Return a short-lived scratch register for use on function entry.
10656 In 32-bit mode, it is valid only after the registers are saved
10657 in the prologue. This register must be released by means of
10658 release_scratch_register_on_entry once it is dead. */
10659
10660 static void
10661 get_scratch_register_on_entry (struct scratch_reg *sr)
10662 {
10663 int regno;
10664
10665 sr->saved = false;
10666
10667 if (TARGET_64BIT)
10668 {
10669 /* We always use R11 in 64-bit mode. */
10670 regno = R11_REG;
10671 }
10672 else
10673 {
10674 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10675 bool fastcall_p
10676 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10677 bool thiscall_p
10678 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10679 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10680 int regparm = ix86_function_regparm (fntype, decl);
10681 int drap_regno
10682 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10683
10684 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10685 for the static chain register. */
10686 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10687 && drap_regno != AX_REG)
10688 regno = AX_REG;
10689 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10690 for the static chain register. */
10691 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10692 regno = AX_REG;
10693 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10694 regno = DX_REG;
10695 /* ecx is the static chain register. */
10696 else if (regparm < 3 && !fastcall_p && !thiscall_p
10697 && !static_chain_p
10698 && drap_regno != CX_REG)
10699 regno = CX_REG;
10700 else if (ix86_save_reg (BX_REG, true))
10701 regno = BX_REG;
10702 /* esi is the static chain register. */
10703 else if (!(regparm == 3 && static_chain_p)
10704 && ix86_save_reg (SI_REG, true))
10705 regno = SI_REG;
10706 else if (ix86_save_reg (DI_REG, true))
10707 regno = DI_REG;
10708 else
10709 {
10710 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10711 sr->saved = true;
10712 }
10713 }
10714
10715 sr->reg = gen_rtx_REG (Pmode, regno);
10716 if (sr->saved)
10717 {
10718 rtx insn = emit_insn (gen_push (sr->reg));
10719 RTX_FRAME_RELATED_P (insn) = 1;
10720 }
10721 }
10722
10723 /* Release a scratch register obtained from the preceding function. */
10724
10725 static void
10726 release_scratch_register_on_entry (struct scratch_reg *sr)
10727 {
10728 if (sr->saved)
10729 {
10730 struct machine_function *m = cfun->machine;
10731 rtx x, insn = emit_insn (gen_pop (sr->reg));
10732
10733 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10734 RTX_FRAME_RELATED_P (insn) = 1;
10735 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10736 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10737 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10738 m->fs.sp_offset -= UNITS_PER_WORD;
10739 }
10740 }
10741
10742 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10743
10744 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10745
10746 static void
10747 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10748 {
10749 /* We skip the probe for the first interval + a small dope of 4 words and
10750 probe that many bytes past the specified size to maintain a protection
10751 area at the bottom of the stack. */
10752 const int dope = 4 * UNITS_PER_WORD;
10753 rtx size_rtx = GEN_INT (size), last;
10754
10755 /* See if we have a constant small number of probes to generate. If so,
10756 that's the easy case. The run-time loop is made up of 11 insns in the
10757 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10758 for n # of intervals. */
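/* For example (assuming the usual 4 KiB probe interval): a 20 KiB
   allocation spans 5 intervals, so unrolling costs 3 + 2*(5-1) = 11 insns,
   the same as the run-time loop; anything larger is cheaper as a loop,
   which is why the cutoff below is 5 * PROBE_INTERVAL.  */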
10759 if (size <= 5 * PROBE_INTERVAL)
10760 {
10761 HOST_WIDE_INT i, adjust;
10762 bool first_probe = true;
10763
10764 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10765 values of N from 1 until it exceeds SIZE. If only one probe is
10766 needed, this will not generate any code. Then adjust and probe
10767 to PROBE_INTERVAL + SIZE. */
10768 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10769 {
10770 if (first_probe)
10771 {
10772 adjust = 2 * PROBE_INTERVAL + dope;
10773 first_probe = false;
10774 }
10775 else
10776 adjust = PROBE_INTERVAL;
10777
10778 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10779 plus_constant (Pmode, stack_pointer_rtx,
10780 -adjust)));
10781 emit_stack_probe (stack_pointer_rtx);
10782 }
10783
10784 if (first_probe)
10785 adjust = size + PROBE_INTERVAL + dope;
10786 else
10787 adjust = size + PROBE_INTERVAL - i;
10788
10789 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10790 plus_constant (Pmode, stack_pointer_rtx,
10791 -adjust)));
10792 emit_stack_probe (stack_pointer_rtx);
10793
10794 /* Adjust back to account for the additional first interval. */
10795 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10796 plus_constant (Pmode, stack_pointer_rtx,
10797 PROBE_INTERVAL + dope)));
10798 }
10799
10800 /* Otherwise, do the same as above, but in a loop. Note that we must be
10801 extra careful with variables wrapping around because we might be at
10802 the very top (or the very bottom) of the address space and we have
10803 to be able to handle this case properly; in particular, we use an
10804 equality test for the loop condition. */
10805 else
10806 {
10807 HOST_WIDE_INT rounded_size;
10808 struct scratch_reg sr;
10809
10810 get_scratch_register_on_entry (&sr);
10811
10812
10813 /* Step 1: round SIZE to the previous multiple of the interval. */
10814
10815 rounded_size = size & -PROBE_INTERVAL;
10816
10817
10818 /* Step 2: compute initial and final value of the loop counter. */
10819
10820 /* SP = SP_0 + PROBE_INTERVAL. */
10821 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10822 plus_constant (Pmode, stack_pointer_rtx,
10823 - (PROBE_INTERVAL + dope))));
10824
10825 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10826 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10827 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10828 gen_rtx_PLUS (Pmode, sr.reg,
10829 stack_pointer_rtx)));
10830
10831
10832 /* Step 3: the loop
10833
10834 while (SP != LAST_ADDR)
10835 {
10836 SP = SP + PROBE_INTERVAL
10837 probe at SP
10838 }
10839
10840 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10841 values of N from 1 until it is equal to ROUNDED_SIZE. */
10842
10843 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10844
10845
10846 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10847 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10848
10849 if (size != rounded_size)
10850 {
10851 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10852 plus_constant (Pmode, stack_pointer_rtx,
10853 rounded_size - size)));
10854 emit_stack_probe (stack_pointer_rtx);
10855 }
10856
10857 /* Adjust back to account for the additional first interval. */
10858 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10859 plus_constant (Pmode, stack_pointer_rtx,
10860 PROBE_INTERVAL + dope)));
10861
10862 release_scratch_register_on_entry (&sr);
10863 }
10864
10865 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10866
10867 /* Even if the stack pointer isn't the CFA register, we need to correctly
10868 describe the adjustments made to it, in particular differentiate the
10869 frame-related ones from the frame-unrelated ones. */
10870 if (size > 0)
10871 {
10872 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10873 XVECEXP (expr, 0, 0)
10874 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10875 plus_constant (Pmode, stack_pointer_rtx, -size));
10876 XVECEXP (expr, 0, 1)
10877 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10878 plus_constant (Pmode, stack_pointer_rtx,
10879 PROBE_INTERVAL + dope + size));
10880 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10881 RTX_FRAME_RELATED_P (last) = 1;
10882
10883 cfun->machine->fs.sp_offset += size;
10884 }
10885
10886 /* Make sure nothing is scheduled before we are done. */
10887 emit_insn (gen_blockage ());
10888 }
10889
10890 /* Adjust the stack pointer up to REG while probing it. */
10891
10892 const char *
10893 output_adjust_stack_and_probe (rtx reg)
10894 {
10895 static int labelno = 0;
10896 char loop_lab[32], end_lab[32];
10897 rtx xops[2];
10898
10899 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10900 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10901
10902 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10903
10904 /* Jump to END_LAB if SP == LAST_ADDR. */
10905 xops[0] = stack_pointer_rtx;
10906 xops[1] = reg;
10907 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10908 fputs ("\tje\t", asm_out_file);
10909 assemble_name_raw (asm_out_file, end_lab);
10910 fputc ('\n', asm_out_file);
10911
10912 /* SP = SP + PROBE_INTERVAL. */
10913 xops[1] = GEN_INT (PROBE_INTERVAL);
10914 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10915
10916 /* Probe at SP. */
10917 xops[1] = const0_rtx;
10918 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10919
10920 fprintf (asm_out_file, "\tjmp\t");
10921 assemble_name_raw (asm_out_file, loop_lab);
10922 fputc ('\n', asm_out_file);
10923
10924 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10925
10926 return "";
10927 }
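/* Roughly, the templates above expand to a loop like the following
   (illustrative 32-bit AT&T output, assuming the scratch register holding
   LAST_ADDR is %eax and a 4 KiB probe interval):

       .LPSRL0:
               cmpl    %eax, %esp        # SP == LAST_ADDR ?
               je      .LPSRE0
               subl    $4096, %esp       # SP -= PROBE_INTERVAL
               orl     $0, (%esp)        # probe (touch) the new page
               jmp     .LPSRL0
       .LPSRE0:
*/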
10928
10929 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10930 inclusive. These are offsets from the current stack pointer. */
10931
10932 static void
10933 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10934 {
10935 /* See if we have a constant small number of probes to generate. If so,
10936 that's the easy case. The run-time loop is made up of 7 insns in the
10937 generic case while the compile-time loop is made up of n insns for n #
10938 of intervals. */
10939 if (size <= 7 * PROBE_INTERVAL)
10940 {
10941 HOST_WIDE_INT i;
10942
10943 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10944 it exceeds SIZE. If only one probe is needed, this will not
10945 generate any code. Then probe at FIRST + SIZE. */
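/* Worked example (assuming a 4 KiB interval): FIRST = 0x1000 and
   SIZE = 0x30c8 give probes at sp - 0x2000, sp - 0x3000 and sp - 0x4000
   from the loop below, followed by the final probe at sp - 0x40c8.  */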
10946 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10947 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10948 -(first + i)));
10949
10950 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10951 -(first + size)));
10952 }
10953
10954 /* Otherwise, do the same as above, but in a loop. Note that we must be
10955 extra careful with variables wrapping around because we might be at
10956 the very top (or the very bottom) of the address space and we have
10957 to be able to handle this case properly; in particular, we use an
10958 equality test for the loop condition. */
10959 else
10960 {
10961 HOST_WIDE_INT rounded_size, last;
10962 struct scratch_reg sr;
10963
10964 get_scratch_register_on_entry (&sr);
10965
10966
10967 /* Step 1: round SIZE to the previous multiple of the interval. */
10968
10969 rounded_size = size & -PROBE_INTERVAL;
10970
10971
10972 /* Step 2: compute initial and final value of the loop counter. */
10973
10974 /* TEST_OFFSET = FIRST. */
10975 emit_move_insn (sr.reg, GEN_INT (-first));
10976
10977 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10978 last = first + rounded_size;
10979
10980
10981 /* Step 3: the loop
10982
10983 while (TEST_ADDR != LAST_ADDR)
10984 {
10985 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10986 probe at TEST_ADDR
10987 }
10988
10989 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10990 until it is equal to ROUNDED_SIZE. */
10991
10992 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10993
10994
10995 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10996 that SIZE is equal to ROUNDED_SIZE. */
10997
10998 if (size != rounded_size)
10999 emit_stack_probe (plus_constant (Pmode,
11000 gen_rtx_PLUS (Pmode,
11001 stack_pointer_rtx,
11002 sr.reg),
11003 rounded_size - size));
11004
11005 release_scratch_register_on_entry (&sr);
11006 }
11007
11008 /* Make sure nothing is scheduled before we are done. */
11009 emit_insn (gen_blockage ());
11010 }
11011
11012 /* Probe a range of stack addresses from REG to END, inclusive. These are
11013 offsets from the current stack pointer. */
11014
11015 const char *
11016 output_probe_stack_range (rtx reg, rtx end)
11017 {
11018 static int labelno = 0;
11019 char loop_lab[32], end_lab[32];
11020 rtx xops[3];
11021
11022 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11023 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11024
11025 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11026
11027 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11028 xops[0] = reg;
11029 xops[1] = end;
11030 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11031 fputs ("\tje\t", asm_out_file);
11032 assemble_name_raw (asm_out_file, end_lab);
11033 fputc ('\n', asm_out_file);
11034
11035 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11036 xops[1] = GEN_INT (PROBE_INTERVAL);
11037 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11038
11039 /* Probe at TEST_ADDR. */
11040 xops[0] = stack_pointer_rtx;
11041 xops[1] = reg;
11042 xops[2] = const0_rtx;
11043 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11044
11045 fprintf (asm_out_file, "\tjmp\t");
11046 assemble_name_raw (asm_out_file, loop_lab);
11047 fputc ('\n', asm_out_file);
11048
11049 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11050
11051 return "";
11052 }
11053
11054 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11055 to be generated in the correct form. */
11056 static void
11057 ix86_finalize_stack_realign_flags (void)
11058 {
11059 /* Check if stack realignment is really needed after reload, and
11060 store the result in cfun. */
11061 unsigned int incoming_stack_boundary
11062 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11063 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11064 unsigned int stack_realign = (incoming_stack_boundary
11065 < (crtl->is_leaf
11066 ? crtl->max_used_stack_slot_alignment
11067 : crtl->stack_alignment_needed));
11068
11069 if (crtl->stack_realign_finalized)
11070 {
11071 /* After stack_realign_needed is finalized, we can no longer
11072 change it. */
11073 gcc_assert (crtl->stack_realign_needed == stack_realign);
11074 return;
11075 }
11076
11077 /* If the only reason for frame_pointer_needed is that we conservatively
11078 assumed stack realignment might be needed, but in the end nothing that
11079 needed the stack alignment had been spilled, clear frame_pointer_needed
11080 and say we don't need stack realignment. */
11081 if (stack_realign
11082 && frame_pointer_needed
11083 && crtl->is_leaf
11084 && flag_omit_frame_pointer
11085 && crtl->sp_is_unchanging
11086 && !ix86_current_function_calls_tls_descriptor
11087 && !crtl->accesses_prior_frames
11088 && !cfun->calls_alloca
11089 && !crtl->calls_eh_return
11090 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11091 && !ix86_frame_pointer_required ()
11092 && get_frame_size () == 0
11093 && ix86_nsaved_sseregs () == 0
11094 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11095 {
11096 HARD_REG_SET set_up_by_prologue, prologue_used;
11097 basic_block bb;
11098
11099 CLEAR_HARD_REG_SET (prologue_used);
11100 CLEAR_HARD_REG_SET (set_up_by_prologue);
11101 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11102 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11103 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11104 HARD_FRAME_POINTER_REGNUM);
11105 FOR_EACH_BB_FN (bb, cfun)
11106 {
11107 rtx_insn *insn;
11108 FOR_BB_INSNS (bb, insn)
11109 if (NONDEBUG_INSN_P (insn)
11110 && requires_stack_frame_p (insn, prologue_used,
11111 set_up_by_prologue))
11112 {
11113 crtl->stack_realign_needed = stack_realign;
11114 crtl->stack_realign_finalized = true;
11115 return;
11116 }
11117 }
11118
11119 /* If drap has been set, but it actually isn't live at the start
11120 of the function, there is no reason to set it up. */
11121 if (crtl->drap_reg)
11122 {
11123 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11124 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11125 {
11126 crtl->drap_reg = NULL_RTX;
11127 crtl->need_drap = false;
11128 }
11129 }
11130 else
11131 cfun->machine->no_drap_save_restore = true;
11132
11133 frame_pointer_needed = false;
11134 stack_realign = false;
11135 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11136 crtl->stack_alignment_needed = incoming_stack_boundary;
11137 crtl->stack_alignment_estimated = incoming_stack_boundary;
11138 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11139 crtl->preferred_stack_boundary = incoming_stack_boundary;
11140 df_finish_pass (true);
11141 df_scan_alloc (NULL);
11142 df_scan_blocks ();
11143 df_compute_regs_ever_live (true);
11144 df_analyze ();
11145 }
11146
11147 crtl->stack_realign_needed = stack_realign;
11148 crtl->stack_realign_finalized = true;
11149 }
11150
11151 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11152
11153 static void
11154 ix86_elim_entry_set_got (rtx reg)
11155 {
11156 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11157 rtx_insn *c_insn = BB_HEAD (bb);
11158 if (!NONDEBUG_INSN_P (c_insn))
11159 c_insn = next_nonnote_nondebug_insn (c_insn);
11160 if (c_insn && NONJUMP_INSN_P (c_insn))
11161 {
11162 rtx pat = PATTERN (c_insn);
11163 if (GET_CODE (pat) == PARALLEL)
11164 {
11165 rtx vec = XVECEXP (pat, 0, 0);
11166 if (GET_CODE (vec) == SET
11167 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11168 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11169 delete_insn (c_insn);
11170 }
11171 }
11172 }
11173
11174 /* Expand the prologue into a bunch of separate insns. */
11175
11176 void
11177 ix86_expand_prologue (void)
11178 {
11179 struct machine_function *m = cfun->machine;
11180 rtx insn, t;
11181 struct ix86_frame frame;
11182 HOST_WIDE_INT allocate;
11183 bool int_registers_saved;
11184 bool sse_registers_saved;
11185
11186 ix86_finalize_stack_realign_flags ();
11187
11188 /* DRAP should not coexist with stack_realign_fp */
11189 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11190
11191 memset (&m->fs, 0, sizeof (m->fs));
11192
11193 /* Initialize CFA state for before the prologue. */
11194 m->fs.cfa_reg = stack_pointer_rtx;
11195 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11196
11197 /* Track SP offset to the CFA. We continue tracking this after we've
11198 swapped the CFA register away from SP. In the case of re-alignment
11199 this is fudged; we're interested in offsets within the local frame. */
11200 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11201 m->fs.sp_valid = true;
11202
11203 ix86_compute_frame_layout (&frame);
11204
11205 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11206 {
11207 /* We should have already generated an error for any use of
11208 ms_hook on a nested function. */
11209 gcc_checking_assert (!ix86_static_chain_on_stack);
11210
11211 /* Check if profiling is active and whether we shall use the
11212 profiling-before-prologue variant. If so, issue a sorry. */
11213 if (crtl->profile && flag_fentry != 0)
11214 sorry ("ms_hook_prologue attribute isn%'t compatible "
11215 "with -mfentry for 32-bit");
11216
11217 /* In ix86_asm_output_function_label we emitted:
11218 8b ff movl.s %edi,%edi
11219 55 push %ebp
11220 8b ec movl.s %esp,%ebp
11221
11222 This matches the hookable function prologue in Win32 API
11223 functions in Microsoft Windows XP Service Pack 2 and newer.
11224 Wine uses this to enable Windows apps to hook the Win32 API
11225 functions provided by Wine.
11226
11227 What that means is that we've already set up the frame pointer. */
11228
11229 if (frame_pointer_needed
11230 && !(crtl->drap_reg && crtl->stack_realign_needed))
11231 {
11232 rtx push, mov;
11233
11234 /* We've decided to use the frame pointer already set up.
11235 Describe this to the unwinder by pretending that both
11236 push and mov insns happen right here.
11237
11238 Putting the unwind info here at the end of the ms_hook
11239 is done so that we can make absolutely certain we get
11240 the required byte sequence at the start of the function,
11241 rather than relying on an assembler that can produce
11242 the exact encoding required.
11243
11244 However it does mean (in the unpatched case) that we have
11245 a 1 insn window where the asynchronous unwind info is
11246 incorrect. However, if we placed the unwind info at
11247 its correct location we would have incorrect unwind info
11248 in the patched case. Which is probably all moot since
11249 I don't expect Wine generates dwarf2 unwind info for the
11250 system libraries that use this feature. */
11251
11252 insn = emit_insn (gen_blockage ());
11253
11254 push = gen_push (hard_frame_pointer_rtx);
11255 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11256 stack_pointer_rtx);
11257 RTX_FRAME_RELATED_P (push) = 1;
11258 RTX_FRAME_RELATED_P (mov) = 1;
11259
11260 RTX_FRAME_RELATED_P (insn) = 1;
11261 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11262 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11263
11264 /* Note that gen_push incremented m->fs.cfa_offset, even
11265 though we didn't emit the push insn here. */
11266 m->fs.cfa_reg = hard_frame_pointer_rtx;
11267 m->fs.fp_offset = m->fs.cfa_offset;
11268 m->fs.fp_valid = true;
11269 }
11270 else
11271 {
11272 /* The frame pointer is not needed so pop %ebp again.
11273 This leaves us with a pristine state. */
11274 emit_insn (gen_pop (hard_frame_pointer_rtx));
11275 }
11276 }
11277
11278 /* The first insn of a function that accepts its static chain on the
11279 stack is to push the register that would be filled in by a direct
11280 call. This insn will be skipped by the trampoline. */
11281 else if (ix86_static_chain_on_stack)
11282 {
11283 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11284 emit_insn (gen_blockage ());
11285
11286 /* We don't want to interpret this push insn as a register save,
11287 only as a stack adjustment. The real copy of the register as
11288 a save will be done later, if needed. */
11289 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11290 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11291 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11292 RTX_FRAME_RELATED_P (insn) = 1;
11293 }
11294
11295 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11296 DRAP is needed and stack realignment is really needed after reload. */
11297 if (stack_realign_drap)
11298 {
11299 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11300
11301 /* Only need to push parameter pointer reg if it is caller saved. */
11302 if (!call_used_regs[REGNO (crtl->drap_reg)])
11303 {
11304 /* Push arg pointer reg */
11305 insn = emit_insn (gen_push (crtl->drap_reg));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11307 }
11308
11309 /* Grab the argument pointer. */
11310 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11311 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11312 RTX_FRAME_RELATED_P (insn) = 1;
11313 m->fs.cfa_reg = crtl->drap_reg;
11314 m->fs.cfa_offset = 0;
11315
11316 /* Align the stack. */
11317 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11318 stack_pointer_rtx,
11319 GEN_INT (-align_bytes)));
11320 RTX_FRAME_RELATED_P (insn) = 1;
11321
11322 /* Replicate the return address on the stack so that the return
11323 address can be reached via the (argp - 1) slot. This is needed
11324 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11325 expand_builtin_return_addr, etc. */
11326 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11327 t = gen_frame_mem (word_mode, t);
11328 insn = emit_insn (gen_push (t));
11329 RTX_FRAME_RELATED_P (insn) = 1;
11330
11331 /* For the purposes of frame and register save area addressing,
11332 we've started over with a new frame. */
11333 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11334 m->fs.realigned = true;
11335 }
11336
11337 int_registers_saved = (frame.nregs == 0);
11338 sse_registers_saved = (frame.nsseregs == 0);
11339
11340 if (frame_pointer_needed && !m->fs.fp_valid)
11341 {
11342 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11343 slower on all targets. Also sdb doesn't like it. */
11344 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11345 RTX_FRAME_RELATED_P (insn) = 1;
11346
11347 /* Push registers now, before setting the frame pointer
11348 on SEH target. */
11349 if (!int_registers_saved
11350 && TARGET_SEH
11351 && !frame.save_regs_using_mov)
11352 {
11353 ix86_emit_save_regs ();
11354 int_registers_saved = true;
11355 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11356 }
11357
11358 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11359 {
11360 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11361 RTX_FRAME_RELATED_P (insn) = 1;
11362
11363 if (m->fs.cfa_reg == stack_pointer_rtx)
11364 m->fs.cfa_reg = hard_frame_pointer_rtx;
11365 m->fs.fp_offset = m->fs.sp_offset;
11366 m->fs.fp_valid = true;
11367 }
11368 }
11369
11370 if (!int_registers_saved)
11371 {
11372 /* If saving registers via PUSH, do so now. */
11373 if (!frame.save_regs_using_mov)
11374 {
11375 ix86_emit_save_regs ();
11376 int_registers_saved = true;
11377 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11378 }
11379
11380 /* When using the red zone we may start register saving before allocating
11381 the stack frame, saving one cycle of the prologue. However, avoid
11382 doing this if we have to probe the stack; at least on x86_64 the
11383 stack probe can turn into a call that clobbers a red zone location. */
11384 else if (ix86_using_red_zone ()
11385 && (! TARGET_STACK_PROBE
11386 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11387 {
11388 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11389 int_registers_saved = true;
11390 }
11391 }
11392
11393 if (stack_realign_fp)
11394 {
11395 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11396 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11397
11398 /* The computation of the size of the re-aligned stack frame means
11399 that we must allocate the size of the register save area before
11400 performing the actual alignment. Otherwise we cannot guarantee
11401 that there's enough storage above the realignment point. */
11402 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11403 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11404 GEN_INT (m->fs.sp_offset
11405 - frame.sse_reg_save_offset),
11406 -1, false);
11407
11408 /* Align the stack. */
11409 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11410 stack_pointer_rtx,
11411 GEN_INT (-align_bytes)));
11412
11413 /* For the purposes of register save area addressing, the stack
11414 pointer is no longer valid. As for the value of sp_offset,
11415 see ix86_compute_frame_layout, which we need to match in order
11416 to pass verification of stack_pointer_offset at the end. */
11417 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11418 m->fs.sp_valid = false;
11419 }
11420
11421 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11422
11423 if (flag_stack_usage_info)
11424 {
11425 /* We start to count from ARG_POINTER. */
11426 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11427
11428 /* If it was realigned, take into account the fake frame. */
11429 if (stack_realign_drap)
11430 {
11431 if (ix86_static_chain_on_stack)
11432 stack_size += UNITS_PER_WORD;
11433
11434 if (!call_used_regs[REGNO (crtl->drap_reg)])
11435 stack_size += UNITS_PER_WORD;
11436
11437 /* This over-estimates by 1 minimal-stack-alignment-unit but
11438 mitigates that by counting in the new return address slot. */
11439 current_function_dynamic_stack_size
11440 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11441 }
11442
11443 current_function_static_stack_size = stack_size;
11444 }
11445
11446 /* On SEH target with very large frame size, allocate an area to save
11447 SSE registers (as the very large allocation won't be described). */
11448 if (TARGET_SEH
11449 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11450 && !sse_registers_saved)
11451 {
11452 HOST_WIDE_INT sse_size =
11453 frame.sse_reg_save_offset - frame.reg_save_offset;
11454
11455 gcc_assert (int_registers_saved);
11456
11457 /* No need to do stack checking as the area will be immediately
11458 written. */
11459 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11460 GEN_INT (-sse_size), -1,
11461 m->fs.cfa_reg == stack_pointer_rtx);
11462 allocate -= sse_size;
11463 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11464 sse_registers_saved = true;
11465 }
11466
11467 /* The stack has already been decremented by the instruction calling us,
11468 so probe if the size is non-negative to preserve the protection area. */
11469 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11470 {
11471 /* We expect the registers to be saved when probes are used. */
11472 gcc_assert (int_registers_saved);
11473
11474 if (STACK_CHECK_MOVING_SP)
11475 {
11476 if (!(crtl->is_leaf && !cfun->calls_alloca
11477 && allocate <= PROBE_INTERVAL))
11478 {
11479 ix86_adjust_stack_and_probe (allocate);
11480 allocate = 0;
11481 }
11482 }
11483 else
11484 {
11485 HOST_WIDE_INT size = allocate;
11486
11487 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11488 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11489
11490 if (TARGET_STACK_PROBE)
11491 {
11492 if (crtl->is_leaf && !cfun->calls_alloca)
11493 {
11494 if (size > PROBE_INTERVAL)
11495 ix86_emit_probe_stack_range (0, size);
11496 }
11497 else
11498 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11499 }
11500 else
11501 {
11502 if (crtl->is_leaf && !cfun->calls_alloca)
11503 {
11504 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11505 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11506 size - STACK_CHECK_PROTECT);
11507 }
11508 else
11509 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11510 }
11511 }
11512 }
11513
11514 if (allocate == 0)
11515 ;
11516 else if (!ix86_target_stack_probe ()
11517 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11518 {
11519 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11520 GEN_INT (-allocate), -1,
11521 m->fs.cfa_reg == stack_pointer_rtx);
11522 }
11523 else
11524 {
11525 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11526 rtx r10 = NULL;
11527 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11528 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11529 bool eax_live = ix86_eax_live_at_start_p ();
11530 bool r10_live = false;
11531
11532 if (TARGET_64BIT)
11533 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11534
11535 if (eax_live)
11536 {
11537 insn = emit_insn (gen_push (eax));
11538 allocate -= UNITS_PER_WORD;
11539 /* Note that SEH directives need to continue tracking the stack
11540 pointer even after the frame pointer has been set up. */
11541 if (sp_is_cfa_reg || TARGET_SEH)
11542 {
11543 if (sp_is_cfa_reg)
11544 m->fs.cfa_offset += UNITS_PER_WORD;
11545 RTX_FRAME_RELATED_P (insn) = 1;
11546 }
11547 }
11548
11549 if (r10_live)
11550 {
11551 r10 = gen_rtx_REG (Pmode, R10_REG);
11552 insn = emit_insn (gen_push (r10));
11553 allocate -= UNITS_PER_WORD;
11554 if (sp_is_cfa_reg || TARGET_SEH)
11555 {
11556 if (sp_is_cfa_reg)
11557 m->fs.cfa_offset += UNITS_PER_WORD;
11558 RTX_FRAME_RELATED_P (insn) = 1;
11559 }
11560 }
11561
11562 emit_move_insn (eax, GEN_INT (allocate));
11563 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11564
11565 /* Use the fact that AX still contains ALLOCATE. */
11566 adjust_stack_insn = (Pmode == DImode
11567 ? gen_pro_epilogue_adjust_stack_di_sub
11568 : gen_pro_epilogue_adjust_stack_si_sub);
11569
11570 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11571 stack_pointer_rtx, eax));
11572
11573 if (sp_is_cfa_reg || TARGET_SEH)
11574 {
11575 if (sp_is_cfa_reg)
11576 m->fs.cfa_offset += allocate;
11577 RTX_FRAME_RELATED_P (insn) = 1;
11578 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11579 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11580 plus_constant (Pmode, stack_pointer_rtx,
11581 -allocate)));
11582 }
11583 m->fs.sp_offset += allocate;
11584
11585 /* Use stack_pointer_rtx for relative addressing so that code
11586 works for realigned stack, too. */
11587 if (r10_live && eax_live)
11588 {
11589 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11590 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11591 gen_frame_mem (word_mode, t));
11592 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11593 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11594 gen_frame_mem (word_mode, t));
11595 }
11596 else if (eax_live || r10_live)
11597 {
11598 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11599 emit_move_insn (gen_rtx_REG (word_mode,
11600 (eax_live ? AX_REG : R10_REG)),
11601 gen_frame_mem (word_mode, t));
11602 }
11603 }
11604 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11605
11606 /* If we haven't already set up the frame pointer, do so now. */
11607 if (frame_pointer_needed && !m->fs.fp_valid)
11608 {
11609 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11610 GEN_INT (frame.stack_pointer_offset
11611 - frame.hard_frame_pointer_offset));
11612 insn = emit_insn (insn);
11613 RTX_FRAME_RELATED_P (insn) = 1;
11614 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11615
11616 if (m->fs.cfa_reg == stack_pointer_rtx)
11617 m->fs.cfa_reg = hard_frame_pointer_rtx;
11618 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11619 m->fs.fp_valid = true;
11620 }
11621
11622 if (!int_registers_saved)
11623 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11624 if (!sse_registers_saved)
11625 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11626
11627 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11628 in the prologue. */
11629 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11630 {
11631 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11632 insn = emit_insn (gen_set_got (pic));
11633 RTX_FRAME_RELATED_P (insn) = 1;
11634 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11635 emit_insn (gen_prologue_use (pic));
11636 /* Delete an already emitted SET_GOT if it exists and is allocated to
11637 REAL_PIC_OFFSET_TABLE_REGNUM. */
11638 ix86_elim_entry_set_got (pic);
11639 }
11640
11641 if (crtl->drap_reg && !crtl->stack_realign_needed)
11642 {
11643 /* vDRAP is set up, but after reload it turns out stack realignment
11644 isn't necessary; here we emit prologue code to set up DRAP
11645 without the stack realignment adjustment. */
11646 t = choose_baseaddr (0);
11647 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11648 }
11649
11650 /* Prevent instructions from being scheduled into the register save push
11651 sequence when access to the red-zone area is done through the frame
11652 pointer. The offset between the frame pointer and the stack pointer is
11653 calculated relative to the value of the stack pointer at the end of the
11654 function prologue, and moving instructions that access the red-zone area
11655 via the frame pointer into the push sequence violates this assumption. */
11656 if (frame_pointer_needed && frame.red_zone_size)
11657 emit_insn (gen_memory_blockage ());
11658
11659 /* Emit cld instruction if stringops are used in the function. */
11660 if (TARGET_CLD && ix86_current_function_needs_cld)
11661 emit_insn (gen_cld ());
11662
11663 /* SEH requires that the prologue end within 256 bytes of the start of
11664 the function. Prevent instruction schedules that would extend that.
11665 Further, prevent alloca modifications to the stack pointer from being
11666 combined with prologue modifications. */
11667 if (TARGET_SEH)
11668 emit_insn (gen_prologue_use (stack_pointer_rtx));
11669 }
11670
11671 /* Emit code to restore REG using a POP insn. */
11672
11673 static void
11674 ix86_emit_restore_reg_using_pop (rtx reg)
11675 {
11676 struct machine_function *m = cfun->machine;
11677 rtx insn = emit_insn (gen_pop (reg));
11678
11679 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11680 m->fs.sp_offset -= UNITS_PER_WORD;
11681
11682 if (m->fs.cfa_reg == crtl->drap_reg
11683 && REGNO (reg) == REGNO (crtl->drap_reg))
11684 {
11685 /* Previously we'd represented the CFA as an expression
11686 like *(%ebp - 8). We've just popped that value from
11687 the stack, which means we need to reset the CFA to
11688 the drap register. This will remain until we restore
11689 the stack pointer. */
11690 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11691 RTX_FRAME_RELATED_P (insn) = 1;
11692
11693 /* This means that the DRAP register is valid for addressing too. */
11694 m->fs.drap_valid = true;
11695 return;
11696 }
11697
11698 if (m->fs.cfa_reg == stack_pointer_rtx)
11699 {
11700 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11701 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11702 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11703 RTX_FRAME_RELATED_P (insn) = 1;
11704
11705 m->fs.cfa_offset -= UNITS_PER_WORD;
11706 }
11707
11708 /* When the frame pointer is the CFA, and we pop it, we are
11709 swapping back to the stack pointer as the CFA. This happens
11710 for stack frames that don't allocate other data, so we assume
11711 the stack pointer is now pointing at the return address, i.e.
11712 the function entry state, which makes the offset be 1 word. */
11713 if (reg == hard_frame_pointer_rtx)
11714 {
11715 m->fs.fp_valid = false;
11716 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11717 {
11718 m->fs.cfa_reg = stack_pointer_rtx;
11719 m->fs.cfa_offset -= UNITS_PER_WORD;
11720
11721 add_reg_note (insn, REG_CFA_DEF_CFA,
11722 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11723 GEN_INT (m->fs.cfa_offset)));
11724 RTX_FRAME_RELATED_P (insn) = 1;
11725 }
11726 }
11727 }
11728
11729 /* Emit code to restore saved registers using POP insns. */
11730
11731 static void
11732 ix86_emit_restore_regs_using_pop (void)
11733 {
11734 unsigned int regno;
11735
11736 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11737 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11738 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11739 }
11740
11741 /* Emit code and notes for the LEAVE instruction. */
11742
11743 static void
11744 ix86_emit_leave (void)
11745 {
11746 struct machine_function *m = cfun->machine;
11747 rtx insn = emit_insn (ix86_gen_leave ());
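/* 'leave' is equivalent to 'mov %fp, %sp' followed by 'pop %fp', so
   afterwards the stack pointer sits one word above the slot the frame
   pointer was saved in; the frame-state updates below mirror that. */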
11748
11749 ix86_add_queued_cfa_restore_notes (insn);
11750
11751 gcc_assert (m->fs.fp_valid);
11752 m->fs.sp_valid = true;
11753 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11754 m->fs.fp_valid = false;
11755
11756 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11757 {
11758 m->fs.cfa_reg = stack_pointer_rtx;
11759 m->fs.cfa_offset = m->fs.sp_offset;
11760
11761 add_reg_note (insn, REG_CFA_DEF_CFA,
11762 plus_constant (Pmode, stack_pointer_rtx,
11763 m->fs.sp_offset));
11764 RTX_FRAME_RELATED_P (insn) = 1;
11765 }
11766 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11767 m->fs.fp_offset);
11768 }
11769
11770 /* Emit code to restore saved registers using MOV insns.
11771 First register is restored from CFA - CFA_OFFSET. */
11772 static void
11773 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11774 bool maybe_eh_return)
11775 {
11776 struct machine_function *m = cfun->machine;
11777 unsigned int regno;
11778
11779 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11780 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11781 {
11782 rtx reg = gen_rtx_REG (word_mode, regno);
11783 rtx insn, mem;
11784
11785 mem = choose_baseaddr (cfa_offset);
11786 mem = gen_frame_mem (word_mode, mem);
11787 insn = emit_move_insn (reg, mem);
11788
11789 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11790 {
11791 /* Previously we'd represented the CFA as an expression
11792 like *(%ebp - 8). We've just popped that value from
11793 the stack, which means we need to reset the CFA to
11794 the drap register. This will remain until we restore
11795 the stack pointer. */
11796 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11797 RTX_FRAME_RELATED_P (insn) = 1;
11798
11799 /* This means that the DRAP register is valid for addressing. */
11800 m->fs.drap_valid = true;
11801 }
11802 else
11803 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11804
11805 cfa_offset -= UNITS_PER_WORD;
11806 }
11807 }
11808
11809 /* Emit code to restore saved SSE registers using MOV insns.
11810 First register is restored from CFA - CFA_OFFSET. */
11811 static void
11812 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11813 bool maybe_eh_return)
11814 {
11815 unsigned int regno;
11816
11817 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11818 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11819 {
11820 rtx reg = gen_rtx_REG (V4SFmode, regno);
11821 rtx mem;
11822
11823 mem = choose_baseaddr (cfa_offset);
11824 mem = gen_rtx_MEM (V4SFmode, mem);
11825 set_mem_align (mem, 128);
11826 emit_move_insn (reg, mem);
11827
11828 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11829
11830 cfa_offset -= 16;
11831 }
11832 }
11833
11834 /* Restore function stack, frame, and registers. */
11835
11836 void
11837 ix86_expand_epilogue (int style)
11838 {
11839 struct machine_function *m = cfun->machine;
11840 struct machine_frame_state frame_state_save = m->fs;
11841 struct ix86_frame frame;
11842 bool restore_regs_via_mov;
11843 bool using_drap;
11844
11845 ix86_finalize_stack_realign_flags ();
11846 ix86_compute_frame_layout (&frame);
11847
11848 m->fs.sp_valid = (!frame_pointer_needed
11849 || (crtl->sp_is_unchanging
11850 && !stack_realign_fp));
11851 gcc_assert (!m->fs.sp_valid
11852 || m->fs.sp_offset == frame.stack_pointer_offset);
11853
11854 /* The FP must be valid if and only if the frame pointer is needed. */
11855 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11856 gcc_assert (!m->fs.fp_valid
11857 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11858
11859 /* We must have *some* valid pointer to the stack frame. */
11860 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11861
11862 /* The DRAP is never valid at this point. */
11863 gcc_assert (!m->fs.drap_valid);
11864
11865 /* See the comment about red zone and frame
11866 pointer usage in ix86_expand_prologue. */
11867 if (frame_pointer_needed && frame.red_zone_size)
11868 emit_insn (gen_memory_blockage ());
11869
11870 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11871 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11872
11873 /* Determine the CFA offset of the end of the red-zone. */
11874 m->fs.red_zone_offset = 0;
11875 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11876 {
11877 /* The red-zone begins below the return address. */
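/* (Under the x86-64 SysV ABI the red zone is 128 bytes, so this offset
   covers the red zone plus the word holding the return address.) */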
11878 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11879
11880 /* When the register save area is in the aligned portion of
11881 the stack, determine the maximum runtime displacement that
11882 matches up with the aligned frame. */
11883 if (stack_realign_drap)
11884 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11885 + UNITS_PER_WORD);
11886 }
11887
11888 /* Special care must be taken for the normal return case of a function
11889 using eh_return: the eax and edx registers are marked as saved, but
11890 not restored along this path. Adjust the save location to match. */
11891 if (crtl->calls_eh_return && style != 2)
11892 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11893
11894 /* EH_RETURN requires the use of moves to function properly. */
11895 if (crtl->calls_eh_return)
11896 restore_regs_via_mov = true;
11897 /* SEH requires the use of pops to identify the epilogue. */
11898 else if (TARGET_SEH)
11899 restore_regs_via_mov = false;
11900 /* If we're only restoring one register and sp is not valid, then
11901 use a move instruction to restore the register, since it's
11902 less work than reloading sp and popping the register. */
11903 else if (!m->fs.sp_valid && frame.nregs <= 1)
11904 restore_regs_via_mov = true;
11905 else if (TARGET_EPILOGUE_USING_MOVE
11906 && cfun->machine->use_fast_prologue_epilogue
11907 && (frame.nregs > 1
11908 || m->fs.sp_offset != frame.reg_save_offset))
11909 restore_regs_via_mov = true;
11910 else if (frame_pointer_needed
11911 && !frame.nregs
11912 && m->fs.sp_offset != frame.reg_save_offset)
11913 restore_regs_via_mov = true;
11914 else if (frame_pointer_needed
11915 && TARGET_USE_LEAVE
11916 && cfun->machine->use_fast_prologue_epilogue
11917 && frame.nregs == 1)
11918 restore_regs_via_mov = true;
11919 else
11920 restore_regs_via_mov = false;
11921
11922 if (restore_regs_via_mov || frame.nsseregs)
11923 {
11924 /* Ensure that the entire register save area is addressable via
11925 the stack pointer, if we will restore via sp. */
11926 if (TARGET_64BIT
11927 && m->fs.sp_offset > 0x7fffffff
11928 && !(m->fs.fp_valid || m->fs.drap_valid)
11929 && (frame.nsseregs + frame.nregs) != 0)
11930 {
11931 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11932 GEN_INT (m->fs.sp_offset
11933 - frame.sse_reg_save_offset),
11934 style,
11935 m->fs.cfa_reg == stack_pointer_rtx);
11936 }
11937 }
11938
11939 /* If there are any SSE registers to restore, then we have to do it
11940 via moves, since there's obviously no pop for SSE regs. */
11941 if (frame.nsseregs)
11942 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11943 style == 2);
11944
11945 if (restore_regs_via_mov)
11946 {
11947 rtx t;
11948
11949 if (frame.nregs)
11950 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11951
11952 /* eh_return epilogues need %ecx added to the stack pointer. */
11953 if (style == 2)
11954 {
11955 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11956
11957 /* Stack align doesn't work with eh_return. */
11958 gcc_assert (!stack_realign_drap);
11959 /* Neither do regparm nested functions. */
11960 gcc_assert (!ix86_static_chain_on_stack);
11961
11962 if (frame_pointer_needed)
11963 {
11964 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11965 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11966 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11967
11968 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11969 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11970
11971 /* Note that we use SA as a temporary CFA, as the return
11972 address is at the proper place relative to it. We
11973 pretend this happens at the FP restore insn because
11974 prior to this insn the FP would be stored at the wrong
11975 offset relative to SA, and after this insn we have no
11976 other reasonable register to use for the CFA. We don't
11977 bother resetting the CFA to the SP for the duration of
11978 the return insn. */
11979 add_reg_note (insn, REG_CFA_DEF_CFA,
11980 plus_constant (Pmode, sa, UNITS_PER_WORD));
11981 ix86_add_queued_cfa_restore_notes (insn);
11982 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11983 RTX_FRAME_RELATED_P (insn) = 1;
11984
11985 m->fs.cfa_reg = sa;
11986 m->fs.cfa_offset = UNITS_PER_WORD;
11987 m->fs.fp_valid = false;
11988
11989 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11990 const0_rtx, style, false);
11991 }
11992 else
11993 {
11994 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11995 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11996 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11997 ix86_add_queued_cfa_restore_notes (insn);
11998
11999 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12000 if (m->fs.cfa_offset != UNITS_PER_WORD)
12001 {
12002 m->fs.cfa_offset = UNITS_PER_WORD;
12003 add_reg_note (insn, REG_CFA_DEF_CFA,
12004 plus_constant (Pmode, stack_pointer_rtx,
12005 UNITS_PER_WORD));
12006 RTX_FRAME_RELATED_P (insn) = 1;
12007 }
12008 }
12009 m->fs.sp_offset = UNITS_PER_WORD;
12010 m->fs.sp_valid = true;
12011 }
12012 }
12013 else
12014 {
12015 /* SEH requires that the function end with (1) a stack adjustment
12016 if necessary, (2) a sequence of pops, and (3) a return or
12017 jump instruction. Prevent insns from the function body from
12018 being scheduled into this sequence. */
12019 if (TARGET_SEH)
12020 {
12021 /* Prevent a catch region from being adjacent to the standard
12022 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
12023 several other flags that would be interesting to test are
12024 set up yet. */
12025 if (flag_non_call_exceptions)
12026 emit_insn (gen_nops (const1_rtx));
12027 else
12028 emit_insn (gen_blockage ());
12029 }
12030
12031 /* The first step is to deallocate the stack frame so that we can
12032 pop the registers. Also do it on SEH targets for very large
12033 frames, as the emitted instructions aren't allowed by the ABI in
12034 epilogues. */
12035 if (!m->fs.sp_valid
12036 || (TARGET_SEH
12037 && (m->fs.sp_offset - frame.reg_save_offset
12038 >= SEH_MAX_FRAME_SIZE)))
12039 {
12040 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12041 GEN_INT (m->fs.fp_offset
12042 - frame.reg_save_offset),
12043 style, false);
12044 }
12045 else if (m->fs.sp_offset != frame.reg_save_offset)
12046 {
12047 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12048 GEN_INT (m->fs.sp_offset
12049 - frame.reg_save_offset),
12050 style,
12051 m->fs.cfa_reg == stack_pointer_rtx);
12052 }
12053
12054 ix86_emit_restore_regs_using_pop ();
12055 }
12056
12057 /* If we used a frame pointer and haven't already got rid of it,
12058 then do so now. */
12059 if (m->fs.fp_valid)
12060 {
12061 /* If the stack pointer is valid and pointing at the frame
12062 pointer store address, then we only need a pop. */
12063 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12064 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12065 /* Leave results in shorter dependency chains on CPUs that are
12066 able to grok it fast. */
12067 else if (TARGET_USE_LEAVE
12068 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12069 || !cfun->machine->use_fast_prologue_epilogue)
12070 ix86_emit_leave ();
12071 else
12072 {
12073 pro_epilogue_adjust_stack (stack_pointer_rtx,
12074 hard_frame_pointer_rtx,
12075 const0_rtx, style, !using_drap);
12076 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12077 }
12078 }
12079
12080 if (using_drap)
12081 {
12082 int param_ptr_offset = UNITS_PER_WORD;
12083 rtx insn;
12084
12085 gcc_assert (stack_realign_drap);
12086
12087 if (ix86_static_chain_on_stack)
12088 param_ptr_offset += UNITS_PER_WORD;
12089 if (!call_used_regs[REGNO (crtl->drap_reg)])
12090 param_ptr_offset += UNITS_PER_WORD;
12091
12092 insn = emit_insn (gen_rtx_SET
12093 (VOIDmode, stack_pointer_rtx,
12094 gen_rtx_PLUS (Pmode,
12095 crtl->drap_reg,
12096 GEN_INT (-param_ptr_offset))));
12097 m->fs.cfa_reg = stack_pointer_rtx;
12098 m->fs.cfa_offset = param_ptr_offset;
12099 m->fs.sp_offset = param_ptr_offset;
12100 m->fs.realigned = false;
12101
12102 add_reg_note (insn, REG_CFA_DEF_CFA,
12103 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12104 GEN_INT (param_ptr_offset)));
12105 RTX_FRAME_RELATED_P (insn) = 1;
12106
12107 if (!call_used_regs[REGNO (crtl->drap_reg)])
12108 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12109 }
12110
12111 /* At this point the stack pointer must be valid, and we must have
12112 restored all of the registers. We may not have deallocated the
12113 entire stack frame. We've delayed this until now because it may
12114 be possible to merge the local stack deallocation with the
12115 deallocation forced by ix86_static_chain_on_stack. */
12116 gcc_assert (m->fs.sp_valid);
12117 gcc_assert (!m->fs.fp_valid);
12118 gcc_assert (!m->fs.realigned);
12119 if (m->fs.sp_offset != UNITS_PER_WORD)
12120 {
12121 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12122 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12123 style, true);
12124 }
12125 else
12126 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12127
12128 /* Sibcall epilogues don't want a return instruction. */
12129 if (style == 0)
12130 {
12131 m->fs = frame_state_save;
12132 return;
12133 }
12134
12135 if (crtl->args.pops_args && crtl->args.size)
12136 {
12137 rtx popc = GEN_INT (crtl->args.pops_args);
12138
12139 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12140 address, do an explicit add, and jump indirectly to the caller. */
12141
12142 if (crtl->args.pops_args >= 65536)
12143 {
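/* The sequence emitted below is essentially:
     popl %ecx        ; pop the return address into %ecx
     addl $N, %esp    ; N = crtl->args.pops_args, deallocate the arguments
     jmp *%ecx        ; return to the caller indirectly  */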
12144 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12145 rtx insn;
12146
12147 /* There is no "pascal" calling convention in any 64bit ABI. */
12148 gcc_assert (!TARGET_64BIT);
12149
12150 insn = emit_insn (gen_pop (ecx));
12151 m->fs.cfa_offset -= UNITS_PER_WORD;
12152 m->fs.sp_offset -= UNITS_PER_WORD;
12153
12154 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12155 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12156 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12157 add_reg_note (insn, REG_CFA_REGISTER,
12158 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12159 RTX_FRAME_RELATED_P (insn) = 1;
12160
12161 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12162 popc, -1, true);
12163 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12164 }
12165 else
12166 emit_jump_insn (gen_simple_return_pop_internal (popc));
12167 }
12168 else
12169 emit_jump_insn (gen_simple_return_internal ());
12170
12171 /* Restore the state back to the state from the prologue,
12172 so that it's correct for the next epilogue. */
12173 m->fs = frame_state_save;
12174 }
12175
12176 /* Reset from the function's potential modifications. */
12177
12178 static void
12179 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12180 {
12181 if (pic_offset_table_rtx
12182 && !ix86_use_pseudo_pic_reg ())
12183 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12184 #if TARGET_MACHO
12185 /* Mach-O doesn't support labels at the end of objects, so if
12186 it looks like we might want one, insert a NOP. */
12187 {
12188 rtx_insn *insn = get_last_insn ();
12189 rtx_insn *deleted_debug_label = NULL;
12190 while (insn
12191 && NOTE_P (insn)
12192 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12193 {
12194 /* Don't insert a nop when only NOTE_INSN_DELETED_DEBUG_LABEL
12195 notes are found; instead set their CODE_LABEL_NUMBER to -1,
12196 otherwise there would be code generation differences
12197 between -g and -g0. */
12198 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12199 deleted_debug_label = insn;
12200 insn = PREV_INSN (insn);
12201 }
12202 if (insn
12203 && (LABEL_P (insn)
12204 || (NOTE_P (insn)
12205 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12206 fputs ("\tnop\n", file);
12207 else if (deleted_debug_label)
12208 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12209 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12210 CODE_LABEL_NUMBER (insn) = -1;
12211 }
12212 #endif
12213
12214 }
12215
12216 /* Return a scratch register to use in the split stack prologue. The
12217 split stack prologue is used for -fsplit-stack. It consists of the
12218 first instructions in the function, even before the regular prologue.
12219 The scratch register can be any caller-saved register which is not
12220 used for parameters or for the static chain. */
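/* In practice this is %r11 for 64-bit targets; for 32-bit targets it is
   %eax for fastcall, %edx or %eax for thiscall, and otherwise %ecx or %edx
   depending on regparm and on whether a static chain is in use. */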
12221
12222 static unsigned int
12223 split_stack_prologue_scratch_regno (void)
12224 {
12225 if (TARGET_64BIT)
12226 return R11_REG;
12227 else
12228 {
12229 bool is_fastcall, is_thiscall;
12230 int regparm;
12231
12232 is_fastcall = (lookup_attribute ("fastcall",
12233 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12234 != NULL);
12235 is_thiscall = (lookup_attribute ("thiscall",
12236 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12237 != NULL);
12238 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12239
12240 if (is_fastcall)
12241 {
12242 if (DECL_STATIC_CHAIN (cfun->decl))
12243 {
12244 sorry ("-fsplit-stack does not support fastcall with "
12245 "nested function");
12246 return INVALID_REGNUM;
12247 }
12248 return AX_REG;
12249 }
12250 else if (is_thiscall)
12251 {
12252 if (!DECL_STATIC_CHAIN (cfun->decl))
12253 return DX_REG;
12254 return AX_REG;
12255 }
12256 else if (regparm < 3)
12257 {
12258 if (!DECL_STATIC_CHAIN (cfun->decl))
12259 return CX_REG;
12260 else
12261 {
12262 if (regparm >= 2)
12263 {
12264 sorry ("-fsplit-stack does not support 2 register "
12265 "parameters for a nested function");
12266 return INVALID_REGNUM;
12267 }
12268 return DX_REG;
12269 }
12270 }
12271 else
12272 {
12273 /* FIXME: We could make this work by pushing a register
12274 around the addition and comparison. */
12275 sorry ("-fsplit-stack does not support 3 register parameters");
12276 return INVALID_REGNUM;
12277 }
12278 }
12279 }
12280
12281 /* A SYMBOL_REF for the function which allocates new stack space for
12282 -fsplit-stack. */
12283
12284 static GTY(()) rtx split_stack_fn;
12285
12286 /* A SYMBOL_REF for the more-stack function used with the large
12287 model. */
12288
12289 static GTY(()) rtx split_stack_fn_large;
12290
12291 /* Handle -fsplit-stack. These are the first instructions in the
12292 function, even before the regular prologue. */
12293
12294 void
12295 ix86_expand_split_stack_prologue (void)
12296 {
12297 struct ix86_frame frame;
12298 HOST_WIDE_INT allocate;
12299 unsigned HOST_WIDE_INT args_size;
12300 rtx_code_label *label;
12301 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12302 rtx scratch_reg = NULL_RTX;
12303 rtx_code_label *varargs_label = NULL;
12304 rtx fn;
12305
12306 gcc_assert (flag_split_stack && reload_completed);
12307
12308 ix86_finalize_stack_realign_flags ();
12309 ix86_compute_frame_layout (&frame);
12310 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12311
12312 /* This is the label we will branch to if we have enough stack
12313 space. We expect the basic block reordering pass to reverse this
12314 branch if optimizing, so that we branch in the unlikely case. */
12315 label = gen_label_rtx ();
12316
12317 /* We need to compare the stack pointer minus the frame size with
12318 the stack boundary in the TCB. The stack boundary always gives
12319 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12320 can compare directly. Otherwise we need to do an addition. */
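/* On GNU/Linux targets the boundary is read through the thread segment
   register (%fs in 64-bit mode, %gs in 32-bit mode); the UNSPEC_STACK_CHECK
   memory built below is what later expands to that segment-relative load. */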
12321
12322 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12323 UNSPEC_STACK_CHECK);
12324 limit = gen_rtx_CONST (Pmode, limit);
12325 limit = gen_rtx_MEM (Pmode, limit);
12326 if (allocate < SPLIT_STACK_AVAILABLE)
12327 current = stack_pointer_rtx;
12328 else
12329 {
12330 unsigned int scratch_regno;
12331 rtx offset;
12332
12333 /* We need a scratch register to hold the stack pointer minus
12334 the required frame size. Since this is the very start of the
12335 function, the scratch register can be any caller-saved
12336 register which is not used for parameters. */
12337 offset = GEN_INT (- allocate);
12338 scratch_regno = split_stack_prologue_scratch_regno ();
12339 if (scratch_regno == INVALID_REGNUM)
12340 return;
12341 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12342 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12343 {
12344 /* We don't use ix86_gen_add3 in this case because it will
12345 want to split to lea, but when not optimizing the insn
12346 will not be split after this point. */
12347 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12348 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12349 offset)));
12350 }
12351 else
12352 {
12353 emit_move_insn (scratch_reg, offset);
12354 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12355 stack_pointer_rtx));
12356 }
12357 current = scratch_reg;
12358 }
12359
12360 ix86_expand_branch (GEU, current, limit, label);
12361 jump_insn = get_last_insn ();
12362 JUMP_LABEL (jump_insn) = label;
12363
12364 /* Mark the jump as very likely to be taken. */
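/* (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 encodes a taken probability
   of 99%.) */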
12365 add_int_reg_note (jump_insn, REG_BR_PROB,
12366 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12367
12368 if (split_stack_fn == NULL_RTX)
12369 {
12370 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12371 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12372 }
12373 fn = split_stack_fn;
12374
12375 /* Get more stack space. We pass in the desired stack space and the
12376 size of the arguments to copy to the new stack. In 32-bit mode
12377 we push the parameters; __morestack will return on a new stack
12378 anyhow. In 64-bit mode we pass the parameters in r10 and
12379 r11. */
12380 allocate_rtx = GEN_INT (allocate);
12381 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12382 call_fusage = NULL_RTX;
12383 if (TARGET_64BIT)
12384 {
12385 rtx reg10, reg11;
12386
12387 reg10 = gen_rtx_REG (Pmode, R10_REG);
12388 reg11 = gen_rtx_REG (Pmode, R11_REG);
12389
12390 /* If this function uses a static chain, it will be in %r10.
12391 Preserve it across the call to __morestack. */
12392 if (DECL_STATIC_CHAIN (cfun->decl))
12393 {
12394 rtx rax;
12395
12396 rax = gen_rtx_REG (word_mode, AX_REG);
12397 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12398 use_reg (&call_fusage, rax);
12399 }
12400
12401 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12402 && !TARGET_PECOFF)
12403 {
12404 HOST_WIDE_INT argval;
12405
12406 gcc_assert (Pmode == DImode);
12407 /* When using the large model we need to load the address
12408 into a register, and we've run out of registers. So we
12409 switch to a different calling convention, and we call a
12410 different function: __morestack_large. We pass the
12411 argument size in the upper 32 bits of r10 and pass the
12412 frame size in the lower 32 bits. */
12413 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12414 gcc_assert ((args_size & 0xffffffff) == args_size);
12415
12416 if (split_stack_fn_large == NULL_RTX)
12417 {
12418 split_stack_fn_large =
12419 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12420 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12421 }
12422 if (ix86_cmodel == CM_LARGE_PIC)
12423 {
12424 rtx_code_label *label;
12425 rtx x;
12426
12427 label = gen_label_rtx ();
12428 emit_label (label);
12429 LABEL_PRESERVE_P (label) = 1;
12430 emit_insn (gen_set_rip_rex64 (reg10, label));
12431 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12432 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12433 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12434 UNSPEC_GOT);
12435 x = gen_rtx_CONST (Pmode, x);
12436 emit_move_insn (reg11, x);
12437 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12438 x = gen_const_mem (Pmode, x);
12439 emit_move_insn (reg11, x);
12440 }
12441 else
12442 emit_move_insn (reg11, split_stack_fn_large);
12443
12444 fn = reg11;
12445
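/* Pack the argument size into the upper 32 bits and the frame size into
   the lower 32 bits, as described above; the shift is split into two
   16-bit shifts, presumably to avoid a single shift by 32 on hosts where
   that width would be problematic. */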
12446 argval = ((args_size << 16) << 16) + allocate;
12447 emit_move_insn (reg10, GEN_INT (argval));
12448 }
12449 else
12450 {
12451 emit_move_insn (reg10, allocate_rtx);
12452 emit_move_insn (reg11, GEN_INT (args_size));
12453 use_reg (&call_fusage, reg11);
12454 }
12455
12456 use_reg (&call_fusage, reg10);
12457 }
12458 else
12459 {
12460 emit_insn (gen_push (GEN_INT (args_size)));
12461 emit_insn (gen_push (allocate_rtx));
12462 }
12463 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12464 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12465 NULL_RTX, false);
12466 add_function_usage_to (call_insn, call_fusage);
12467
12468 /* In order to make call/return prediction work right, we now need
12469 to execute a return instruction. See
12470 libgcc/config/i386/morestack.S for the details on how this works.
12471
12472 For flow purposes gcc must not see this as a return
12473 instruction--we need control flow to continue at the subsequent
12474 label. Therefore, we use an unspec. */
12475 gcc_assert (crtl->args.pops_args < 65536);
12476 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12477
12478 /* If we are in 64-bit mode and this function uses a static chain,
12479 we saved %r10 in %rax before calling __morestack. */
12480 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12481 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12482 gen_rtx_REG (word_mode, AX_REG));
12483
12484 /* If this function calls va_start, we need to store a pointer to
12485 the arguments on the old stack, because they may not have been
12486 all copied to the new stack. At this point the old stack can be
12487 found at the frame pointer value used by __morestack, because
12488 __morestack has set that up before calling back to us. Here we
12489 store that pointer in a scratch register, and in
12490 ix86_expand_prologue we store the scratch register in a stack
12491 slot. */
12492 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12493 {
12494 unsigned int scratch_regno;
12495 rtx frame_reg;
12496 int words;
12497
12498 scratch_regno = split_stack_prologue_scratch_regno ();
12499 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12500 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12501
12502 /* 64-bit:
12503 fp -> old fp value
12504 return address within this function
12505 return address of caller of this function
12506 stack arguments
12507 So we add three words to get to the stack arguments.
12508
12509 32-bit:
12510 fp -> old fp value
12511 return address within this function
12512 first argument to __morestack
12513 second argument to __morestack
12514 return address of caller of this function
12515 stack arguments
12516 So we add five words to get to the stack arguments.
12517 */
12518 words = TARGET_64BIT ? 3 : 5;
12519 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12520 gen_rtx_PLUS (Pmode, frame_reg,
12521 GEN_INT (words * UNITS_PER_WORD))));
12522
12523 varargs_label = gen_label_rtx ();
12524 emit_jump_insn (gen_jump (varargs_label));
12525 JUMP_LABEL (get_last_insn ()) = varargs_label;
12526
12527 emit_barrier ();
12528 }
12529
12530 emit_label (label);
12531 LABEL_NUSES (label) = 1;
12532
12533 /* If this function calls va_start, we now have to set the scratch
12534 register for the case where we do not call __morestack. In this
12535 case we need to set it based on the stack pointer. */
12536 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12537 {
12538 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12539 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12540 GEN_INT (UNITS_PER_WORD))));
12541
12542 emit_label (varargs_label);
12543 LABEL_NUSES (varargs_label) = 1;
12544 }
12545 }
12546
12547 /* We may have to tell the dataflow pass that the split stack prologue
12548 is initializing a scratch register. */
12549
12550 static void
12551 ix86_live_on_entry (bitmap regs)
12552 {
12553 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12554 {
12555 gcc_assert (flag_split_stack);
12556 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12557 }
12558 }
12559 \f
12560 /* Extract the parts of an RTL expression that is a valid memory address
12561 for an instruction. Return 0 if the structure of the address is
12562 grossly off. Return -1 if the address contains ASHIFT, so it is not
12563 strictly valid, but still used for computing the length of the lea instruction. */
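/* As an illustrative (hypothetical) example, an address such as
   (plus:SI (reg:SI bx) (mult:SI (reg:SI cx) (const_int 4)))
   decomposes into base = bx, index = cx, scale = 4 and no displacement. */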
12564
12565 int
12566 ix86_decompose_address (rtx addr, struct ix86_address *out)
12567 {
12568 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12569 rtx base_reg, index_reg;
12570 HOST_WIDE_INT scale = 1;
12571 rtx scale_rtx = NULL_RTX;
12572 rtx tmp;
12573 int retval = 1;
12574 enum ix86_address_seg seg = SEG_DEFAULT;
12575
12576 /* Allow zero-extended SImode addresses;
12577 they will be emitted with the addr32 prefix. */
12578 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12579 {
12580 if (GET_CODE (addr) == ZERO_EXTEND
12581 && GET_MODE (XEXP (addr, 0)) == SImode)
12582 {
12583 addr = XEXP (addr, 0);
12584 if (CONST_INT_P (addr))
12585 return 0;
12586 }
12587 else if (GET_CODE (addr) == AND
12588 && const_32bit_mask (XEXP (addr, 1), DImode))
12589 {
12590 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12591 if (addr == NULL_RTX)
12592 return 0;
12593
12594 if (CONST_INT_P (addr))
12595 return 0;
12596 }
12597 }
12598
12599 /* Allow SImode subregs of DImode addresses;
12600 they will be emitted with the addr32 prefix. */
12601 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12602 {
12603 if (GET_CODE (addr) == SUBREG
12604 && GET_MODE (SUBREG_REG (addr)) == DImode)
12605 {
12606 addr = SUBREG_REG (addr);
12607 if (CONST_INT_P (addr))
12608 return 0;
12609 }
12610 }
12611
12612 if (REG_P (addr))
12613 base = addr;
12614 else if (GET_CODE (addr) == SUBREG)
12615 {
12616 if (REG_P (SUBREG_REG (addr)))
12617 base = addr;
12618 else
12619 return 0;
12620 }
12621 else if (GET_CODE (addr) == PLUS)
12622 {
12623 rtx addends[4], op;
12624 int n = 0, i;
12625
12626 op = addr;
12627 do
12628 {
12629 if (n >= 4)
12630 return 0;
12631 addends[n++] = XEXP (op, 1);
12632 op = XEXP (op, 0);
12633 }
12634 while (GET_CODE (op) == PLUS);
12635 if (n >= 4)
12636 return 0;
12637 addends[n] = op;
12638
12639 for (i = n; i >= 0; --i)
12640 {
12641 op = addends[i];
12642 switch (GET_CODE (op))
12643 {
12644 case MULT:
12645 if (index)
12646 return 0;
12647 index = XEXP (op, 0);
12648 scale_rtx = XEXP (op, 1);
12649 break;
12650
12651 case ASHIFT:
12652 if (index)
12653 return 0;
12654 index = XEXP (op, 0);
12655 tmp = XEXP (op, 1);
12656 if (!CONST_INT_P (tmp))
12657 return 0;
12658 scale = INTVAL (tmp);
12659 if ((unsigned HOST_WIDE_INT) scale > 3)
12660 return 0;
12661 scale = 1 << scale;
12662 break;
12663
12664 case ZERO_EXTEND:
12665 op = XEXP (op, 0);
12666 if (GET_CODE (op) != UNSPEC)
12667 return 0;
12668 /* FALLTHRU */
12669
12670 case UNSPEC:
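/* A thread-pointer unspec selects the TLS segment register (typically
   %fs in 64-bit mode and %gs in 32-bit mode on GNU/Linux), so the
   address becomes a segment-relative reference. */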
12671 if (XINT (op, 1) == UNSPEC_TP
12672 && TARGET_TLS_DIRECT_SEG_REFS
12673 && seg == SEG_DEFAULT)
12674 seg = DEFAULT_TLS_SEG_REG;
12675 else
12676 return 0;
12677 break;
12678
12679 case SUBREG:
12680 if (!REG_P (SUBREG_REG (op)))
12681 return 0;
12682 /* FALLTHRU */
12683
12684 case REG:
12685 if (!base)
12686 base = op;
12687 else if (!index)
12688 index = op;
12689 else
12690 return 0;
12691 break;
12692
12693 case CONST:
12694 case CONST_INT:
12695 case SYMBOL_REF:
12696 case LABEL_REF:
12697 if (disp)
12698 return 0;
12699 disp = op;
12700 break;
12701
12702 default:
12703 return 0;
12704 }
12705 }
12706 }
12707 else if (GET_CODE (addr) == MULT)
12708 {
12709 index = XEXP (addr, 0); /* index*scale */
12710 scale_rtx = XEXP (addr, 1);
12711 }
12712 else if (GET_CODE (addr) == ASHIFT)
12713 {
12714 /* We're called for lea too, which implements ashift on occasion. */
12715 index = XEXP (addr, 0);
12716 tmp = XEXP (addr, 1);
12717 if (!CONST_INT_P (tmp))
12718 return 0;
12719 scale = INTVAL (tmp);
12720 if ((unsigned HOST_WIDE_INT) scale > 3)
12721 return 0;
12722 scale = 1 << scale;
12723 retval = -1;
12724 }
12725 else
12726 disp = addr; /* displacement */
12727
12728 if (index)
12729 {
12730 if (REG_P (index))
12731 ;
12732 else if (GET_CODE (index) == SUBREG
12733 && REG_P (SUBREG_REG (index)))
12734 ;
12735 else
12736 return 0;
12737 }
12738
12739 /* Extract the integral value of scale. */
12740 if (scale_rtx)
12741 {
12742 if (!CONST_INT_P (scale_rtx))
12743 return 0;
12744 scale = INTVAL (scale_rtx);
12745 }
12746
12747 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12748 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12749
12750 /* Avoid useless 0 displacement. */
12751 if (disp == const0_rtx && (base || index))
12752 disp = NULL_RTX;
12753
12754 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12755 if (base_reg && index_reg && scale == 1
12756 && (index_reg == arg_pointer_rtx
12757 || index_reg == frame_pointer_rtx
12758 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12759 {
12760 std::swap (base, index);
12761 std::swap (base_reg, index_reg);
12762 }
12763
12764 /* Special case: %ebp cannot be encoded as a base without a displacement.
12765 Similarly %r13. */
12766 if (!disp
12767 && base_reg
12768 && (base_reg == hard_frame_pointer_rtx
12769 || base_reg == frame_pointer_rtx
12770 || base_reg == arg_pointer_rtx
12771 || (REG_P (base_reg)
12772 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12773 || REGNO (base_reg) == R13_REG))))
12774 disp = const0_rtx;
12775
12776 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12777 Avoid this by transforming to [%esi+0].
12778 Reload calls address legitimization without cfun defined, so we need
12779 to test cfun for being non-NULL. */
12780 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12781 && base_reg && !index_reg && !disp
12782 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12783 disp = const0_rtx;
12784
12785 /* Special case: encode reg+reg instead of reg*2. */
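/* (%reg,%reg) needs no displacement, whereas an index-only form such as
   (,%reg,2) would require the base-less SIB encoding with a mandatory
   32-bit displacement; see also the next special case. */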
12786 if (!base && index && scale == 2)
12787 base = index, base_reg = index_reg, scale = 1;
12788
12789 /* Special case: scaling cannot be encoded without base or displacement. */
12790 if (!base && !disp && index && scale != 1)
12791 disp = const0_rtx;
12792
12793 out->base = base;
12794 out->index = index;
12795 out->disp = disp;
12796 out->scale = scale;
12797 out->seg = seg;
12798
12799 return retval;
12800 }
12801 \f
12802 /* Return cost of the memory address x.
12803 For i386, it is better to use a complex address than to let gcc copy
12804 the address into a reg and make a new pseudo. But not if the address
12805 requires two regs - that would mean more pseudos with longer
12806 lifetimes. */
12807 static int
12808 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12809 {
12810 struct ix86_address parts;
12811 int cost = 1;
12812 int ok = ix86_decompose_address (x, &parts);
12813
12814 gcc_assert (ok);
12815
12816 if (parts.base && GET_CODE (parts.base) == SUBREG)
12817 parts.base = SUBREG_REG (parts.base);
12818 if (parts.index && GET_CODE (parts.index) == SUBREG)
12819 parts.index = SUBREG_REG (parts.index);
12820
12821 /* Attempt to minimize the number of registers in the address. */
12822 if ((parts.base
12823 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12824 || (parts.index
12825 && (!REG_P (parts.index)
12826 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12827 cost++;
12828
12829 /* When the address base or index is "pic_offset_table_rtx" we don't increase
12830 the address cost. When a memop with "pic_offset_table_rtx" is not invariant
12831 itself it most likely means that the base or index is not invariant.
12832 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12833 profitable for x86. */
12834 if (parts.base
12835 && (!pic_offset_table_rtx
12836 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12837 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12838 && parts.index
12839 && (!pic_offset_table_rtx
12840 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12841 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12842 && parts.base != parts.index)
12843 cost++;
12844
12845 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12846 since its predecode logic can't detect the length of instructions
12847 and it degenerates to vector decoding. Increase the cost of such
12848 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12849 to split such addresses or even refuse such addresses at all.
12850
12851 Following addressing modes are affected:
12852 [base+scale*index]
12853 [scale*index+disp]
12854 [base+index]
12855
12856 The first and last case may be avoidable by explicitly coding the zero into
12857 the memory address, but I don't have an AMD-K6 machine handy to check this
12858 theory. */
12859
12860 if (TARGET_K6
12861 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12862 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12863 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12864 cost += 10;
12865
12866 return cost;
12867 }
12868 \f
12869 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12870 this is used to form addresses of local data when -fPIC is in
12871 use. */
12872
12873 static bool
12874 darwin_local_data_pic (rtx disp)
12875 {
12876 return (GET_CODE (disp) == UNSPEC
12877 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12878 }
12879
12880 /* Determine if a given RTX is a valid constant. We already know this
12881 satisfies CONSTANT_P. */
12882
12883 static bool
12884 ix86_legitimate_constant_p (machine_mode, rtx x)
12885 {
12886 /* Pointer bounds constants are not valid. */
12887 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12888 return false;
12889
12890 switch (GET_CODE (x))
12891 {
12892 case CONST:
12893 x = XEXP (x, 0);
12894
12895 if (GET_CODE (x) == PLUS)
12896 {
12897 if (!CONST_INT_P (XEXP (x, 1)))
12898 return false;
12899 x = XEXP (x, 0);
12900 }
12901
12902 if (TARGET_MACHO && darwin_local_data_pic (x))
12903 return true;
12904
12905 /* Only some unspecs are valid as "constants". */
12906 if (GET_CODE (x) == UNSPEC)
12907 switch (XINT (x, 1))
12908 {
12909 case UNSPEC_GOT:
12910 case UNSPEC_GOTOFF:
12911 case UNSPEC_PLTOFF:
12912 return TARGET_64BIT;
12913 case UNSPEC_TPOFF:
12914 case UNSPEC_NTPOFF:
12915 x = XVECEXP (x, 0, 0);
12916 return (GET_CODE (x) == SYMBOL_REF
12917 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12918 case UNSPEC_DTPOFF:
12919 x = XVECEXP (x, 0, 0);
12920 return (GET_CODE (x) == SYMBOL_REF
12921 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12922 default:
12923 return false;
12924 }
12925
12926 /* We must have drilled down to a symbol. */
12927 if (GET_CODE (x) == LABEL_REF)
12928 return true;
12929 if (GET_CODE (x) != SYMBOL_REF)
12930 return false;
12931 /* FALLTHRU */
12932
12933 case SYMBOL_REF:
12934 /* TLS symbols are never valid. */
12935 if (SYMBOL_REF_TLS_MODEL (x))
12936 return false;
12937
12938 /* DLLIMPORT symbols are never valid. */
12939 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12940 && SYMBOL_REF_DLLIMPORT_P (x))
12941 return false;
12942
12943 #if TARGET_MACHO
12944 /* mdynamic-no-pic */
12945 if (MACHO_DYNAMIC_NO_PIC_P)
12946 return machopic_symbol_defined_p (x);
12947 #endif
12948 break;
12949
12950 case CONST_DOUBLE:
12951 if (GET_MODE (x) == TImode
12952 && x != CONST0_RTX (TImode)
12953 && !TARGET_64BIT)
12954 return false;
12955 break;
12956
12957 case CONST_VECTOR:
12958 if (!standard_sse_constant_p (x))
12959 return false;
12960
12961 default:
12962 break;
12963 }
12964
12965 /* Otherwise we handle everything else in the move patterns. */
12966 return true;
12967 }
12968
12969 /* Determine if it's legal to put X into the constant pool. This
12970 is not possible for the address of thread-local symbols, which
12971 is checked above. */
12972
12973 static bool
12974 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12975 {
12976 /* We can always put integral constants and vectors in memory. */
12977 switch (GET_CODE (x))
12978 {
12979 case CONST_INT:
12980 case CONST_DOUBLE:
12981 case CONST_VECTOR:
12982 return false;
12983
12984 default:
12985 break;
12986 }
12987 return !ix86_legitimate_constant_p (mode, x);
12988 }
12989
12990 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
12991 otherwise zero. */
12992
12993 static bool
12994 is_imported_p (rtx x)
12995 {
12996 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12997 || GET_CODE (x) != SYMBOL_REF)
12998 return false;
12999
13000 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13001 }
13002
13003
13004 /* Nonzero if the constant value X is a legitimate general operand
13005 when generating PIC code. It is given that flag_pic is on and
13006 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13007
13008 bool
13009 legitimate_pic_operand_p (rtx x)
13010 {
13011 rtx inner;
13012
13013 switch (GET_CODE (x))
13014 {
13015 case CONST:
13016 inner = XEXP (x, 0);
13017 if (GET_CODE (inner) == PLUS
13018 && CONST_INT_P (XEXP (inner, 1)))
13019 inner = XEXP (inner, 0);
13020
13021 /* Only some unspecs are valid as "constants". */
13022 if (GET_CODE (inner) == UNSPEC)
13023 switch (XINT (inner, 1))
13024 {
13025 case UNSPEC_GOT:
13026 case UNSPEC_GOTOFF:
13027 case UNSPEC_PLTOFF:
13028 return TARGET_64BIT;
13029 case UNSPEC_TPOFF:
13030 x = XVECEXP (inner, 0, 0);
13031 return (GET_CODE (x) == SYMBOL_REF
13032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13033 case UNSPEC_MACHOPIC_OFFSET:
13034 return legitimate_pic_address_disp_p (x);
13035 default:
13036 return false;
13037 }
13038 /* FALLTHRU */
13039
13040 case SYMBOL_REF:
13041 case LABEL_REF:
13042 return legitimate_pic_address_disp_p (x);
13043
13044 default:
13045 return true;
13046 }
13047 }
13048
13049 /* Determine if a given CONST RTX is a valid memory displacement
13050 in PIC mode. */
13051
13052 bool
13053 legitimate_pic_address_disp_p (rtx disp)
13054 {
13055 bool saw_plus;
13056
13057 /* In 64bit mode we can allow direct addresses of symbols and labels
13058 when they are not dynamic symbols. */
13059 if (TARGET_64BIT)
13060 {
13061 rtx op0 = disp, op1;
13062
13063 switch (GET_CODE (disp))
13064 {
13065 case LABEL_REF:
13066 return true;
13067
13068 case CONST:
13069 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13070 break;
13071 op0 = XEXP (XEXP (disp, 0), 0);
13072 op1 = XEXP (XEXP (disp, 0), 1);
13073 if (!CONST_INT_P (op1)
13074 || INTVAL (op1) >= 16*1024*1024
13075 || INTVAL (op1) < -16*1024*1024)
13076 break;
13077 if (GET_CODE (op0) == LABEL_REF)
13078 return true;
13079 if (GET_CODE (op0) == CONST
13080 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13081 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13082 return true;
13083 if (GET_CODE (op0) == UNSPEC
13084 && XINT (op0, 1) == UNSPEC_PCREL)
13085 return true;
13086 if (GET_CODE (op0) != SYMBOL_REF)
13087 break;
13088 /* FALLTHRU */
13089
13090 case SYMBOL_REF:
13091 /* TLS references should always be enclosed in UNSPEC.
13092 A dllimported symbol always needs to be resolved. */
13093 if (SYMBOL_REF_TLS_MODEL (op0)
13094 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13095 return false;
13096
13097 if (TARGET_PECOFF)
13098 {
13099 if (is_imported_p (op0))
13100 return true;
13101
13102 if (SYMBOL_REF_FAR_ADDR_P (op0)
13103 || !SYMBOL_REF_LOCAL_P (op0))
13104 break;
13105
13106 /* Function symbols need to be resolved only for
13107 the large model.
13108 For the small model we don't need to resolve anything
13109 here. */
13110 if ((ix86_cmodel != CM_LARGE_PIC
13111 && SYMBOL_REF_FUNCTION_P (op0))
13112 || ix86_cmodel == CM_SMALL_PIC)
13113 return true;
13114 /* Non-external symbols don't need to be resolved for
13115 the large and medium models. */
13116 if ((ix86_cmodel == CM_LARGE_PIC
13117 || ix86_cmodel == CM_MEDIUM_PIC)
13118 && !SYMBOL_REF_EXTERNAL_P (op0))
13119 return true;
13120 }
13121 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13122 && SYMBOL_REF_LOCAL_P (op0)
13123 && ix86_cmodel != CM_LARGE_PIC)
13124 return true;
13125 break;
13126
13127 default:
13128 break;
13129 }
13130 }
13131 if (GET_CODE (disp) != CONST)
13132 return false;
13133 disp = XEXP (disp, 0);
13134
13135 if (TARGET_64BIT)
13136 {
13137 /* It is unsafe to allow PLUS expressions; this limits the allowed
13138 distance of GOT tables. We should not need these anyway. */
13139 if (GET_CODE (disp) != UNSPEC
13140 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13141 && XINT (disp, 1) != UNSPEC_GOTOFF
13142 && XINT (disp, 1) != UNSPEC_PCREL
13143 && XINT (disp, 1) != UNSPEC_PLTOFF))
13144 return false;
13145
13146 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13147 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13148 return false;
13149 return true;
13150 }
13151
13152 saw_plus = false;
13153 if (GET_CODE (disp) == PLUS)
13154 {
13155 if (!CONST_INT_P (XEXP (disp, 1)))
13156 return false;
13157 disp = XEXP (disp, 0);
13158 saw_plus = true;
13159 }
13160
13161 if (TARGET_MACHO && darwin_local_data_pic (disp))
13162 return true;
13163
13164 if (GET_CODE (disp) != UNSPEC)
13165 return false;
13166
13167 switch (XINT (disp, 1))
13168 {
13169 case UNSPEC_GOT:
13170 if (saw_plus)
13171 return false;
13172 /* We need to check for both symbols and labels because VxWorks loads
13173 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13174 details. */
13175 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13176 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13177 case UNSPEC_GOTOFF:
13178 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13179 While the ABI also specifies a 32bit relocation, we don't produce it in
13180 the small PIC model at all. */
13181 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13182 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13183 && !TARGET_64BIT)
13184 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13185 return false;
13186 case UNSPEC_GOTTPOFF:
13187 case UNSPEC_GOTNTPOFF:
13188 case UNSPEC_INDNTPOFF:
13189 if (saw_plus)
13190 return false;
13191 disp = XVECEXP (disp, 0, 0);
13192 return (GET_CODE (disp) == SYMBOL_REF
13193 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13194 case UNSPEC_NTPOFF:
13195 disp = XVECEXP (disp, 0, 0);
13196 return (GET_CODE (disp) == SYMBOL_REF
13197 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13198 case UNSPEC_DTPOFF:
13199 disp = XVECEXP (disp, 0, 0);
13200 return (GET_CODE (disp) == SYMBOL_REF
13201 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13202 }
13203
13204 return false;
13205 }
13206
13207 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13208 replace the input X, or the original X if no replacement is called for.
13209 The output parameter *WIN is 1 if the calling macro should goto WIN,
13210 0 if it should not. */
13211
13212 bool
13213 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13214 int)
13215 {
13216 /* Reload can generate:
13217
13218 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13219 (reg:DI 97))
13220 (reg:DI 2 cx))
13221
13222 This RTX is rejected by ix86_legitimate_address_p due to
13223 the non-strictness of base register 97. Following this rejection,
13224 reload pushes all three components into separate registers,
13225 creating an invalid memory address RTX.
13226
13227 The following code reloads only the invalid part of the
13228 memory address RTX. */
13229
13230 if (GET_CODE (x) == PLUS
13231 && REG_P (XEXP (x, 1))
13232 && GET_CODE (XEXP (x, 0)) == PLUS
13233 && REG_P (XEXP (XEXP (x, 0), 1)))
13234 {
13235 rtx base, index;
13236 bool something_reloaded = false;
13237
13238 base = XEXP (XEXP (x, 0), 1);
13239 if (!REG_OK_FOR_BASE_STRICT_P (base))
13240 {
13241 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13242 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13243 opnum, (enum reload_type) type);
13244 something_reloaded = true;
13245 }
13246
13247 index = XEXP (x, 1);
13248 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13249 {
13250 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13251 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13252 opnum, (enum reload_type) type);
13253 something_reloaded = true;
13254 }
13255
13256 gcc_assert (something_reloaded);
13257 return true;
13258 }
13259
13260 return false;
13261 }
13262
13263 /* Determine if op is a suitable RTX for an address register.
13264 Return the naked register if a register or a register subreg is
13265 found, otherwise return NULL_RTX. */
13266
13267 static rtx
13268 ix86_validate_address_register (rtx op)
13269 {
13270 machine_mode mode = GET_MODE (op);
13271
13272 /* Only SImode or DImode registers can form the address. */
13273 if (mode != SImode && mode != DImode)
13274 return NULL_RTX;
13275
13276 if (REG_P (op))
13277 return op;
13278 else if (GET_CODE (op) == SUBREG)
13279 {
13280 rtx reg = SUBREG_REG (op);
13281
13282 if (!REG_P (reg))
13283 return NULL_RTX;
13284
13285 mode = GET_MODE (reg);
13286
13287 /* Don't allow SUBREGs that span more than a word. It can
13288 lead to spill failures when the register is one word out
13289 of a two word structure. */
13290 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13291 return NULL_RTX;
13292
13293 /* Allow only SUBREGs of non-eliminable hard registers. */
13294 if (register_no_elim_operand (reg, mode))
13295 return reg;
13296 }
13297
13298 /* Op is not a register. */
13299 return NULL_RTX;
13300 }
13301
13302 /* Recognizes RTL expressions that are valid memory addresses for an
13303 instruction. The MODE argument is the machine mode for the MEM
13304 expression that wants to use this address.
13305
13306 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13307 convert common non-canonical forms to canonical form so that they will
13308 be recognized. */
13309
13310 static bool
13311 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13312 {
13313 struct ix86_address parts;
13314 rtx base, index, disp;
13315 HOST_WIDE_INT scale;
13316 enum ix86_address_seg seg;
13317
13318 if (ix86_decompose_address (addr, &parts) <= 0)
13319 /* Decomposition failed. */
13320 return false;
13321
13322 base = parts.base;
13323 index = parts.index;
13324 disp = parts.disp;
13325 scale = parts.scale;
13326 seg = parts.seg;
13327
13328 /* Validate base register. */
13329 if (base)
13330 {
13331 rtx reg = ix86_validate_address_register (base);
13332
13333 if (reg == NULL_RTX)
13334 return false;
13335
13336 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13337 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13338 /* Base is not valid. */
13339 return false;
13340 }
13341
13342 /* Validate index register. */
13343 if (index)
13344 {
13345 rtx reg = ix86_validate_address_register (index);
13346
13347 if (reg == NULL_RTX)
13348 return false;
13349
13350 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13351 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13352 /* Index is not valid. */
13353 return false;
13354 }
13355
13356 /* Index and base should have the same mode. */
13357 if (base && index
13358 && GET_MODE (base) != GET_MODE (index))
13359 return false;
13360
13361 /* Address override works only on the (%reg) part of %fs:(%reg). */
13362 if (seg != SEG_DEFAULT
13363 && ((base && GET_MODE (base) != word_mode)
13364 || (index && GET_MODE (index) != word_mode)))
13365 return false;
13366
13367 /* Validate scale factor. */
13368 if (scale != 1)
13369 {
13370 if (!index)
13371 /* Scale without index. */
13372 return false;
13373
13374 if (scale != 2 && scale != 4 && scale != 8)
13375 /* Scale is not a valid multiplier. */
13376 return false;
13377 }
13378
13379 /* Validate displacement. */
13380 if (disp)
13381 {
13382 if (GET_CODE (disp) == CONST
13383 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13384 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13385 switch (XINT (XEXP (disp, 0), 1))
13386 {
13387 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13388 used. While the ABI also specifies 32bit relocations, we don't produce
13389 them at all and use IP-relative addressing instead. */
13390 case UNSPEC_GOT:
13391 case UNSPEC_GOTOFF:
13392 gcc_assert (flag_pic);
13393 if (!TARGET_64BIT)
13394 goto is_legitimate_pic;
13395
13396 /* 64bit address unspec. */
13397 return false;
13398
13399 case UNSPEC_GOTPCREL:
13400 case UNSPEC_PCREL:
13401 gcc_assert (flag_pic);
13402 goto is_legitimate_pic;
13403
13404 case UNSPEC_GOTTPOFF:
13405 case UNSPEC_GOTNTPOFF:
13406 case UNSPEC_INDNTPOFF:
13407 case UNSPEC_NTPOFF:
13408 case UNSPEC_DTPOFF:
13409 break;
13410
13411 case UNSPEC_STACK_CHECK:
13412 gcc_assert (flag_split_stack);
13413 break;
13414
13415 default:
13416 /* Invalid address unspec. */
13417 return false;
13418 }
13419
13420 else if (SYMBOLIC_CONST (disp)
13421 && (flag_pic
13422 || (TARGET_MACHO
13423 #if TARGET_MACHO
13424 && MACHOPIC_INDIRECT
13425 && !machopic_operand_p (disp)
13426 #endif
13427 )))
13428 {
13429
13430 is_legitimate_pic:
13431 if (TARGET_64BIT && (index || base))
13432 {
13433 /* foo@dtpoff(%rX) is ok. */
13434 if (GET_CODE (disp) != CONST
13435 || GET_CODE (XEXP (disp, 0)) != PLUS
13436 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13437 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13438 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13439 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13440 /* Non-constant pic memory reference. */
13441 return false;
13442 }
13443 else if ((!TARGET_MACHO || flag_pic)
13444 && ! legitimate_pic_address_disp_p (disp))
13445 /* Displacement is an invalid pic construct. */
13446 return false;
13447 #if TARGET_MACHO
13448 else if (MACHO_DYNAMIC_NO_PIC_P
13449 && !ix86_legitimate_constant_p (Pmode, disp))
13450 /* displacement must be referenced via non_lazy_pointer */
13451 return false;
13452 #endif
13453
13454 /* This code used to verify that a symbolic pic displacement
13455 includes the pic_offset_table_rtx register.
13456
13457 While this is a good idea, unfortunately these constructs may
13458 be created by the "adds using lea" optimization for incorrect
13459 code like:
13460
13461 int a;
13462 int foo(int i)
13463 {
13464 return *(&a+i);
13465 }
13466
13467 This code is nonsensical, but results in addressing the
13468 GOT table with a pic_offset_table_rtx base. We can't
13469 just refuse it easily, since it gets matched by the
13470 "addsi3" pattern, which later gets split to lea when the
13471 output register differs from the input. While this
13472 could be handled by a separate addsi pattern for this case
13473 that never results in lea, disabling this test seems to be the
13474 easier and correct fix for the crash. */
13475 }
13476 else if (GET_CODE (disp) != LABEL_REF
13477 && !CONST_INT_P (disp)
13478 && (GET_CODE (disp) != CONST
13479 || !ix86_legitimate_constant_p (Pmode, disp))
13480 && (GET_CODE (disp) != SYMBOL_REF
13481 || !ix86_legitimate_constant_p (Pmode, disp)))
13482 /* Displacement is not constant. */
13483 return false;
13484 else if (TARGET_64BIT
13485 && !x86_64_immediate_operand (disp, VOIDmode))
13486 /* Displacement is out of range. */
13487 return false;
13488 /* In x32 mode, constant addresses are sign-extended to 64bit, so
13489 we have to reject addresses from 0x80000000 to 0xffffffff. */
13490 else if (TARGET_X32 && !(index || base)
13491 && CONST_INT_P (disp)
13492 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13493 return false;
13494 }
13495
13496 /* Everything looks valid. */
13497 return true;
13498 }
13499
13500 /* Determine if a given RTX is a valid constant address. */
13501
13502 bool
13503 constant_address_p (rtx x)
13504 {
13505 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13506 }
13507 \f
13508 /* Return a unique alias set for the GOT. */
13509
13510 static alias_set_type
13511 ix86_GOT_alias_set (void)
13512 {
13513 static alias_set_type set = -1;
13514 if (set == -1)
13515 set = new_alias_set ();
13516 return set;
13517 }
13518
13519 /* Set regs_ever_live for the PIC base address register
13520 to true if required. */
13521 static void
13522 set_pic_reg_ever_live ()
13523 {
13524 if (reload_in_progress)
13525 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13526 }
13527
13528 /* Return a legitimate reference for ORIG (an address) using the
13529 register REG. If REG is 0, a new pseudo is generated.
13530
13531 There are two types of references that must be handled:
13532
13533 1. Global data references must load the address from the GOT, via
13534 the PIC reg. An insn is emitted to do this load, and the reg is
13535 returned.
13536
13537 2. Static data references, constant pool addresses, and code labels
13538 compute the address as an offset from the GOT, whose base is in
13539 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13540 differentiate them from global data objects. The returned
13541 address is the PIC reg + an unspec constant.
13542
13543 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13544 reg also appears in the address. */
13545
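/* Illustrative sketch, not taken from the surrounding code: on a 32-bit
   GNU/Linux target with %ebx as the PIC register, the two reference types
   described above roughly correspond to

       extern int glob;   ->  movl glob@GOT(%ebx), %eax     (type 1: load the
                                                             address from the GOT)
       static int loc;    ->  leal loc@GOTOFF(%ebx), %eax   (type 2: PIC reg
                                                             plus unspec constant)

   so global data goes through a GOT slot, while local/static data is a
   constant displacement from the PIC base.  */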
13546 static rtx
13547 legitimize_pic_address (rtx orig, rtx reg)
13548 {
13549 rtx addr = orig;
13550 rtx new_rtx = orig;
13551
13552 #if TARGET_MACHO
13553 if (TARGET_MACHO && !TARGET_64BIT)
13554 {
13555 if (reg == 0)
13556 reg = gen_reg_rtx (Pmode);
13557 /* Use the generic Mach-O PIC machinery. */
13558 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13559 }
13560 #endif
13561
13562 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13563 {
13564 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13565 if (tmp)
13566 return tmp;
13567 }
13568
13569 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13570 new_rtx = addr;
13571 else if (TARGET_64BIT && !TARGET_PECOFF
13572 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13573 {
13574 rtx tmpreg;
13575 /* This symbol may be referenced via a displacement from the PIC
13576 base address (@GOTOFF). */
13577
13578 set_pic_reg_ever_live ();
13579 if (GET_CODE (addr) == CONST)
13580 addr = XEXP (addr, 0);
13581 if (GET_CODE (addr) == PLUS)
13582 {
13583 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13584 UNSPEC_GOTOFF);
13585 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13586 }
13587 else
13588 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13589 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13590 if (!reg)
13591 tmpreg = gen_reg_rtx (Pmode);
13592 else
13593 tmpreg = reg;
13594 emit_move_insn (tmpreg, new_rtx);
13595
13596 if (reg != 0)
13597 {
13598 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13599 tmpreg, 1, OPTAB_DIRECT);
13600 new_rtx = reg;
13601 }
13602 else
13603 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13604 }
13605 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13606 {
13607 /* This symbol may be referenced via a displacement from the PIC
13608 base address (@GOTOFF). */
13609
13610 set_pic_reg_ever_live ();
13611 if (GET_CODE (addr) == CONST)
13612 addr = XEXP (addr, 0);
13613 if (GET_CODE (addr) == PLUS)
13614 {
13615 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13616 UNSPEC_GOTOFF);
13617 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13618 }
13619 else
13620 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13621 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13622 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13623
13624 if (reg != 0)
13625 {
13626 emit_move_insn (reg, new_rtx);
13627 new_rtx = reg;
13628 }
13629 }
13630 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13631 /* We can't use @GOTOFF for text labels on VxWorks;
13632 see gotoff_operand. */
13633 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13634 {
13635 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13636 if (tmp)
13637 return tmp;
13638
13639 /* For x64 PE-COFF there is no GOT table, so we use the address
13640 directly. */
13641 if (TARGET_64BIT && TARGET_PECOFF)
13642 {
13643 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13644 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13645
13646 if (reg == 0)
13647 reg = gen_reg_rtx (Pmode);
13648 emit_move_insn (reg, new_rtx);
13649 new_rtx = reg;
13650 }
13651 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13652 {
13653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13654 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13655 new_rtx = gen_const_mem (Pmode, new_rtx);
13656 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13657
13658 if (reg == 0)
13659 reg = gen_reg_rtx (Pmode);
13660 /* Use gen_movsi directly, otherwise the address is loaded
13661 into a register for CSE. We don't want to CSE these addresses;
13662 instead we CSE addresses loaded from the GOT table, so skip this. */
13663 emit_insn (gen_movsi (reg, new_rtx));
13664 new_rtx = reg;
13665 }
13666 else
13667 {
13668 /* This symbol must be referenced via a load from the
13669 Global Offset Table (@GOT). */
13670
13671 set_pic_reg_ever_live ();
13672 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13673 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13674 if (TARGET_64BIT)
13675 new_rtx = force_reg (Pmode, new_rtx);
13676 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13677 new_rtx = gen_const_mem (Pmode, new_rtx);
13678 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13679
13680 if (reg == 0)
13681 reg = gen_reg_rtx (Pmode);
13682 emit_move_insn (reg, new_rtx);
13683 new_rtx = reg;
13684 }
13685 }
13686 else
13687 {
13688 if (CONST_INT_P (addr)
13689 && !x86_64_immediate_operand (addr, VOIDmode))
13690 {
13691 if (reg)
13692 {
13693 emit_move_insn (reg, addr);
13694 new_rtx = reg;
13695 }
13696 else
13697 new_rtx = force_reg (Pmode, addr);
13698 }
13699 else if (GET_CODE (addr) == CONST)
13700 {
13701 addr = XEXP (addr, 0);
13702
13703 /* We must match stuff we generated before. Assume the only
13704 unspecs that can get here are ours. Not that we could do
13705 anything with them anyway.... */
13706 if (GET_CODE (addr) == UNSPEC
13707 || (GET_CODE (addr) == PLUS
13708 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13709 return orig;
13710 gcc_assert (GET_CODE (addr) == PLUS);
13711 }
13712 if (GET_CODE (addr) == PLUS)
13713 {
13714 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13715
13716 /* Check first to see if this is a constant offset from a @GOTOFF
13717 symbol reference. */
13718 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13719 && CONST_INT_P (op1))
13720 {
13721 if (!TARGET_64BIT)
13722 {
13723 set_pic_reg_ever_live ();
13724 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13725 UNSPEC_GOTOFF);
13726 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13727 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13728 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13729
13730 if (reg != 0)
13731 {
13732 emit_move_insn (reg, new_rtx);
13733 new_rtx = reg;
13734 }
13735 }
13736 else
13737 {
13738 if (INTVAL (op1) < -16*1024*1024
13739 || INTVAL (op1) >= 16*1024*1024)
13740 {
13741 if (!x86_64_immediate_operand (op1, Pmode))
13742 op1 = force_reg (Pmode, op1);
13743 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13744 }
13745 }
13746 }
13747 else
13748 {
13749 rtx base = legitimize_pic_address (op0, reg);
13750 machine_mode mode = GET_MODE (base);
13751 new_rtx
13752 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13753
13754 if (CONST_INT_P (new_rtx))
13755 {
13756 if (INTVAL (new_rtx) < -16*1024*1024
13757 || INTVAL (new_rtx) >= 16*1024*1024)
13758 {
13759 if (!x86_64_immediate_operand (new_rtx, mode))
13760 new_rtx = force_reg (mode, new_rtx);
13761 new_rtx
13762 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13763 }
13764 else
13765 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13766 }
13767 else
13768 {
13769 if (GET_CODE (new_rtx) == PLUS
13770 && CONSTANT_P (XEXP (new_rtx, 1)))
13771 {
13772 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13773 new_rtx = XEXP (new_rtx, 1);
13774 }
13775 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13776 }
13777 }
13778 }
13779 }
13780 return new_rtx;
13781 }
13782 \f
13783 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13784
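/* Illustrative note (an assumption about the md patterns, not stated in
   this file): the UNSPEC_TP reference built below is matched by the
   *load_tp patterns in i386.md, which typically expand to a
   segment-register load of the thread control block pointer, e.g.
   "movl %gs:0, %eax" on 32-bit and "movq %fs:0, %rax" on 64-bit
   GNU/Linux targets.  */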
13785 static rtx
13786 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13787 {
13788 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13789
13790 if (GET_MODE (tp) != tp_mode)
13791 {
13792 gcc_assert (GET_MODE (tp) == SImode);
13793 gcc_assert (tp_mode == DImode);
13794
13795 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13796 }
13797
13798 if (to_reg)
13799 tp = copy_to_mode_reg (tp_mode, tp);
13800
13801 return tp;
13802 }
13803
13804 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13805
13806 static GTY(()) rtx ix86_tls_symbol;
13807
13808 static rtx
13809 ix86_tls_get_addr (void)
13810 {
13811 if (!ix86_tls_symbol)
13812 {
13813 const char *sym
13814 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13815 ? "___tls_get_addr" : "__tls_get_addr");
13816
13817 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13818 }
13819
13820 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13821 {
13822 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13823 UNSPEC_PLTOFF);
13824 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13825 gen_rtx_CONST (Pmode, unspec));
13826 }
13827
13828 return ix86_tls_symbol;
13829 }
13830
13831 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13832
13833 static GTY(()) rtx ix86_tls_module_base_symbol;
13834
13835 rtx
13836 ix86_tls_module_base (void)
13837 {
13838 if (!ix86_tls_module_base_symbol)
13839 {
13840 ix86_tls_module_base_symbol
13841 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13842
13843 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13844 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13845 }
13846
13847 return ix86_tls_module_base_symbol;
13848 }
13849
13850 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13851 false if we expect this to be used for a memory address and true if
13852 we expect to load the address into a register. */
13853
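/* Illustrative summary (assumed typical GNU/Linux sequences, not taken
   from this file) of what the TLS models handled below roughly expand to
   on IA-32 with traditional GNU TLS:

     global-dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr@plt
     local-dynamic:  leal x@tlsldm(%ebx), %eax; call ___tls_get_addr@plt;
                     then address the variable as result + x@dtpoff
     initial-exec:   movl x@gotntpoff(%ebx), %ecx; movl %gs:(%ecx), %eax
     local-exec:     movl %gs:x@ntpoff, %eax

   The 64-bit sequences are analogous, using %fs and RIP-relative
   relocations.  */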
13854 static rtx
13855 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13856 {
13857 rtx dest, base, off;
13858 rtx pic = NULL_RTX, tp = NULL_RTX;
13859 machine_mode tp_mode = Pmode;
13860 int type;
13861
13862 /* Fall back to the global dynamic model if the toolchain cannot
13863 support local dynamic. */
13864 if (TARGET_SUN_TLS && !TARGET_64BIT
13865 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13866 && model == TLS_MODEL_LOCAL_DYNAMIC)
13867 model = TLS_MODEL_GLOBAL_DYNAMIC;
13868
13869 switch (model)
13870 {
13871 case TLS_MODEL_GLOBAL_DYNAMIC:
13872 dest = gen_reg_rtx (Pmode);
13873
13874 if (!TARGET_64BIT)
13875 {
13876 if (flag_pic && !TARGET_PECOFF)
13877 pic = pic_offset_table_rtx;
13878 else
13879 {
13880 pic = gen_reg_rtx (Pmode);
13881 emit_insn (gen_set_got (pic));
13882 }
13883 }
13884
13885 if (TARGET_GNU2_TLS)
13886 {
13887 if (TARGET_64BIT)
13888 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13889 else
13890 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13891
13892 tp = get_thread_pointer (Pmode, true);
13893 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13894
13895 if (GET_MODE (x) != Pmode)
13896 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13897
13898 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13899 }
13900 else
13901 {
13902 rtx caddr = ix86_tls_get_addr ();
13903
13904 if (TARGET_64BIT)
13905 {
13906 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13907 rtx_insn *insns;
13908
13909 start_sequence ();
13910 emit_call_insn
13911 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13912 insns = get_insns ();
13913 end_sequence ();
13914
13915 if (GET_MODE (x) != Pmode)
13916 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13917
13918 RTL_CONST_CALL_P (insns) = 1;
13919 emit_libcall_block (insns, dest, rax, x);
13920 }
13921 else
13922 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13923 }
13924 break;
13925
13926 case TLS_MODEL_LOCAL_DYNAMIC:
13927 base = gen_reg_rtx (Pmode);
13928
13929 if (!TARGET_64BIT)
13930 {
13931 if (flag_pic)
13932 pic = pic_offset_table_rtx;
13933 else
13934 {
13935 pic = gen_reg_rtx (Pmode);
13936 emit_insn (gen_set_got (pic));
13937 }
13938 }
13939
13940 if (TARGET_GNU2_TLS)
13941 {
13942 rtx tmp = ix86_tls_module_base ();
13943
13944 if (TARGET_64BIT)
13945 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13946 else
13947 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13948
13949 tp = get_thread_pointer (Pmode, true);
13950 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13951 gen_rtx_MINUS (Pmode, tmp, tp));
13952 }
13953 else
13954 {
13955 rtx caddr = ix86_tls_get_addr ();
13956
13957 if (TARGET_64BIT)
13958 {
13959 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13960 rtx_insn *insns;
13961 rtx eqv;
13962
13963 start_sequence ();
13964 emit_call_insn
13965 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13966 insns = get_insns ();
13967 end_sequence ();
13968
13969 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13970 share the LD_BASE result with other LD model accesses. */
13971 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13972 UNSPEC_TLS_LD_BASE);
13973
13974 RTL_CONST_CALL_P (insns) = 1;
13975 emit_libcall_block (insns, base, rax, eqv);
13976 }
13977 else
13978 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13979 }
13980
13981 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13982 off = gen_rtx_CONST (Pmode, off);
13983
13984 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13985
13986 if (TARGET_GNU2_TLS)
13987 {
13988 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13989
13990 if (GET_MODE (x) != Pmode)
13991 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13992
13993 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13994 }
13995 break;
13996
13997 case TLS_MODEL_INITIAL_EXEC:
13998 if (TARGET_64BIT)
13999 {
14000 if (TARGET_SUN_TLS && !TARGET_X32)
14001 {
14002 /* The Sun linker took the AMD64 TLS spec literally
14003 and can only handle %rax as the destination of the
14004 initial-exec code sequence. */
14005
14006 dest = gen_reg_rtx (DImode);
14007 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14008 return dest;
14009 }
14010
14011 /* Generate DImode references to avoid %fs:(%reg32)
14012 problems and the linker IE->LE relaxation bug. */
14013 tp_mode = DImode;
14014 pic = NULL;
14015 type = UNSPEC_GOTNTPOFF;
14016 }
14017 else if (flag_pic)
14018 {
14019 set_pic_reg_ever_live ();
14020 pic = pic_offset_table_rtx;
14021 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14022 }
14023 else if (!TARGET_ANY_GNU_TLS)
14024 {
14025 pic = gen_reg_rtx (Pmode);
14026 emit_insn (gen_set_got (pic));
14027 type = UNSPEC_GOTTPOFF;
14028 }
14029 else
14030 {
14031 pic = NULL;
14032 type = UNSPEC_INDNTPOFF;
14033 }
14034
14035 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14036 off = gen_rtx_CONST (tp_mode, off);
14037 if (pic)
14038 off = gen_rtx_PLUS (tp_mode, pic, off);
14039 off = gen_const_mem (tp_mode, off);
14040 set_mem_alias_set (off, ix86_GOT_alias_set ());
14041
14042 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14043 {
14044 base = get_thread_pointer (tp_mode,
14045 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14046 off = force_reg (tp_mode, off);
14047 return gen_rtx_PLUS (tp_mode, base, off);
14048 }
14049 else
14050 {
14051 base = get_thread_pointer (Pmode, true);
14052 dest = gen_reg_rtx (Pmode);
14053 emit_insn (ix86_gen_sub3 (dest, base, off));
14054 }
14055 break;
14056
14057 case TLS_MODEL_LOCAL_EXEC:
14058 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14059 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14060 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14061 off = gen_rtx_CONST (Pmode, off);
14062
14063 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14064 {
14065 base = get_thread_pointer (Pmode,
14066 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14067 return gen_rtx_PLUS (Pmode, base, off);
14068 }
14069 else
14070 {
14071 base = get_thread_pointer (Pmode, true);
14072 dest = gen_reg_rtx (Pmode);
14073 emit_insn (ix86_gen_sub3 (dest, base, off));
14074 }
14075 break;
14076
14077 default:
14078 gcc_unreachable ();
14079 }
14080
14081 return dest;
14082 }
14083
14084 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14085 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14086 unique refptr-DECL symbol corresponding to symbol DECL. */
14087
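/* Illustrative example (an assumption about typical use, not taken from
   this file): for a declaration such as

       __declspec(dllimport) int x;

   references to x are redirected through the import-table pointer
   "__imp_<x>" created here, while for medium/large code models an
   external DECL is instead referenced through a locally emitted
   "refptr" pointer.  */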
14088 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14089 {
14090 static inline hashval_t hash (tree_map *m) { return m->hash; }
14091 static inline bool
14092 equal (tree_map *a, tree_map *b)
14093 {
14094 return a->base.from == b->base.from;
14095 }
14096
14097 static void
14098 handle_cache_entry (tree_map *&m)
14099 {
14100 extern void gt_ggc_mx (tree_map *&);
14101 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14102 return;
14103 else if (ggc_marked_p (m->base.from))
14104 gt_ggc_mx (m);
14105 else
14106 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14107 }
14108 };
14109
14110 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14111
14112 static tree
14113 get_dllimport_decl (tree decl, bool beimport)
14114 {
14115 struct tree_map *h, in;
14116 const char *name;
14117 const char *prefix;
14118 size_t namelen, prefixlen;
14119 char *imp_name;
14120 tree to;
14121 rtx rtl;
14122
14123 if (!dllimport_map)
14124 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14125
14126 in.hash = htab_hash_pointer (decl);
14127 in.base.from = decl;
14128 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14129 h = *loc;
14130 if (h)
14131 return h->to;
14132
14133 *loc = h = ggc_alloc<tree_map> ();
14134 h->hash = in.hash;
14135 h->base.from = decl;
14136 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14137 VAR_DECL, NULL, ptr_type_node);
14138 DECL_ARTIFICIAL (to) = 1;
14139 DECL_IGNORED_P (to) = 1;
14140 DECL_EXTERNAL (to) = 1;
14141 TREE_READONLY (to) = 1;
14142
14143 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14144 name = targetm.strip_name_encoding (name);
14145 if (beimport)
14146 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14147 ? "*__imp_" : "*__imp__";
14148 else
14149 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14150 namelen = strlen (name);
14151 prefixlen = strlen (prefix);
14152 imp_name = (char *) alloca (namelen + prefixlen + 1);
14153 memcpy (imp_name, prefix, prefixlen);
14154 memcpy (imp_name + prefixlen, name, namelen + 1);
14155
14156 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14157 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14158 SET_SYMBOL_REF_DECL (rtl, to);
14159 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14160 if (!beimport)
14161 {
14162 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14163 #ifdef SUB_TARGET_RECORD_STUB
14164 SUB_TARGET_RECORD_STUB (name);
14165 #endif
14166 }
14167
14168 rtl = gen_const_mem (Pmode, rtl);
14169 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14170
14171 SET_DECL_RTL (to, rtl);
14172 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14173
14174 return to;
14175 }
14176
14177 /* Expand SYMBOL into its corresponding far-address symbol.
14178 WANT_REG is true if we require the result to be a register. */
14179
14180 static rtx
14181 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14182 {
14183 tree imp_decl;
14184 rtx x;
14185
14186 gcc_assert (SYMBOL_REF_DECL (symbol));
14187 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14188
14189 x = DECL_RTL (imp_decl);
14190 if (want_reg)
14191 x = force_reg (Pmode, x);
14192 return x;
14193 }
14194
14195 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14196 true if we require the result to be a register. */
14197
14198 static rtx
14199 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14200 {
14201 tree imp_decl;
14202 rtx x;
14203
14204 gcc_assert (SYMBOL_REF_DECL (symbol));
14205 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14206
14207 x = DECL_RTL (imp_decl);
14208 if (want_reg)
14209 x = force_reg (Pmode, x);
14210 return x;
14211 }
14212
14213 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14214 is true if we require the result to be a register. */
14215
14216 static rtx
14217 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14218 {
14219 if (!TARGET_PECOFF)
14220 return NULL_RTX;
14221
14222 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14223 {
14224 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14225 return legitimize_dllimport_symbol (addr, inreg);
14226 if (GET_CODE (addr) == CONST
14227 && GET_CODE (XEXP (addr, 0)) == PLUS
14228 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14229 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14230 {
14231 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14232 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14233 }
14234 }
14235
14236 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14237 return NULL_RTX;
14238 if (GET_CODE (addr) == SYMBOL_REF
14239 && !is_imported_p (addr)
14240 && SYMBOL_REF_EXTERNAL_P (addr)
14241 && SYMBOL_REF_DECL (addr))
14242 return legitimize_pe_coff_extern_decl (addr, inreg);
14243
14244 if (GET_CODE (addr) == CONST
14245 && GET_CODE (XEXP (addr, 0)) == PLUS
14246 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14247 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14248 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14249 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14250 {
14251 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14252 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14253 }
14254 return NULL_RTX;
14255 }
14256
14257 /* Try machine-dependent ways of modifying an illegitimate address
14258 to be legitimate. If we find one, return the new, valid address.
14259 This macro is used in only one place: `memory_address' in explow.c.
14260
14261 OLDX is the address as it was before break_out_memory_refs was called.
14262 In some cases it is useful to look at this to decide what needs to be done.
14263
14264 It is always safe for this macro to do nothing. It exists to recognize
14265 opportunities to optimize the output.
14266
14267 For the 80386, we handle X+REG by loading X into a register R and
14268 using R+REG. R will go in a general reg and indexing will be used.
14269 However, if REG is a broken-out memory address or multiplication,
14270 nothing needs to be done because REG can certainly go in a general reg.
14271
14272 When -fpic is used, special handling is needed for symbolic references.
14273 See comments by legitimize_pic_address in i386.c for details. */
14274
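/* Illustrative example of one canonicalization performed below (derived
   from the code, shown here only for clarity): a left shift by 0..3 is
   turned into a multiply so the address can match the index*scale form,
   e.g.

       (plus (ashift (reg A) (const_int 2)) (reg B))
    -> (plus (mult   (reg A) (const_int 4)) (reg B))

   which corresponds to a base + index*4 operand such as (%B,%A,4).  */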
14275 static rtx
14276 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14277 {
14278 int changed = 0;
14279 unsigned log;
14280
14281 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14282 if (log)
14283 return legitimize_tls_address (x, (enum tls_model) log, false);
14284 if (GET_CODE (x) == CONST
14285 && GET_CODE (XEXP (x, 0)) == PLUS
14286 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14287 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14288 {
14289 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14290 (enum tls_model) log, false);
14291 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14292 }
14293
14294 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14295 {
14296 rtx tmp = legitimize_pe_coff_symbol (x, true);
14297 if (tmp)
14298 return tmp;
14299 }
14300
14301 if (flag_pic && SYMBOLIC_CONST (x))
14302 return legitimize_pic_address (x, 0);
14303
14304 #if TARGET_MACHO
14305 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14306 return machopic_indirect_data_reference (x, 0);
14307 #endif
14308
14309 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14310 if (GET_CODE (x) == ASHIFT
14311 && CONST_INT_P (XEXP (x, 1))
14312 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14313 {
14314 changed = 1;
14315 log = INTVAL (XEXP (x, 1));
14316 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14317 GEN_INT (1 << log));
14318 }
14319
14320 if (GET_CODE (x) == PLUS)
14321 {
14322 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14323
14324 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14325 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14326 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14327 {
14328 changed = 1;
14329 log = INTVAL (XEXP (XEXP (x, 0), 1));
14330 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14331 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14332 GEN_INT (1 << log));
14333 }
14334
14335 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14336 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14337 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14338 {
14339 changed = 1;
14340 log = INTVAL (XEXP (XEXP (x, 1), 1));
14341 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14342 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14343 GEN_INT (1 << log));
14344 }
14345
14346 /* Put multiply first if it isn't already. */
14347 if (GET_CODE (XEXP (x, 1)) == MULT)
14348 {
14349 rtx tmp = XEXP (x, 0);
14350 XEXP (x, 0) = XEXP (x, 1);
14351 XEXP (x, 1) = tmp;
14352 changed = 1;
14353 }
14354
14355 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14356 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14357 created by virtual register instantiation, register elimination, and
14358 similar optimizations. */
14359 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14360 {
14361 changed = 1;
14362 x = gen_rtx_PLUS (Pmode,
14363 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14364 XEXP (XEXP (x, 1), 0)),
14365 XEXP (XEXP (x, 1), 1));
14366 }
14367
14368 /* Canonicalize
14369 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14370 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14371 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14372 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14373 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14374 && CONSTANT_P (XEXP (x, 1)))
14375 {
14376 rtx constant;
14377 rtx other = NULL_RTX;
14378
14379 if (CONST_INT_P (XEXP (x, 1)))
14380 {
14381 constant = XEXP (x, 1);
14382 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14383 }
14384 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14385 {
14386 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14387 other = XEXP (x, 1);
14388 }
14389 else
14390 constant = 0;
14391
14392 if (constant)
14393 {
14394 changed = 1;
14395 x = gen_rtx_PLUS (Pmode,
14396 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14397 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14398 plus_constant (Pmode, other,
14399 INTVAL (constant)));
14400 }
14401 }
14402
14403 if (changed && ix86_legitimate_address_p (mode, x, false))
14404 return x;
14405
14406 if (GET_CODE (XEXP (x, 0)) == MULT)
14407 {
14408 changed = 1;
14409 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14410 }
14411
14412 if (GET_CODE (XEXP (x, 1)) == MULT)
14413 {
14414 changed = 1;
14415 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14416 }
14417
14418 if (changed
14419 && REG_P (XEXP (x, 1))
14420 && REG_P (XEXP (x, 0)))
14421 return x;
14422
14423 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14424 {
14425 changed = 1;
14426 x = legitimize_pic_address (x, 0);
14427 }
14428
14429 if (changed && ix86_legitimate_address_p (mode, x, false))
14430 return x;
14431
14432 if (REG_P (XEXP (x, 0)))
14433 {
14434 rtx temp = gen_reg_rtx (Pmode);
14435 rtx val = force_operand (XEXP (x, 1), temp);
14436 if (val != temp)
14437 {
14438 val = convert_to_mode (Pmode, val, 1);
14439 emit_move_insn (temp, val);
14440 }
14441
14442 XEXP (x, 1) = temp;
14443 return x;
14444 }
14445
14446 else if (REG_P (XEXP (x, 1)))
14447 {
14448 rtx temp = gen_reg_rtx (Pmode);
14449 rtx val = force_operand (XEXP (x, 0), temp);
14450 if (val != temp)
14451 {
14452 val = convert_to_mode (Pmode, val, 1);
14453 emit_move_insn (temp, val);
14454 }
14455
14456 XEXP (x, 0) = temp;
14457 return x;
14458 }
14459 }
14460
14461 return x;
14462 }
14463 \f
14464 /* Print an integer constant expression in assembler syntax. Addition
14465 and subtraction are the only arithmetic that may appear in these
14466 expressions. FILE is the stdio stream to write to, X is the rtx, and
14467 CODE is the operand print code from the output string. */
14468
14469 static void
14470 output_pic_addr_const (FILE *file, rtx x, int code)
14471 {
14472 char buf[256];
14473
14474 switch (GET_CODE (x))
14475 {
14476 case PC:
14477 gcc_assert (flag_pic);
14478 putc ('.', file);
14479 break;
14480
14481 case SYMBOL_REF:
14482 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14483 output_addr_const (file, x);
14484 else
14485 {
14486 const char *name = XSTR (x, 0);
14487
14488 /* Mark the decl as referenced so that cgraph will
14489 output the function. */
14490 if (SYMBOL_REF_DECL (x))
14491 mark_decl_referenced (SYMBOL_REF_DECL (x));
14492
14493 #if TARGET_MACHO
14494 if (MACHOPIC_INDIRECT
14495 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14496 name = machopic_indirection_name (x, /*stub_p=*/true);
14497 #endif
14498 assemble_name (file, name);
14499 }
14500 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14501 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14502 fputs ("@PLT", file);
14503 break;
14504
14505 case LABEL_REF:
14506 x = XEXP (x, 0);
14507 /* FALLTHRU */
14508 case CODE_LABEL:
14509 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14510 assemble_name (asm_out_file, buf);
14511 break;
14512
14513 case CONST_INT:
14514 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14515 break;
14516
14517 case CONST:
14518 /* This used to output parentheses around the expression,
14519 but that does not work on the 386 (either ATT or BSD assembler). */
14520 output_pic_addr_const (file, XEXP (x, 0), code);
14521 break;
14522
14523 case CONST_DOUBLE:
14524 if (GET_MODE (x) == VOIDmode)
14525 {
14526 /* We can use %d if the number is <32 bits and positive. */
14527 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14528 fprintf (file, "0x%lx%08lx",
14529 (unsigned long) CONST_DOUBLE_HIGH (x),
14530 (unsigned long) CONST_DOUBLE_LOW (x));
14531 else
14532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14533 }
14534 else
14535 /* We can't handle floating point constants;
14536 TARGET_PRINT_OPERAND must handle them. */
14537 output_operand_lossage ("floating constant misused");
14538 break;
14539
14540 case PLUS:
14541 /* Some assemblers need integer constants to appear first. */
14542 if (CONST_INT_P (XEXP (x, 0)))
14543 {
14544 output_pic_addr_const (file, XEXP (x, 0), code);
14545 putc ('+', file);
14546 output_pic_addr_const (file, XEXP (x, 1), code);
14547 }
14548 else
14549 {
14550 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14551 output_pic_addr_const (file, XEXP (x, 1), code);
14552 putc ('+', file);
14553 output_pic_addr_const (file, XEXP (x, 0), code);
14554 }
14555 break;
14556
14557 case MINUS:
14558 if (!TARGET_MACHO)
14559 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14560 output_pic_addr_const (file, XEXP (x, 0), code);
14561 putc ('-', file);
14562 output_pic_addr_const (file, XEXP (x, 1), code);
14563 if (!TARGET_MACHO)
14564 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14565 break;
14566
14567 case UNSPEC:
14568 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14569 {
14570 bool f = i386_asm_output_addr_const_extra (file, x);
14571 gcc_assert (f);
14572 break;
14573 }
14574
14575 gcc_assert (XVECLEN (x, 0) == 1);
14576 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14577 switch (XINT (x, 1))
14578 {
14579 case UNSPEC_GOT:
14580 fputs ("@GOT", file);
14581 break;
14582 case UNSPEC_GOTOFF:
14583 fputs ("@GOTOFF", file);
14584 break;
14585 case UNSPEC_PLTOFF:
14586 fputs ("@PLTOFF", file);
14587 break;
14588 case UNSPEC_PCREL:
14589 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14590 "(%rip)" : "[rip]", file);
14591 break;
14592 case UNSPEC_GOTPCREL:
14593 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14594 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14595 break;
14596 case UNSPEC_GOTTPOFF:
14597 /* FIXME: This might be @TPOFF in Sun ld too. */
14598 fputs ("@gottpoff", file);
14599 break;
14600 case UNSPEC_TPOFF:
14601 fputs ("@tpoff", file);
14602 break;
14603 case UNSPEC_NTPOFF:
14604 if (TARGET_64BIT)
14605 fputs ("@tpoff", file);
14606 else
14607 fputs ("@ntpoff", file);
14608 break;
14609 case UNSPEC_DTPOFF:
14610 fputs ("@dtpoff", file);
14611 break;
14612 case UNSPEC_GOTNTPOFF:
14613 if (TARGET_64BIT)
14614 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14615 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14616 else
14617 fputs ("@gotntpoff", file);
14618 break;
14619 case UNSPEC_INDNTPOFF:
14620 fputs ("@indntpoff", file);
14621 break;
14622 #if TARGET_MACHO
14623 case UNSPEC_MACHOPIC_OFFSET:
14624 putc ('-', file);
14625 machopic_output_function_base_name (file);
14626 break;
14627 #endif
14628 default:
14629 output_operand_lossage ("invalid UNSPEC as operand");
14630 break;
14631 }
14632 break;
14633
14634 default:
14635 output_operand_lossage ("invalid expression as operand");
14636 }
14637 }
14638
14639 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14640 We need to emit DTP-relative relocations. */
14641
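/* Illustrative output (derived from the code below): for SIZE == 4 this
   emits roughly ".long x@dtpoff", and for SIZE == 8 it emits
   ".long x@dtpoff, 0", i.e. the 32-bit DTP-relative offset padded to
   8 bytes.  */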
14642 static void ATTRIBUTE_UNUSED
14643 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14644 {
14645 fputs (ASM_LONG, file);
14646 output_addr_const (file, x);
14647 fputs ("@dtpoff", file);
14648 switch (size)
14649 {
14650 case 4:
14651 break;
14652 case 8:
14653 fputs (", 0", file);
14654 break;
14655 default:
14656 gcc_unreachable ();
14657 }
14658 }
14659
14660 /* Return true if X is a representation of the PIC register. This copes
14661 with calls from ix86_find_base_term, where the register might have
14662 been replaced by a cselib value. */
14663
14664 static bool
14665 ix86_pic_register_p (rtx x)
14666 {
14667 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14668 return (pic_offset_table_rtx
14669 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14670 else if (!REG_P (x))
14671 return false;
14672 else if (pic_offset_table_rtx)
14673 {
14674 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14675 return true;
14676 if (HARD_REGISTER_P (x)
14677 && !HARD_REGISTER_P (pic_offset_table_rtx)
14678 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14679 return true;
14680 return false;
14681 }
14682 else
14683 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14684 }
14685
14686 /* Helper function for ix86_delegitimize_address.
14687 Attempt to delegitimize TLS local-exec accesses. */
14688
14689 static rtx
14690 ix86_delegitimize_tls_address (rtx orig_x)
14691 {
14692 rtx x = orig_x, unspec;
14693 struct ix86_address addr;
14694
14695 if (!TARGET_TLS_DIRECT_SEG_REFS)
14696 return orig_x;
14697 if (MEM_P (x))
14698 x = XEXP (x, 0);
14699 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14700 return orig_x;
14701 if (ix86_decompose_address (x, &addr) == 0
14702 || addr.seg != DEFAULT_TLS_SEG_REG
14703 || addr.disp == NULL_RTX
14704 || GET_CODE (addr.disp) != CONST)
14705 return orig_x;
14706 unspec = XEXP (addr.disp, 0);
14707 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14708 unspec = XEXP (unspec, 0);
14709 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14710 return orig_x;
14711 x = XVECEXP (unspec, 0, 0);
14712 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14713 if (unspec != XEXP (addr.disp, 0))
14714 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14715 if (addr.index)
14716 {
14717 rtx idx = addr.index;
14718 if (addr.scale != 1)
14719 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14720 x = gen_rtx_PLUS (Pmode, idx, x);
14721 }
14722 if (addr.base)
14723 x = gen_rtx_PLUS (Pmode, addr.base, x);
14724 if (MEM_P (orig_x))
14725 x = replace_equiv_address_nv (orig_x, x);
14726 return x;
14727 }
14728
14729 /* In the name of slightly smaller debug output, and to cater to
14730 general assembler lossage, recognize PIC+GOTOFF and turn it back
14731 into a direct symbol reference.
14732
14733 On Darwin, this is necessary to avoid a crash, because Darwin
14734 has a different PIC label for each routine but the DWARF debugging
14735 information is not associated with any particular routine, so it's
14736 necessary to remove references to the PIC label from RTL stored by
14737 the DWARF output code. */
14738
14739 static rtx
14740 ix86_delegitimize_address (rtx x)
14741 {
14742 rtx orig_x = delegitimize_mem_from_attrs (x);
14743 /* addend is NULL or some rtx if x is something+GOTOFF where
14744 something doesn't include the PIC register. */
14745 rtx addend = NULL_RTX;
14746 /* reg_addend is NULL or a multiple of some register. */
14747 rtx reg_addend = NULL_RTX;
14748 /* const_addend is NULL or a const_int. */
14749 rtx const_addend = NULL_RTX;
14750 /* This is the result, or NULL. */
14751 rtx result = NULL_RTX;
14752
14753 x = orig_x;
14754
14755 if (MEM_P (x))
14756 x = XEXP (x, 0);
14757
14758 if (TARGET_64BIT)
14759 {
14760 if (GET_CODE (x) == CONST
14761 && GET_CODE (XEXP (x, 0)) == PLUS
14762 && GET_MODE (XEXP (x, 0)) == Pmode
14763 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14764 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14765 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14766 {
14767 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14768 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14769 if (MEM_P (orig_x))
14770 x = replace_equiv_address_nv (orig_x, x);
14771 return x;
14772 }
14773
14774 if (GET_CODE (x) == CONST
14775 && GET_CODE (XEXP (x, 0)) == UNSPEC
14776 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14777 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14778 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14779 {
14780 x = XVECEXP (XEXP (x, 0), 0, 0);
14781 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14782 {
14783 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14784 GET_MODE (x), 0);
14785 if (x == NULL_RTX)
14786 return orig_x;
14787 }
14788 return x;
14789 }
14790
14791 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14792 return ix86_delegitimize_tls_address (orig_x);
14793
14794 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14795 and -mcmodel=medium -fpic. */
14796 }
14797
14798 if (GET_CODE (x) != PLUS
14799 || GET_CODE (XEXP (x, 1)) != CONST)
14800 return ix86_delegitimize_tls_address (orig_x);
14801
14802 if (ix86_pic_register_p (XEXP (x, 0)))
14803 /* %ebx + GOT/GOTOFF */
14804 ;
14805 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14806 {
14807 /* %ebx + %reg * scale + GOT/GOTOFF */
14808 reg_addend = XEXP (x, 0);
14809 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14810 reg_addend = XEXP (reg_addend, 1);
14811 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14812 reg_addend = XEXP (reg_addend, 0);
14813 else
14814 {
14815 reg_addend = NULL_RTX;
14816 addend = XEXP (x, 0);
14817 }
14818 }
14819 else
14820 addend = XEXP (x, 0);
14821
14822 x = XEXP (XEXP (x, 1), 0);
14823 if (GET_CODE (x) == PLUS
14824 && CONST_INT_P (XEXP (x, 1)))
14825 {
14826 const_addend = XEXP (x, 1);
14827 x = XEXP (x, 0);
14828 }
14829
14830 if (GET_CODE (x) == UNSPEC
14831 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14832 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14833 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14834 && !MEM_P (orig_x) && !addend)))
14835 result = XVECEXP (x, 0, 0);
14836
14837 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14838 && !MEM_P (orig_x))
14839 result = XVECEXP (x, 0, 0);
14840
14841 if (! result)
14842 return ix86_delegitimize_tls_address (orig_x);
14843
14844 if (const_addend)
14845 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14846 if (reg_addend)
14847 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14848 if (addend)
14849 {
14850 /* If the rest of original X doesn't involve the PIC register, add
14851 addend and subtract pic_offset_table_rtx. This can happen e.g.
14852 for code like:
14853 leal (%ebx, %ecx, 4), %ecx
14854 ...
14855 movl foo@GOTOFF(%ecx), %edx
14856 in which case we return (%ecx - %ebx) + foo
14857 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14858 and reload has completed. */
14859 if (pic_offset_table_rtx
14860 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14861 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14862 pic_offset_table_rtx),
14863 result);
14864 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14865 {
14866 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14867 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14868 result = gen_rtx_PLUS (Pmode, tmp, result);
14869 }
14870 else
14871 return orig_x;
14872 }
14873 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14874 {
14875 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14876 if (result == NULL_RTX)
14877 return orig_x;
14878 }
14879 return result;
14880 }
14881
14882 /* If X is a machine specific address (i.e. a symbol or label being
14883 referenced as a displacement from the GOT implemented using an
14884 UNSPEC), then return the base term. Otherwise return X. */
14885
14886 rtx
14887 ix86_find_base_term (rtx x)
14888 {
14889 rtx term;
14890
14891 if (TARGET_64BIT)
14892 {
14893 if (GET_CODE (x) != CONST)
14894 return x;
14895 term = XEXP (x, 0);
14896 if (GET_CODE (term) == PLUS
14897 && (CONST_INT_P (XEXP (term, 1))
14898 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14899 term = XEXP (term, 0);
14900 if (GET_CODE (term) != UNSPEC
14901 || (XINT (term, 1) != UNSPEC_GOTPCREL
14902 && XINT (term, 1) != UNSPEC_PCREL))
14903 return x;
14904
14905 return XVECEXP (term, 0, 0);
14906 }
14907
14908 return ix86_delegitimize_address (x);
14909 }
14910 \f
14911 static void
14912 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14913 bool fp, FILE *file)
14914 {
14915 const char *suffix;
14916
14917 if (mode == CCFPmode || mode == CCFPUmode)
14918 {
14919 code = ix86_fp_compare_code_to_integer (code);
14920 mode = CCmode;
14921 }
14922 if (reverse)
14923 code = reverse_condition (code);
14924
14925 switch (code)
14926 {
14927 case EQ:
14928 switch (mode)
14929 {
14930 case CCAmode:
14931 suffix = "a";
14932 break;
14933
14934 case CCCmode:
14935 suffix = "c";
14936 break;
14937
14938 case CCOmode:
14939 suffix = "o";
14940 break;
14941
14942 case CCSmode:
14943 suffix = "s";
14944 break;
14945
14946 default:
14947 suffix = "e";
14948 }
14949 break;
14950 case NE:
14951 switch (mode)
14952 {
14953 case CCAmode:
14954 suffix = "na";
14955 break;
14956
14957 case CCCmode:
14958 suffix = "nc";
14959 break;
14960
14961 case CCOmode:
14962 suffix = "no";
14963 break;
14964
14965 case CCSmode:
14966 suffix = "ns";
14967 break;
14968
14969 default:
14970 suffix = "ne";
14971 }
14972 break;
14973 case GT:
14974 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14975 suffix = "g";
14976 break;
14977 case GTU:
14978 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14979 Those same assemblers have the same but opposite lossage on cmov. */
14980 if (mode == CCmode)
14981 suffix = fp ? "nbe" : "a";
14982 else
14983 gcc_unreachable ();
14984 break;
14985 case LT:
14986 switch (mode)
14987 {
14988 case CCNOmode:
14989 case CCGOCmode:
14990 suffix = "s";
14991 break;
14992
14993 case CCmode:
14994 case CCGCmode:
14995 suffix = "l";
14996 break;
14997
14998 default:
14999 gcc_unreachable ();
15000 }
15001 break;
15002 case LTU:
15003 if (mode == CCmode)
15004 suffix = "b";
15005 else if (mode == CCCmode)
15006 suffix = fp ? "b" : "c";
15007 else
15008 gcc_unreachable ();
15009 break;
15010 case GE:
15011 switch (mode)
15012 {
15013 case CCNOmode:
15014 case CCGOCmode:
15015 suffix = "ns";
15016 break;
15017
15018 case CCmode:
15019 case CCGCmode:
15020 suffix = "ge";
15021 break;
15022
15023 default:
15024 gcc_unreachable ();
15025 }
15026 break;
15027 case GEU:
15028 if (mode == CCmode)
15029 suffix = "nb";
15030 else if (mode == CCCmode)
15031 suffix = fp ? "nb" : "nc";
15032 else
15033 gcc_unreachable ();
15034 break;
15035 case LE:
15036 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15037 suffix = "le";
15038 break;
15039 case LEU:
15040 if (mode == CCmode)
15041 suffix = "be";
15042 else
15043 gcc_unreachable ();
15044 break;
15045 case UNORDERED:
15046 suffix = fp ? "u" : "p";
15047 break;
15048 case ORDERED:
15049 suffix = fp ? "nu" : "np";
15050 break;
15051 default:
15052 gcc_unreachable ();
15053 }
15054 fputs (suffix, file);
15055 }
15056
15057 /* Print the name of register X to FILE based on its machine mode and number.
15058 If CODE is 'w', pretend the mode is HImode.
15059 If CODE is 'b', pretend the mode is QImode.
15060 If CODE is 'k', pretend the mode is SImode.
15061 If CODE is 'q', pretend the mode is DImode.
15062 If CODE is 'x', pretend the mode is V4SFmode.
15063 If CODE is 't', pretend the mode is V8SFmode.
15064 If CODE is 'g', pretend the mode is V16SFmode.
15065 If CODE is 'h', pretend the reg is the 'high' byte register.
15066 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15067 If CODE is 'd', duplicate the operand for an AVX instruction.
15068 */
15069
15070 void
15071 print_reg (rtx x, int code, FILE *file)
15072 {
15073 const char *reg;
15074 unsigned int regno;
15075 bool duplicated = code == 'd' && TARGET_AVX;
15076
15077 if (ASSEMBLER_DIALECT == ASM_ATT)
15078 putc ('%', file);
15079
15080 if (x == pc_rtx)
15081 {
15082 gcc_assert (TARGET_64BIT);
15083 fputs ("rip", file);
15084 return;
15085 }
15086
15087 regno = true_regnum (x);
15088 gcc_assert (regno != ARG_POINTER_REGNUM
15089 && regno != FRAME_POINTER_REGNUM
15090 && regno != FLAGS_REG
15091 && regno != FPSR_REG
15092 && regno != FPCR_REG);
15093
15094 if (code == 'w' || MMX_REG_P (x))
15095 code = 2;
15096 else if (code == 'b')
15097 code = 1;
15098 else if (code == 'k')
15099 code = 4;
15100 else if (code == 'q')
15101 code = 8;
15102 else if (code == 'y')
15103 code = 3;
15104 else if (code == 'h')
15105 code = 0;
15106 else if (code == 'x')
15107 code = 16;
15108 else if (code == 't')
15109 code = 32;
15110 else if (code == 'g')
15111 code = 64;
15112 else
15113 code = GET_MODE_SIZE (GET_MODE (x));
15114
15115 /* Irritatingly, AMD extended registers use a different naming convention
15116 from the normal registers: "r%d[bwd]" */
15117 if (REX_INT_REGNO_P (regno))
15118 {
15119 gcc_assert (TARGET_64BIT);
15120 putc ('r', file);
15121 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15122 switch (code)
15123 {
15124 case 0:
15125 error ("extended registers have no high halves");
15126 break;
15127 case 1:
15128 putc ('b', file);
15129 break;
15130 case 2:
15131 putc ('w', file);
15132 break;
15133 case 4:
15134 putc ('d', file);
15135 break;
15136 case 8:
15137 /* no suffix */
15138 break;
15139 default:
15140 error ("unsupported operand size for extended register");
15141 break;
15142 }
15143 return;
15144 }
15145
15146 reg = NULL;
15147 switch (code)
15148 {
15149 case 3:
15150 if (STACK_TOP_P (x))
15151 {
15152 reg = "st(0)";
15153 break;
15154 }
15155 /* FALLTHRU */
15156 case 8:
15157 case 4:
15158 case 12:
15159 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15160 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15161 /* FALLTHRU */
15162 case 16:
15163 case 2:
15164 normal:
15165 reg = hi_reg_name[regno];
15166 break;
15167 case 1:
15168 if (regno >= ARRAY_SIZE (qi_reg_name))
15169 goto normal;
15170 reg = qi_reg_name[regno];
15171 break;
15172 case 0:
15173 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15174 goto normal;
15175 reg = qi_high_reg_name[regno];
15176 break;
15177 case 32:
15178 if (SSE_REG_P (x))
15179 {
15180 gcc_assert (!duplicated);
15181 putc ('y', file);
15182 fputs (hi_reg_name[regno] + 1, file);
15183 return;
15184 }
15185 case 64:
15186 if (SSE_REG_P (x))
15187 {
15188 gcc_assert (!duplicated);
15189 putc ('z', file);
15190 fputs (hi_reg_name[REGNO (x)] + 1, file);
15191 return;
15192 }
15193 break;
15194 default:
15195 gcc_unreachable ();
15196 }
15197
15198 fputs (reg, file);
15199 if (duplicated)
15200 {
15201 if (ASSEMBLER_DIALECT == ASM_ATT)
15202 fprintf (file, ", %%%s", reg);
15203 else
15204 fprintf (file, ", %s", reg);
15205 }
15206 }
15207
15208 /* Meaning of CODE:
15209 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15210 C -- print opcode suffix for set/cmov insn.
15211 c -- like C, but print reversed condition
15212 F,f -- likewise, but for floating-point.
15213 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15214 otherwise nothing
15215 R -- print embedded rounding and sae.
15216 r -- print only sae.
15217 z -- print the opcode suffix for the size of the current operand.
15218 Z -- likewise, with special suffixes for x87 instructions.
15219 * -- print a star (in certain assembler syntax)
15220 A -- print an absolute memory reference.
15221 E -- print address with DImode register names if TARGET_64BIT.
15222 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15223 s -- print a shift double count, followed by the assembler's argument
15224 delimiter.
15225 b -- print the QImode name of the register for the indicated operand.
15226 %b0 would print %al if operands[0] is reg 0.
15227 w -- likewise, print the HImode name of the register.
15228 k -- likewise, print the SImode name of the register.
15229 q -- likewise, print the DImode name of the register.
15230 x -- likewise, print the V4SFmode name of the register.
15231 t -- likewise, print the V8SFmode name of the register.
15232 g -- likewise, print the V16SFmode name of the register.
15233 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15234 y -- print "st(0)" instead of "st" as a register.
15235 d -- print duplicated register operand for AVX instruction.
15236 D -- print condition for SSE cmp instruction.
15237 P -- if PIC, print an @PLT suffix.
15238 p -- print raw symbol name.
15239 X -- don't print any sort of PIC '@' suffix for a symbol.
15240 & -- print some in-use local-dynamic symbol name.
15241 H -- print a memory address offset by 8; used for sse high-parts
15242 Y -- print condition for XOP pcom* instruction.
15243 + -- print a branch hint as 'cs' or 'ds' prefix
15244 ; -- print a semicolon (after prefixes due to bug in older gas).
15245 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15246 @ -- print a segment register of thread base pointer load
15247 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15248 ! -- print MPX prefix for jxx/call/ret instructions if required.
15249 */
15250
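/* Illustrative usage (an assumption about the md files, not taken from
   this file): these codes appear in insn output templates, e.g. a
   template along the lines of

       "add%z0\t{%1, %0|%0, %1}"

   uses 'z' to select the b/w/l/q size suffix from operand 0, with the
   {att|intel} alternatives chosen by ASSEMBLER_DIALECT.  */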
15251 void
15252 ix86_print_operand (FILE *file, rtx x, int code)
15253 {
15254 if (code)
15255 {
15256 switch (code)
15257 {
15258 case 'A':
15259 switch (ASSEMBLER_DIALECT)
15260 {
15261 case ASM_ATT:
15262 putc ('*', file);
15263 break;
15264
15265 case ASM_INTEL:
15266 /* Intel syntax. For absolute addresses, registers should not
15267 be surrounded by brackets. */
15268 if (!REG_P (x))
15269 {
15270 putc ('[', file);
15271 ix86_print_operand (file, x, 0);
15272 putc (']', file);
15273 return;
15274 }
15275 break;
15276
15277 default:
15278 gcc_unreachable ();
15279 }
15280
15281 ix86_print_operand (file, x, 0);
15282 return;
15283
15284 case 'E':
15285 /* Wrap address in an UNSPEC to declare special handling. */
15286 if (TARGET_64BIT)
15287 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15288
15289 output_address (x);
15290 return;
15291
15292 case 'L':
15293 if (ASSEMBLER_DIALECT == ASM_ATT)
15294 putc ('l', file);
15295 return;
15296
15297 case 'W':
15298 if (ASSEMBLER_DIALECT == ASM_ATT)
15299 putc ('w', file);
15300 return;
15301
15302 case 'B':
15303 if (ASSEMBLER_DIALECT == ASM_ATT)
15304 putc ('b', file);
15305 return;
15306
15307 case 'Q':
15308 if (ASSEMBLER_DIALECT == ASM_ATT)
15309 putc ('l', file);
15310 return;
15311
15312 case 'S':
15313 if (ASSEMBLER_DIALECT == ASM_ATT)
15314 putc ('s', file);
15315 return;
15316
15317 case 'T':
15318 if (ASSEMBLER_DIALECT == ASM_ATT)
15319 putc ('t', file);
15320 return;
15321
15322 case 'O':
15323 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15324 if (ASSEMBLER_DIALECT != ASM_ATT)
15325 return;
15326
15327 switch (GET_MODE_SIZE (GET_MODE (x)))
15328 {
15329 case 2:
15330 putc ('w', file);
15331 break;
15332
15333 case 4:
15334 putc ('l', file);
15335 break;
15336
15337 case 8:
15338 putc ('q', file);
15339 break;
15340
15341 default:
15342 output_operand_lossage
15343 ("invalid operand size for operand code 'O'");
15344 return;
15345 }
15346
15347 putc ('.', file);
15348 #endif
15349 return;
15350
15351 case 'z':
15352 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15353 {
15354 /* Opcodes don't get size suffixes if using Intel syntax. */
15355 if (ASSEMBLER_DIALECT == ASM_INTEL)
15356 return;
15357
15358 switch (GET_MODE_SIZE (GET_MODE (x)))
15359 {
15360 case 1:
15361 putc ('b', file);
15362 return;
15363
15364 case 2:
15365 putc ('w', file);
15366 return;
15367
15368 case 4:
15369 putc ('l', file);
15370 return;
15371
15372 case 8:
15373 putc ('q', file);
15374 return;
15375
15376 default:
15377 output_operand_lossage
15378 ("invalid operand size for operand code 'z'");
15379 return;
15380 }
15381 }
15382
15383 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15384 warning
15385 (0, "non-integer operand used with operand code 'z'");
15386 /* FALLTHRU */
15387
15388 case 'Z':
15389 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15390 if (ASSEMBLER_DIALECT == ASM_INTEL)
15391 return;
15392
15393 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15394 {
15395 switch (GET_MODE_SIZE (GET_MODE (x)))
15396 {
15397 case 2:
15398 #ifdef HAVE_AS_IX86_FILDS
15399 putc ('s', file);
15400 #endif
15401 return;
15402
15403 case 4:
15404 putc ('l', file);
15405 return;
15406
15407 case 8:
15408 #ifdef HAVE_AS_IX86_FILDQ
15409 putc ('q', file);
15410 #else
15411 fputs ("ll", file);
15412 #endif
15413 return;
15414
15415 default:
15416 break;
15417 }
15418 }
15419 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15420 {
15421 /* 387 opcodes don't get size suffixes
15422 if the operands are registers. */
15423 if (STACK_REG_P (x))
15424 return;
15425
15426 switch (GET_MODE_SIZE (GET_MODE (x)))
15427 {
15428 case 4:
15429 putc ('s', file);
15430 return;
15431
15432 case 8:
15433 putc ('l', file);
15434 return;
15435
15436 case 12:
15437 case 16:
15438 putc ('t', file);
15439 return;
15440
15441 default:
15442 break;
15443 }
15444 }
15445 else
15446 {
15447 output_operand_lossage
15448 ("invalid operand type used with operand code 'Z'");
15449 return;
15450 }
15451
15452 output_operand_lossage
15453 ("invalid operand size for operand code 'Z'");
15454 return;
15455
15456 case 'd':
15457 case 'b':
15458 case 'w':
15459 case 'k':
15460 case 'q':
15461 case 'h':
15462 case 't':
15463 case 'g':
15464 case 'y':
15465 case 'x':
15466 case 'X':
15467 case 'P':
15468 case 'p':
15469 break;
15470
15471 case 's':
15472 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15473 {
15474 ix86_print_operand (file, x, 0);
15475 fputs (", ", file);
15476 }
15477 return;
15478
15479 case 'Y':
15480 switch (GET_CODE (x))
15481 {
15482 case NE:
15483 fputs ("neq", file);
15484 break;
15485 case EQ:
15486 fputs ("eq", file);
15487 break;
15488 case GE:
15489 case GEU:
15490 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15491 break;
15492 case GT:
15493 case GTU:
15494 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15495 break;
15496 case LE:
15497 case LEU:
15498 fputs ("le", file);
15499 break;
15500 case LT:
15501 case LTU:
15502 fputs ("lt", file);
15503 break;
15504 case UNORDERED:
15505 fputs ("unord", file);
15506 break;
15507 case ORDERED:
15508 fputs ("ord", file);
15509 break;
15510 case UNEQ:
15511 fputs ("ueq", file);
15512 break;
15513 case UNGE:
15514 fputs ("nlt", file);
15515 break;
15516 case UNGT:
15517 fputs ("nle", file);
15518 break;
15519 case UNLE:
15520 fputs ("ule", file);
15521 break;
15522 case UNLT:
15523 fputs ("ult", file);
15524 break;
15525 case LTGT:
15526 fputs ("une", file);
15527 break;
15528 default:
15529 output_operand_lossage ("operand is not a condition code, "
15530 "invalid operand code 'Y'");
15531 return;
15532 }
15533 return;
15534
15535 case 'D':
15536 	  /* A little bit of brain damage here.  The SSE compare instructions
15537 	     use completely different names for the comparisons than the
15538 	     fp conditional moves do.  */
15539 switch (GET_CODE (x))
15540 {
15541 case UNEQ:
15542 if (TARGET_AVX)
15543 {
15544 fputs ("eq_us", file);
15545 break;
15546 }
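	      /* FALLTHRU */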
15547 case EQ:
15548 fputs ("eq", file);
15549 break;
15550 case UNLT:
15551 if (TARGET_AVX)
15552 {
15553 fputs ("nge", file);
15554 break;
15555 }
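	      /* FALLTHRU */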
15556 case LT:
15557 fputs ("lt", file);
15558 break;
15559 case UNLE:
15560 if (TARGET_AVX)
15561 {
15562 fputs ("ngt", file);
15563 break;
15564 }
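	      /* FALLTHRU */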
15565 case LE:
15566 fputs ("le", file);
15567 break;
15568 case UNORDERED:
15569 fputs ("unord", file);
15570 break;
15571 case LTGT:
15572 if (TARGET_AVX)
15573 {
15574 fputs ("neq_oq", file);
15575 break;
15576 }
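	      /* FALLTHRU */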
15577 case NE:
15578 fputs ("neq", file);
15579 break;
15580 case GE:
15581 if (TARGET_AVX)
15582 {
15583 fputs ("ge", file);
15584 break;
15585 }
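	      /* FALLTHRU */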
15586 case UNGE:
15587 fputs ("nlt", file);
15588 break;
15589 case GT:
15590 if (TARGET_AVX)
15591 {
15592 fputs ("gt", file);
15593 break;
15594 }
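	      /* FALLTHRU */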
15595 case UNGT:
15596 fputs ("nle", file);
15597 break;
15598 case ORDERED:
15599 fputs ("ord", file);
15600 break;
15601 default:
15602 output_operand_lossage ("operand is not a condition code, "
15603 "invalid operand code 'D'");
15604 return;
15605 }
15606 return;
15607
15608 case 'F':
15609 case 'f':
15610 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15611 if (ASSEMBLER_DIALECT == ASM_ATT)
15612 putc ('.', file);
15613 #endif
15614
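	  /* FALLTHRU */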
15615 case 'C':
15616 case 'c':
15617 if (!COMPARISON_P (x))
15618 {
15619 output_operand_lossage ("operand is not a condition code, "
15620 "invalid operand code '%c'", code);
15621 return;
15622 }
15623 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15624 code == 'c' || code == 'f',
15625 code == 'F' || code == 'f',
15626 file);
15627 return;
15628
15629 case 'H':
15630 if (!offsettable_memref_p (x))
15631 {
15632 output_operand_lossage ("operand is not an offsettable memory "
15633 "reference, invalid operand code 'H'");
15634 return;
15635 }
15636 /* It doesn't actually matter what mode we use here, as we're
15637 only going to use this for printing. */
15638 x = adjust_address_nv (x, DImode, 8);
15639 /* Output 'qword ptr' for intel assembler dialect. */
15640 if (ASSEMBLER_DIALECT == ASM_INTEL)
15641 code = 'q';
15642 break;
15643
15644 case 'K':
15645 gcc_assert (CONST_INT_P (x));
15646
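	  /* Emit the xacquire/xrelease mnemonic when the assembler supports
	     HLE; otherwise emit the raw 0xf2/0xf3 prefix byte directly.  */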
15647 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15648 #ifdef HAVE_AS_IX86_HLE
15649 fputs ("xacquire ", file);
15650 #else
15651 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15652 #endif
15653 else if (INTVAL (x) & IX86_HLE_RELEASE)
15654 #ifdef HAVE_AS_IX86_HLE
15655 fputs ("xrelease ", file);
15656 #else
15657 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15658 #endif
15659 	  /* We do not want to print the value of the operand.  */
15660 return;
15661
15662 case 'N':
15663 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15664 fputs ("{z}", file);
15665 return;
15666
15667 case 'r':
15668 gcc_assert (CONST_INT_P (x));
15669 gcc_assert (INTVAL (x) == ROUND_SAE);
15670
15671 if (ASSEMBLER_DIALECT == ASM_INTEL)
15672 fputs (", ", file);
15673
15674 fputs ("{sae}", file);
15675
15676 if (ASSEMBLER_DIALECT == ASM_ATT)
15677 fputs (", ", file);
15678
15679 return;
15680
15681 case 'R':
15682 gcc_assert (CONST_INT_P (x));
15683
15684 if (ASSEMBLER_DIALECT == ASM_INTEL)
15685 fputs (", ", file);
15686
15687 switch (INTVAL (x))
15688 {
15689 case ROUND_NEAREST_INT | ROUND_SAE:
15690 fputs ("{rn-sae}", file);
15691 break;
15692 case ROUND_NEG_INF | ROUND_SAE:
15693 fputs ("{rd-sae}", file);
15694 break;
15695 case ROUND_POS_INF | ROUND_SAE:
15696 fputs ("{ru-sae}", file);
15697 break;
15698 case ROUND_ZERO | ROUND_SAE:
15699 fputs ("{rz-sae}", file);
15700 break;
15701 default:
15702 gcc_unreachable ();
15703 }
15704
15705 if (ASSEMBLER_DIALECT == ASM_ATT)
15706 fputs (", ", file);
15707
15708 return;
15709
15710 case '*':
15711 if (ASSEMBLER_DIALECT == ASM_ATT)
15712 putc ('*', file);
15713 return;
15714
15715 case '&':
15716 {
15717 const char *name = get_some_local_dynamic_name ();
15718 if (name == NULL)
15719 output_operand_lossage ("'%%&' used without any "
15720 "local dynamic TLS references");
15721 else
15722 assemble_name (file, name);
15723 return;
15724 }
15725
15726 case '+':
15727 {
15728 rtx x;
15729
15730 if (!optimize
15731 || optimize_function_for_size_p (cfun)
15732 || !TARGET_BRANCH_PREDICTION_HINTS)
15733 return;
15734
15735 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15736 if (x)
15737 {
15738 int pred_val = XINT (x, 0);
15739
15740 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15741 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15742 {
15743 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15744 bool cputaken
15745 = final_forward_branch_p (current_output_insn) == 0;
15746
15747 		  /* Emit hints only in the case the default branch prediction
15748 		     heuristics would fail.  */
15749 if (taken != cputaken)
15750 {
15751 /* We use 3e (DS) prefix for taken branches and
15752 2e (CS) prefix for not taken branches. */
15753 if (taken)
15754 fputs ("ds ; ", file);
15755 else
15756 fputs ("cs ; ", file);
15757 }
15758 }
15759 }
15760 return;
15761 }
15762
15763 case ';':
15764 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15765 putc (';', file);
15766 #endif
15767 return;
15768
15769 case '@':
15770 if (ASSEMBLER_DIALECT == ASM_ATT)
15771 putc ('%', file);
15772
15773 /* The kernel uses a different segment register for performance
15774 reasons; a system call would not have to trash the userspace
15775 segment register, which would be expensive. */
15776 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15777 fputs ("fs", file);
15778 else
15779 fputs ("gs", file);
15780 return;
15781
15782 case '~':
15783 putc (TARGET_AVX2 ? 'i' : 'f', file);
15784 return;
15785
15786 case '^':
15787 if (TARGET_64BIT && Pmode != word_mode)
15788 fputs ("addr32 ", file);
15789 return;
15790
15791 case '!':
15792 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15793 fputs ("bnd ", file);
15794 return;
15795
15796 default:
15797 output_operand_lossage ("invalid operand code '%c'", code);
15798 }
15799 }
15800
15801 if (REG_P (x))
15802 print_reg (x, code, file);
15803
15804 else if (MEM_P (x))
15805 {
15806 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15807 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15808 && GET_MODE (x) != BLKmode)
15809 {
15810 const char * size;
15811 switch (GET_MODE_SIZE (GET_MODE (x)))
15812 {
15813 case 1: size = "BYTE"; break;
15814 case 2: size = "WORD"; break;
15815 case 4: size = "DWORD"; break;
15816 case 8: size = "QWORD"; break;
15817 case 12: size = "TBYTE"; break;
15818 case 16:
15819 if (GET_MODE (x) == XFmode)
15820 size = "TBYTE";
15821 else
15822 size = "XMMWORD";
15823 break;
15824 case 32: size = "YMMWORD"; break;
15825 case 64: size = "ZMMWORD"; break;
15826 default:
15827 gcc_unreachable ();
15828 }
15829
15830 /* Check for explicit size override (codes 'b', 'w', 'k',
15831 'q' and 'x') */
15832 if (code == 'b')
15833 size = "BYTE";
15834 else if (code == 'w')
15835 size = "WORD";
15836 else if (code == 'k')
15837 size = "DWORD";
15838 else if (code == 'q')
15839 size = "QWORD";
15840 else if (code == 'x')
15841 size = "XMMWORD";
15842
15843 fputs (size, file);
15844 fputs (" PTR ", file);
15845 }
15846
15847 x = XEXP (x, 0);
15848 /* Avoid (%rip) for call operands. */
15849 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15850 && !CONST_INT_P (x))
15851 output_addr_const (file, x);
15852 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15853 output_operand_lossage ("invalid constraints for operand");
15854 else
15855 output_address (x);
15856 }
15857
15858 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15859 {
15860 REAL_VALUE_TYPE r;
15861 long l;
15862
15863 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15864 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15865
15866 if (ASSEMBLER_DIALECT == ASM_ATT)
15867 putc ('$', file);
15868 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15869 if (code == 'q')
15870 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15871 (unsigned long long) (int) l);
15872 else
15873 fprintf (file, "0x%08x", (unsigned int) l);
15874 }
15875
15876 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15877 {
15878 REAL_VALUE_TYPE r;
15879 long l[2];
15880
15881 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15882 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15883
15884 if (ASSEMBLER_DIALECT == ASM_ATT)
15885 putc ('$', file);
15886 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15887 }
15888
15889 /* These float cases don't actually occur as immediate operands. */
15890 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15891 {
15892 char dstr[30];
15893
15894 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15895 fputs (dstr, file);
15896 }
15897
15898 else
15899 {
15900 /* We have patterns that allow zero sets of memory, for instance.
15901 In 64-bit mode, we should probably support all 8-byte vectors,
15902 since we can in fact encode that into an immediate. */
15903 if (GET_CODE (x) == CONST_VECTOR)
15904 {
15905 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15906 x = const0_rtx;
15907 }
15908
15909 if (code != 'P' && code != 'p')
15910 {
15911 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15912 {
15913 if (ASSEMBLER_DIALECT == ASM_ATT)
15914 putc ('$', file);
15915 }
15916 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15917 || GET_CODE (x) == LABEL_REF)
15918 {
15919 if (ASSEMBLER_DIALECT == ASM_ATT)
15920 putc ('$', file);
15921 else
15922 fputs ("OFFSET FLAT:", file);
15923 }
15924 }
15925 if (CONST_INT_P (x))
15926 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15927 else if (flag_pic || MACHOPIC_INDIRECT)
15928 output_pic_addr_const (file, x, code);
15929 else
15930 output_addr_const (file, x);
15931 }
15932 }
15933
15934 static bool
15935 ix86_print_operand_punct_valid_p (unsigned char code)
15936 {
15937 return (code == '@' || code == '*' || code == '+' || code == '&'
15938 || code == ';' || code == '~' || code == '^' || code == '!');
15939 }
15940 \f
15941 /* Print a memory operand whose address is ADDR. */
15942
15943 static void
15944 ix86_print_operand_address (FILE *file, rtx addr)
15945 {
15946 struct ix86_address parts;
15947 rtx base, index, disp;
15948 int scale;
15949 int ok;
15950 bool vsib = false;
15951 int code = 0;
15952
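  /* A VSIB address is wrapped in an UNSPEC_VSIBADDR whose operands are the
     base address, the vector index register and the scale.  */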
15953 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15954 {
15955 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15956 gcc_assert (parts.index == NULL_RTX);
15957 parts.index = XVECEXP (addr, 0, 1);
15958 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15959 addr = XVECEXP (addr, 0, 0);
15960 vsib = true;
15961 }
15962 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15963 {
15964 gcc_assert (TARGET_64BIT);
15965 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15966 code = 'q';
15967 }
15968 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15969 {
15970 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15971 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15972 if (parts.base != NULL_RTX)
15973 {
15974 parts.index = parts.base;
15975 parts.scale = 1;
15976 }
15977 parts.base = XVECEXP (addr, 0, 0);
15978 addr = XVECEXP (addr, 0, 0);
15979 }
15980 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15981 {
15982 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15983 gcc_assert (parts.index == NULL_RTX);
15984 parts.index = XVECEXP (addr, 0, 1);
15985 addr = XVECEXP (addr, 0, 0);
15986 }
15987 else
15988 ok = ix86_decompose_address (addr, &parts);
15989
15990 gcc_assert (ok);
15991
15992 base = parts.base;
15993 index = parts.index;
15994 disp = parts.disp;
15995 scale = parts.scale;
15996
15997 switch (parts.seg)
15998 {
15999 case SEG_DEFAULT:
16000 break;
16001 case SEG_FS:
16002 case SEG_GS:
16003 if (ASSEMBLER_DIALECT == ASM_ATT)
16004 putc ('%', file);
16005 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16006 break;
16007 default:
16008 gcc_unreachable ();
16009 }
16010
16011 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16012 if (TARGET_64BIT && !base && !index)
16013 {
16014 rtx symbol = disp;
16015
16016 if (GET_CODE (disp) == CONST
16017 && GET_CODE (XEXP (disp, 0)) == PLUS
16018 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16019 symbol = XEXP (XEXP (disp, 0), 0);
16020
16021 if (GET_CODE (symbol) == LABEL_REF
16022 || (GET_CODE (symbol) == SYMBOL_REF
16023 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16024 base = pc_rtx;
16025 }
16026 if (!base && !index)
16027 {
16028       /* A displacement-only address requires special attention.  */
16029
16030 if (CONST_INT_P (disp))
16031 {
16032 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16033 fputs ("ds:", file);
16034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16035 }
16036 else if (flag_pic)
16037 output_pic_addr_const (file, disp, 0);
16038 else
16039 output_addr_const (file, disp);
16040 }
16041 else
16042 {
16043 /* Print SImode register names to force addr32 prefix. */
16044 if (SImode_address_operand (addr, VOIDmode))
16045 {
16046 #ifdef ENABLE_CHECKING
16047 gcc_assert (TARGET_64BIT);
16048 switch (GET_CODE (addr))
16049 {
16050 case SUBREG:
16051 gcc_assert (GET_MODE (addr) == SImode);
16052 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16053 break;
16054 case ZERO_EXTEND:
16055 case AND:
16056 gcc_assert (GET_MODE (addr) == DImode);
16057 break;
16058 default:
16059 gcc_unreachable ();
16060 }
16061 #endif
16062 gcc_assert (!code);
16063 code = 'k';
16064 }
16065 else if (code == 0
16066 && TARGET_X32
16067 && disp
16068 && CONST_INT_P (disp)
16069 && INTVAL (disp) < -16*1024*1024)
16070 {
16071 /* X32 runs in 64-bit mode, where displacement, DISP, in
16072 address DISP(%r64), is encoded as 32-bit immediate sign-
16073 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16074 address is %r64 + 0xffffffffbffffd00. When %r64 <
16075 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16076 which is invalid for x32. The correct address is %r64
16077 - 0x40000300 == 0xf7ffdd64. To properly encode
16078 -0x40000300(%r64) for x32, we zero-extend negative
16079 displacement by forcing addr32 prefix which truncates
16080 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16081 zero-extend all negative displacements, including -1(%rsp).
16082 However, for small negative displacements, sign-extension
16083 won't cause overflow. We only zero-extend negative
16084 	     displacements if they are < -16*1024*1024, which is also used
16085 to check legitimate address displacements for PIC. */
16086 code = 'k';
16087 }
16088
16089 if (ASSEMBLER_DIALECT == ASM_ATT)
16090 {
16091 if (disp)
16092 {
16093 if (flag_pic)
16094 output_pic_addr_const (file, disp, 0);
16095 else if (GET_CODE (disp) == LABEL_REF)
16096 output_asm_label (disp);
16097 else
16098 output_addr_const (file, disp);
16099 }
16100
16101 putc ('(', file);
16102 if (base)
16103 print_reg (base, code, file);
16104 if (index)
16105 {
16106 putc (',', file);
16107 print_reg (index, vsib ? 0 : code, file);
16108 if (scale != 1 || vsib)
16109 fprintf (file, ",%d", scale);
16110 }
16111 putc (')', file);
16112 }
16113 else
16114 {
16115 rtx offset = NULL_RTX;
16116
16117 if (disp)
16118 {
16119 /* Pull out the offset of a symbol; print any symbol itself. */
16120 if (GET_CODE (disp) == CONST
16121 && GET_CODE (XEXP (disp, 0)) == PLUS
16122 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16123 {
16124 offset = XEXP (XEXP (disp, 0), 1);
16125 disp = gen_rtx_CONST (VOIDmode,
16126 XEXP (XEXP (disp, 0), 0));
16127 }
16128
16129 if (flag_pic)
16130 output_pic_addr_const (file, disp, 0);
16131 else if (GET_CODE (disp) == LABEL_REF)
16132 output_asm_label (disp);
16133 else if (CONST_INT_P (disp))
16134 offset = disp;
16135 else
16136 output_addr_const (file, disp);
16137 }
16138
16139 putc ('[', file);
16140 if (base)
16141 {
16142 print_reg (base, code, file);
16143 if (offset)
16144 {
16145 if (INTVAL (offset) >= 0)
16146 putc ('+', file);
16147 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16148 }
16149 }
16150 else if (offset)
16151 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16152 else
16153 putc ('0', file);
16154
16155 if (index)
16156 {
16157 putc ('+', file);
16158 print_reg (index, vsib ? 0 : code, file);
16159 if (scale != 1 || vsib)
16160 fprintf (file, "*%d", scale);
16161 }
16162 putc (']', file);
16163 }
16164 }
16165 }
16166
16167 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16168
16169 static bool
16170 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16171 {
16172 rtx op;
16173
16174 if (GET_CODE (x) != UNSPEC)
16175 return false;
16176
16177 op = XVECEXP (x, 0, 0);
16178 switch (XINT (x, 1))
16179 {
16180 case UNSPEC_GOTTPOFF:
16181 output_addr_const (file, op);
16182 /* FIXME: This might be @TPOFF in Sun ld. */
16183 fputs ("@gottpoff", file);
16184 break;
16185 case UNSPEC_TPOFF:
16186 output_addr_const (file, op);
16187 fputs ("@tpoff", file);
16188 break;
16189 case UNSPEC_NTPOFF:
16190 output_addr_const (file, op);
16191 if (TARGET_64BIT)
16192 fputs ("@tpoff", file);
16193 else
16194 fputs ("@ntpoff", file);
16195 break;
16196 case UNSPEC_DTPOFF:
16197 output_addr_const (file, op);
16198 fputs ("@dtpoff", file);
16199 break;
16200 case UNSPEC_GOTNTPOFF:
16201 output_addr_const (file, op);
16202 if (TARGET_64BIT)
16203 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16204 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16205 else
16206 fputs ("@gotntpoff", file);
16207 break;
16208 case UNSPEC_INDNTPOFF:
16209 output_addr_const (file, op);
16210 fputs ("@indntpoff", file);
16211 break;
16212 #if TARGET_MACHO
16213 case UNSPEC_MACHOPIC_OFFSET:
16214 output_addr_const (file, op);
16215 putc ('-', file);
16216 machopic_output_function_base_name (file);
16217 break;
16218 #endif
16219
16220 case UNSPEC_STACK_CHECK:
16221 {
16222 int offset;
16223
16224 gcc_assert (flag_split_stack);
16225
16226 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16227 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16228 #else
16229 gcc_unreachable ();
16230 #endif
16231
16232 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16233 }
16234 break;
16235
16236 default:
16237 return false;
16238 }
16239
16240 return true;
16241 }
16242 \f
16243 /* Split one or more double-mode RTL references into pairs of half-mode
16244 references. The RTL can be REG, offsettable MEM, integer constant, or
16245 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16246 split and "num" is its length. lo_half and hi_half are output arrays
16247 that parallel "operands". */
16248
16249 void
16250 split_double_mode (machine_mode mode, rtx operands[],
16251 int num, rtx lo_half[], rtx hi_half[])
16252 {
16253 machine_mode half_mode;
16254 unsigned int byte;
16255
16256 switch (mode)
16257 {
16258 case TImode:
16259 half_mode = DImode;
16260 break;
16261 case DImode:
16262 half_mode = SImode;
16263 break;
16264 default:
16265 gcc_unreachable ();
16266 }
16267
16268 byte = GET_MODE_SIZE (half_mode);
16269
16270 while (num--)
16271 {
16272 rtx op = operands[num];
16273
16274       /* simplify_subreg refuses to split volatile memory addresses,
16275          but we still have to handle them.  */
16276 if (MEM_P (op))
16277 {
16278 lo_half[num] = adjust_address (op, half_mode, 0);
16279 hi_half[num] = adjust_address (op, half_mode, byte);
16280 }
16281 else
16282 {
16283 lo_half[num] = simplify_gen_subreg (half_mode, op,
16284 GET_MODE (op) == VOIDmode
16285 ? mode : GET_MODE (op), 0);
16286 hi_half[num] = simplify_gen_subreg (half_mode, op,
16287 GET_MODE (op) == VOIDmode
16288 ? mode : GET_MODE (op), byte);
16289 }
16290 }
16291 }
16292 \f
16293 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16294 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16295 is the expression of the binary operation. The output may either be
16296 emitted here, or returned to the caller, like all output_* functions.
16297
16298 There is no guarantee that the operands are the same mode, as they
16299 might be within FLOAT or FLOAT_EXTEND expressions. */
16300
16301 #ifndef SYSV386_COMPAT
16302 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16303 wants to fix the assemblers because that causes incompatibility
16304 with gcc. No-one wants to fix gcc because that causes
16305 incompatibility with assemblers... You can use the option of
16306 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16307 #define SYSV386_COMPAT 1
16308 #endif
16309
16310 const char *
16311 output_387_binary_op (rtx insn, rtx *operands)
16312 {
16313 static char buf[40];
16314 const char *p;
16315 const char *ssep;
16316 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16317
16318 #ifdef ENABLE_CHECKING
16319   /* Even if we do not want to check the inputs, this documents the input
16320      constraints, which helps in understanding the following code.  */
16321 if (STACK_REG_P (operands[0])
16322 && ((REG_P (operands[1])
16323 && REGNO (operands[0]) == REGNO (operands[1])
16324 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16325 || (REG_P (operands[2])
16326 && REGNO (operands[0]) == REGNO (operands[2])
16327 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16328 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16329 ; /* ok */
16330 else
16331 gcc_assert (is_sse);
16332 #endif
16333
16334 switch (GET_CODE (operands[3]))
16335 {
16336 case PLUS:
16337 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16338 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16339 p = "fiadd";
16340 else
16341 p = "fadd";
16342 ssep = "vadd";
16343 break;
16344
16345 case MINUS:
16346 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16347 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16348 p = "fisub";
16349 else
16350 p = "fsub";
16351 ssep = "vsub";
16352 break;
16353
16354 case MULT:
16355 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16356 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16357 p = "fimul";
16358 else
16359 p = "fmul";
16360 ssep = "vmul";
16361 break;
16362
16363 case DIV:
16364 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16365 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16366 p = "fidiv";
16367 else
16368 p = "fdiv";
16369 ssep = "vdiv";
16370 break;
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375
16376 if (is_sse)
16377 {
16378 if (TARGET_AVX)
16379 {
16380 strcpy (buf, ssep);
16381 if (GET_MODE (operands[0]) == SFmode)
16382 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16383 else
16384 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16385 }
16386 else
16387 {
16388 strcpy (buf, ssep + 1);
16389 if (GET_MODE (operands[0]) == SFmode)
16390 strcat (buf, "ss\t{%2, %0|%0, %2}");
16391 else
16392 strcat (buf, "sd\t{%2, %0|%0, %2}");
16393 }
16394 return buf;
16395 }
16396 strcpy (buf, p);
16397
16398 switch (GET_CODE (operands[3]))
16399 {
16400 case MULT:
16401 case PLUS:
16402 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16403 {
16404 rtx temp = operands[2];
16405 operands[2] = operands[1];
16406 operands[1] = temp;
16407 }
16408
16409       /* We know operands[0] == operands[1].  */
16410
16411 if (MEM_P (operands[2]))
16412 {
16413 p = "%Z2\t%2";
16414 break;
16415 }
16416
16417 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16418 {
16419 if (STACK_TOP_P (operands[0]))
16420 /* How is it that we are storing to a dead operand[2]?
16421 Well, presumably operands[1] is dead too. We can't
16422 store the result to st(0) as st(0) gets popped on this
16423 instruction. Instead store to operands[2] (which I
16424 think has to be st(1)). st(1) will be popped later.
16425 gcc <= 2.8.1 didn't have this check and generated
16426 assembly code that the Unixware assembler rejected. */
16427 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16428 else
16429 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16430 break;
16431 }
16432
16433 if (STACK_TOP_P (operands[0]))
16434 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16435 else
16436 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16437 break;
16438
16439 case MINUS:
16440 case DIV:
16441 if (MEM_P (operands[1]))
16442 {
16443 p = "r%Z1\t%1";
16444 break;
16445 }
16446
16447 if (MEM_P (operands[2]))
16448 {
16449 p = "%Z2\t%2";
16450 break;
16451 }
16452
16453 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16454 {
16455 #if SYSV386_COMPAT
16456 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16457 derived assemblers, confusingly reverse the direction of
16458 the operation for fsub{r} and fdiv{r} when the
16459 destination register is not st(0). The Intel assembler
16460 doesn't have this brain damage. Read !SYSV386_COMPAT to
16461 figure out what the hardware really does. */
16462 if (STACK_TOP_P (operands[0]))
16463 p = "{p\t%0, %2|rp\t%2, %0}";
16464 else
16465 p = "{rp\t%2, %0|p\t%0, %2}";
16466 #else
16467 if (STACK_TOP_P (operands[0]))
16468 /* As above for fmul/fadd, we can't store to st(0). */
16469 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16470 else
16471 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16472 #endif
16473 break;
16474 }
16475
16476 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16477 {
16478 #if SYSV386_COMPAT
16479 if (STACK_TOP_P (operands[0]))
16480 p = "{rp\t%0, %1|p\t%1, %0}";
16481 else
16482 p = "{p\t%1, %0|rp\t%0, %1}";
16483 #else
16484 if (STACK_TOP_P (operands[0]))
16485 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16486 else
16487 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16488 #endif
16489 break;
16490 }
16491
16492 if (STACK_TOP_P (operands[0]))
16493 {
16494 if (STACK_TOP_P (operands[1]))
16495 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16496 else
16497 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16498 break;
16499 }
16500 else if (STACK_TOP_P (operands[1]))
16501 {
16502 #if SYSV386_COMPAT
16503 p = "{\t%1, %0|r\t%0, %1}";
16504 #else
16505 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16506 #endif
16507 }
16508 else
16509 {
16510 #if SYSV386_COMPAT
16511 p = "{r\t%2, %0|\t%0, %2}";
16512 #else
16513 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16514 #endif
16515 }
16516 break;
16517
16518 default:
16519 gcc_unreachable ();
16520 }
16521
16522 strcat (buf, p);
16523 return buf;
16524 }
16525
16526 /* Check if a 256bit AVX register is referenced inside of EXP. */
16527
16528 static bool
16529 ix86_check_avx256_register (const_rtx exp)
16530 {
16531 if (GET_CODE (exp) == SUBREG)
16532 exp = SUBREG_REG (exp);
16533
16534 return (REG_P (exp)
16535 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16536 }
16537
16538 /* Return needed mode for entity in optimize_mode_switching pass. */
16539
16540 static int
16541 ix86_avx_u128_mode_needed (rtx_insn *insn)
16542 {
16543 if (CALL_P (insn))
16544 {
16545 rtx link;
16546
16547 /* Needed mode is set to AVX_U128_CLEAN if there are
16548 no 256bit modes used in function arguments. */
16549 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16550 link;
16551 link = XEXP (link, 1))
16552 {
16553 if (GET_CODE (XEXP (link, 0)) == USE)
16554 {
16555 rtx arg = XEXP (XEXP (link, 0), 0);
16556
16557 if (ix86_check_avx256_register (arg))
16558 return AVX_U128_DIRTY;
16559 }
16560 }
16561
16562 return AVX_U128_CLEAN;
16563 }
16564
16565 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16566 changes state only when a 256bit register is written to, but we need
16567      to prevent the compiler from moving the optimal insertion point above
16568      an eventual read from a 256bit register.  */
16569 subrtx_iterator::array_type array;
16570 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16571 if (ix86_check_avx256_register (*iter))
16572 return AVX_U128_DIRTY;
16573
16574 return AVX_U128_ANY;
16575 }
16576
16577 /* Return mode that i387 must be switched into
16578 prior to the execution of insn. */
16579
16580 static int
16581 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16582 {
16583 enum attr_i387_cw mode;
16584
16585   /* The mode UNINITIALIZED is used to store the control word after a
16586      function call or ASM pattern.  The mode ANY specifies that the function
16587      has no requirements on the control word and makes no changes in the
16588 bits we are interested in. */
16589
16590 if (CALL_P (insn)
16591 || (NONJUMP_INSN_P (insn)
16592 && (asm_noperands (PATTERN (insn)) >= 0
16593 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16594 return I387_CW_UNINITIALIZED;
16595
16596 if (recog_memoized (insn) < 0)
16597 return I387_CW_ANY;
16598
16599 mode = get_attr_i387_cw (insn);
16600
16601 switch (entity)
16602 {
16603 case I387_TRUNC:
16604 if (mode == I387_CW_TRUNC)
16605 return mode;
16606 break;
16607
16608 case I387_FLOOR:
16609 if (mode == I387_CW_FLOOR)
16610 return mode;
16611 break;
16612
16613 case I387_CEIL:
16614 if (mode == I387_CW_CEIL)
16615 return mode;
16616 break;
16617
16618 case I387_MASK_PM:
16619 if (mode == I387_CW_MASK_PM)
16620 return mode;
16621 break;
16622
16623 default:
16624 gcc_unreachable ();
16625 }
16626
16627 return I387_CW_ANY;
16628 }
16629
16630 /* Return mode that entity must be switched into
16631 prior to the execution of insn. */
16632
16633 static int
16634 ix86_mode_needed (int entity, rtx_insn *insn)
16635 {
16636 switch (entity)
16637 {
16638 case AVX_U128:
16639 return ix86_avx_u128_mode_needed (insn);
16640 case I387_TRUNC:
16641 case I387_FLOOR:
16642 case I387_CEIL:
16643 case I387_MASK_PM:
16644 return ix86_i387_mode_needed (entity, insn);
16645 default:
16646 gcc_unreachable ();
16647 }
16648 return 0;
16649 }
16650
16651 /* Check if a 256bit AVX register is referenced in stores. */
16652
16653 static void
16654 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16655 {
16656 if (ix86_check_avx256_register (dest))
16657 {
16658 bool *used = (bool *) data;
16659 *used = true;
16660 }
16661 }
16662
16663 /* Calculate mode of upper 128bit AVX registers after the insn. */
16664
16665 static int
16666 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16667 {
16668 rtx pat = PATTERN (insn);
16669
16670 if (vzeroupper_operation (pat, VOIDmode)
16671 || vzeroall_operation (pat, VOIDmode))
16672 return AVX_U128_CLEAN;
16673
16674   /* We know that the state is clean after a CALL insn if no 256bit
16675      register is used as the function return register.  */
16676 if (CALL_P (insn))
16677 {
16678 bool avx_reg256_found = false;
16679 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16680
16681 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16682 }
16683
16684 /* Otherwise, return current mode. Remember that if insn
16685 references AVX 256bit registers, the mode was already changed
16686 to DIRTY from MODE_NEEDED. */
16687 return mode;
16688 }
16689
16690 /* Return the mode that an insn results in. */
16691
16692 int
16693 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16694 {
16695 switch (entity)
16696 {
16697 case AVX_U128:
16698 return ix86_avx_u128_mode_after (mode, insn);
16699 case I387_TRUNC:
16700 case I387_FLOOR:
16701 case I387_CEIL:
16702 case I387_MASK_PM:
16703 return mode;
16704 default:
16705 gcc_unreachable ();
16706 }
16707 }
16708
16709 static int
16710 ix86_avx_u128_mode_entry (void)
16711 {
16712 tree arg;
16713
16714 /* Entry mode is set to AVX_U128_DIRTY if there are
16715 256bit modes used in function arguments. */
16716 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16717 arg = TREE_CHAIN (arg))
16718 {
16719 rtx incoming = DECL_INCOMING_RTL (arg);
16720
16721 if (incoming && ix86_check_avx256_register (incoming))
16722 return AVX_U128_DIRTY;
16723 }
16724
16725 return AVX_U128_CLEAN;
16726 }
16727
16728 /* Return a mode that ENTITY is assumed to be
16729 switched to at function entry. */
16730
16731 static int
16732 ix86_mode_entry (int entity)
16733 {
16734 switch (entity)
16735 {
16736 case AVX_U128:
16737 return ix86_avx_u128_mode_entry ();
16738 case I387_TRUNC:
16739 case I387_FLOOR:
16740 case I387_CEIL:
16741 case I387_MASK_PM:
16742 return I387_CW_ANY;
16743 default:
16744 gcc_unreachable ();
16745 }
16746 }
16747
16748 static int
16749 ix86_avx_u128_mode_exit (void)
16750 {
16751 rtx reg = crtl->return_rtx;
16752
16753 /* Exit mode is set to AVX_U128_DIRTY if there are
16754 256bit modes used in the function return register. */
16755 if (reg && ix86_check_avx256_register (reg))
16756 return AVX_U128_DIRTY;
16757
16758 return AVX_U128_CLEAN;
16759 }
16760
16761 /* Return a mode that ENTITY is assumed to be
16762 switched to at function exit. */
16763
16764 static int
16765 ix86_mode_exit (int entity)
16766 {
16767 switch (entity)
16768 {
16769 case AVX_U128:
16770 return ix86_avx_u128_mode_exit ();
16771 case I387_TRUNC:
16772 case I387_FLOOR:
16773 case I387_CEIL:
16774 case I387_MASK_PM:
16775 return I387_CW_ANY;
16776 default:
16777 gcc_unreachable ();
16778 }
16779 }
16780
16781 static int
16782 ix86_mode_priority (int, int n)
16783 {
16784 return n;
16785 }
16786
16787 /* Output code to initialize control word copies used by trunc?f?i and
16788 rounding patterns. CURRENT_MODE is set to current control word,
16789 while NEW_MODE is set to new control word. */
16790
16791 static void
16792 emit_i387_cw_initialization (int mode)
16793 {
16794 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16795 rtx new_mode;
16796
16797 enum ix86_stack_slot slot;
16798
16799 rtx reg = gen_reg_rtx (HImode);
16800
16801 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16802 emit_move_insn (reg, copy_rtx (stored_mode));
16803
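  /* Set the rounding/precision control bits, either with full HImode
     logical operations or with a byte insert into the stored control
     word.  */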
16804 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16805 || optimize_insn_for_size_p ())
16806 {
16807 switch (mode)
16808 {
16809 case I387_CW_TRUNC:
16810 /* round toward zero (truncate) */
16811 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16812 slot = SLOT_CW_TRUNC;
16813 break;
16814
16815 case I387_CW_FLOOR:
16816 /* round down toward -oo */
16817 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16818 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16819 slot = SLOT_CW_FLOOR;
16820 break;
16821
16822 case I387_CW_CEIL:
16823 /* round up toward +oo */
16824 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16825 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16826 slot = SLOT_CW_CEIL;
16827 break;
16828
16829 case I387_CW_MASK_PM:
16830 /* mask precision exception for nearbyint() */
16831 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16832 slot = SLOT_CW_MASK_PM;
16833 break;
16834
16835 default:
16836 gcc_unreachable ();
16837 }
16838 }
16839 else
16840 {
16841 switch (mode)
16842 {
16843 case I387_CW_TRUNC:
16844 /* round toward zero (truncate) */
16845 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16846 slot = SLOT_CW_TRUNC;
16847 break;
16848
16849 case I387_CW_FLOOR:
16850 /* round down toward -oo */
16851 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16852 slot = SLOT_CW_FLOOR;
16853 break;
16854
16855 case I387_CW_CEIL:
16856 /* round up toward +oo */
16857 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16858 slot = SLOT_CW_CEIL;
16859 break;
16860
16861 case I387_CW_MASK_PM:
16862 /* mask precision exception for nearbyint() */
16863 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16864 slot = SLOT_CW_MASK_PM;
16865 break;
16866
16867 default:
16868 gcc_unreachable ();
16869 }
16870 }
16871
16872 gcc_assert (slot < MAX_386_STACK_LOCALS);
16873
16874 new_mode = assign_386_stack_local (HImode, slot);
16875 emit_move_insn (new_mode, reg);
16876 }
16877
16878 /* Emit vzeroupper. */
16879
16880 void
16881 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16882 {
16883 int i;
16884
16885 /* Cancel automatic vzeroupper insertion if there are
16886 live call-saved SSE registers at the insertion point. */
16887
16888 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16889 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16890 return;
16891
16892 if (TARGET_64BIT)
16893 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16894 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16895 return;
16896
16897 emit_insn (gen_avx_vzeroupper ());
16898 }
16899
16902 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16903 is the set of hard registers live at the point where the insn(s)
16904 are to be inserted. */
16905
16906 static void
16907 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16908 HARD_REG_SET regs_live)
16909 {
16910 switch (entity)
16911 {
16912 case AVX_U128:
16913 if (mode == AVX_U128_CLEAN)
16914 ix86_avx_emit_vzeroupper (regs_live);
16915 break;
16916 case I387_TRUNC:
16917 case I387_FLOOR:
16918 case I387_CEIL:
16919 case I387_MASK_PM:
16920 if (mode != I387_CW_ANY
16921 && mode != I387_CW_UNINITIALIZED)
16922 emit_i387_cw_initialization (mode);
16923 break;
16924 default:
16925 gcc_unreachable ();
16926 }
16927 }
16928
16929 /* Output code for INSN to convert a float to a signed int. OPERANDS
16930 are the insn operands. The output may be [HSD]Imode and the input
16931 operand may be [SDX]Fmode. */
16932
16933 const char *
16934 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16935 {
16936 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16937 int dimode_p = GET_MODE (operands[0]) == DImode;
16938 int round_mode = get_attr_i387_cw (insn);
16939
16940 /* Jump through a hoop or two for DImode, since the hardware has no
16941 non-popping instruction. We used to do this a different way, but
16942 that was somewhat fragile and broke with post-reload splitters. */
16943 if ((dimode_p || fisttp) && !stack_top_dies)
16944 output_asm_insn ("fld\t%y1", operands);
16945
16946 gcc_assert (STACK_TOP_P (operands[1]));
16947 gcc_assert (MEM_P (operands[0]));
16948 gcc_assert (GET_MODE (operands[1]) != TFmode);
16949
16950 if (fisttp)
16951 output_asm_insn ("fisttp%Z0\t%0", operands);
16952 else
16953 {
16954 if (round_mode != I387_CW_ANY)
16955 output_asm_insn ("fldcw\t%3", operands);
16956 if (stack_top_dies || dimode_p)
16957 output_asm_insn ("fistp%Z0\t%0", operands);
16958 else
16959 output_asm_insn ("fist%Z0\t%0", operands);
16960 if (round_mode != I387_CW_ANY)
16961 output_asm_insn ("fldcw\t%2", operands);
16962 }
16963
16964 return "";
16965 }
16966
16967 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16968 have the values zero or one, indicates the ffreep insn's operand
16969 from the OPERANDS array. */
16970
16971 static const char *
16972 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16973 {
16974 if (TARGET_USE_FFREEP)
16975 #ifdef HAVE_AS_IX86_FFREEP
16976 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16977 #else
16978 {
16979 static char retval[32];
16980 int regno = REGNO (operands[opno]);
16981
16982 gcc_assert (STACK_REGNO_P (regno));
16983
16984 regno -= FIRST_STACK_REG;
16985
16986 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16987 return retval;
16988 }
16989 #endif
16990
16991 return opno ? "fstp\t%y1" : "fstp\t%y0";
16992 }
16993
16994
16995 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16996 should be used. UNORDERED_P is true when fucom should be used. */
16997
16998 const char *
16999 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17000 {
17001 int stack_top_dies;
17002 rtx cmp_op0, cmp_op1;
17003 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17004
17005 if (eflags_p)
17006 {
17007 cmp_op0 = operands[0];
17008 cmp_op1 = operands[1];
17009 }
17010 else
17011 {
17012 cmp_op0 = operands[1];
17013 cmp_op1 = operands[2];
17014 }
17015
17016 if (is_sse)
17017 {
17018 if (GET_MODE (operands[0]) == SFmode)
17019 if (unordered_p)
17020 return "%vucomiss\t{%1, %0|%0, %1}";
17021 else
17022 return "%vcomiss\t{%1, %0|%0, %1}";
17023 else
17024 if (unordered_p)
17025 return "%vucomisd\t{%1, %0|%0, %1}";
17026 else
17027 return "%vcomisd\t{%1, %0|%0, %1}";
17028 }
17029
17030 gcc_assert (STACK_TOP_P (cmp_op0));
17031
17032 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17033
17034 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17035 {
17036 if (stack_top_dies)
17037 {
17038 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17039 return output_387_ffreep (operands, 1);
17040 }
17041 else
17042 return "ftst\n\tfnstsw\t%0";
17043 }
17044
17045 if (STACK_REG_P (cmp_op1)
17046 && stack_top_dies
17047 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17048 && REGNO (cmp_op1) != FIRST_STACK_REG)
17049 {
17050       /* If the top of the 387 stack dies, and the other operand is also
17051 	 a stack register that dies, then this must be a `fcompp' float
17052 	 compare.  */
17053
17054 if (eflags_p)
17055 {
17056 /* There is no double popping fcomi variant. Fortunately,
17057 eflags is immune from the fstp's cc clobbering. */
17058 if (unordered_p)
17059 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17060 else
17061 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17062 return output_387_ffreep (operands, 0);
17063 }
17064 else
17065 {
17066 if (unordered_p)
17067 return "fucompp\n\tfnstsw\t%0";
17068 else
17069 return "fcompp\n\tfnstsw\t%0";
17070 }
17071 }
17072 else
17073 {
17074 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17075
17076 static const char * const alt[16] =
17077 {
17078 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17079 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17080 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17081 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17082
17083 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17084 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17085 NULL,
17086 NULL,
17087
17088 "fcomi\t{%y1, %0|%0, %y1}",
17089 "fcomip\t{%y1, %0|%0, %y1}",
17090 "fucomi\t{%y1, %0|%0, %y1}",
17091 "fucomip\t{%y1, %0|%0, %y1}",
17092
17093 NULL,
17094 NULL,
17095 NULL,
17096 NULL
17097 };
17098
17099 int mask;
17100 const char *ret;
17101
17102 mask = eflags_p << 3;
17103 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17104 mask |= unordered_p << 1;
17105 mask |= stack_top_dies;
17106
17107 gcc_assert (mask < 16);
17108 ret = alt[mask];
17109 gcc_assert (ret);
17110
17111 return ret;
17112 }
17113 }
17114
17115 void
17116 ix86_output_addr_vec_elt (FILE *file, int value)
17117 {
17118 const char *directive = ASM_LONG;
17119
17120 #ifdef ASM_QUAD
17121 if (TARGET_LP64)
17122 directive = ASM_QUAD;
17123 #else
17124 gcc_assert (!TARGET_64BIT);
17125 #endif
17126
17127 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17128 }
17129
17130 void
17131 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17132 {
17133 const char *directive = ASM_LONG;
17134
17135 #ifdef ASM_QUAD
17136 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17137 directive = ASM_QUAD;
17138 #else
17139 gcc_assert (!TARGET_64BIT);
17140 #endif
17141 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17142 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17143 fprintf (file, "%s%s%d-%s%d\n",
17144 directive, LPREFIX, value, LPREFIX, rel);
17145 else if (HAVE_AS_GOTOFF_IN_DATA)
17146 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17147 #if TARGET_MACHO
17148 else if (TARGET_MACHO)
17149 {
17150 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17151 machopic_output_function_base_name (file);
17152 putc ('\n', file);
17153 }
17154 #endif
17155 else
17156 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17157 GOT_SYMBOL_NAME, LPREFIX, value);
17158 }
17159 \f
17160 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17161 for the target. */
17162
17163 void
17164 ix86_expand_clear (rtx dest)
17165 {
17166 rtx tmp;
17167
17168 /* We play register width games, which are only valid after reload. */
17169 gcc_assert (reload_completed);
17170
17171 /* Avoid HImode and its attendant prefix byte. */
17172 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17173 dest = gen_rtx_REG (SImode, REGNO (dest));
17174 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17175
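  /* Zeroing with xor clobbers the flags, so the insn needs an explicit
     FLAGS_REG clobber to match the flag-clobbering zeroing pattern.  */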
17176 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17177 {
17178 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17179 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17180 }
17181
17182 emit_insn (tmp);
17183 }
17184
17185 /* X is an unchanging MEM. If it is a constant pool reference, return
17186 the constant pool rtx, else NULL. */
17187
17188 rtx
17189 maybe_get_pool_constant (rtx x)
17190 {
17191 x = ix86_delegitimize_address (XEXP (x, 0));
17192
17193 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17194 return get_pool_constant (x);
17195
17196 return NULL_RTX;
17197 }
17198
17199 void
17200 ix86_expand_move (machine_mode mode, rtx operands[])
17201 {
17202 rtx op0, op1;
17203 enum tls_model model;
17204
17205 op0 = operands[0];
17206 op1 = operands[1];
17207
17208 if (GET_CODE (op1) == SYMBOL_REF)
17209 {
17210 rtx tmp;
17211
17212 model = SYMBOL_REF_TLS_MODEL (op1);
17213 if (model)
17214 {
17215 op1 = legitimize_tls_address (op1, model, true);
17216 op1 = force_operand (op1, op0);
17217 if (op1 == op0)
17218 return;
17219 op1 = convert_to_mode (mode, op1, 1);
17220 }
17221 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17222 op1 = tmp;
17223 }
17224 else if (GET_CODE (op1) == CONST
17225 && GET_CODE (XEXP (op1, 0)) == PLUS
17226 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17227 {
17228 rtx addend = XEXP (XEXP (op1, 0), 1);
17229 rtx symbol = XEXP (XEXP (op1, 0), 0);
17230 rtx tmp;
17231
17232 model = SYMBOL_REF_TLS_MODEL (symbol);
17233 if (model)
17234 tmp = legitimize_tls_address (symbol, model, true);
17235 else
17236 tmp = legitimize_pe_coff_symbol (symbol, true);
17237
17238 if (tmp)
17239 {
17240 tmp = force_operand (tmp, NULL);
17241 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17242 op0, 1, OPTAB_DIRECT);
17243 if (tmp == op0)
17244 return;
17245 op1 = convert_to_mode (mode, tmp, 1);
17246 }
17247 }
17248
17249 if ((flag_pic || MACHOPIC_INDIRECT)
17250 && symbolic_operand (op1, mode))
17251 {
17252 if (TARGET_MACHO && !TARGET_64BIT)
17253 {
17254 #if TARGET_MACHO
17255 /* dynamic-no-pic */
17256 if (MACHOPIC_INDIRECT)
17257 {
17258 rtx temp = ((reload_in_progress
17259 || ((op0 && REG_P (op0))
17260 && mode == Pmode))
17261 ? op0 : gen_reg_rtx (Pmode));
17262 op1 = machopic_indirect_data_reference (op1, temp);
17263 if (MACHOPIC_PURE)
17264 op1 = machopic_legitimize_pic_address (op1, mode,
17265 temp == op1 ? 0 : temp);
17266 }
17267 if (op0 != op1 && GET_CODE (op0) != MEM)
17268 {
17269 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17270 emit_insn (insn);
17271 return;
17272 }
17273 if (GET_CODE (op0) == MEM)
17274 op1 = force_reg (Pmode, op1);
17275 else
17276 {
17277 rtx temp = op0;
17278 if (GET_CODE (temp) != REG)
17279 temp = gen_reg_rtx (Pmode);
17280 temp = legitimize_pic_address (op1, temp);
17281 if (temp == op0)
17282 return;
17283 op1 = temp;
17284 }
17285 /* dynamic-no-pic */
17286 #endif
17287 }
17288 else
17289 {
17290 if (MEM_P (op0))
17291 op1 = force_reg (mode, op1);
17292 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17293 {
17294 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17295 op1 = legitimize_pic_address (op1, reg);
17296 if (op0 == op1)
17297 return;
17298 op1 = convert_to_mode (mode, op1, 1);
17299 }
17300 }
17301 }
17302 else
17303 {
17304 if (MEM_P (op0)
17305 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17306 || !push_operand (op0, mode))
17307 && MEM_P (op1))
17308 op1 = force_reg (mode, op1);
17309
17310 if (push_operand (op0, mode)
17311 && ! general_no_elim_operand (op1, mode))
17312 op1 = copy_to_mode_reg (mode, op1);
17313
17314       /* Force large constants in 64bit compilation into a register
17315 	 to get them CSEed.  */
17316 if (can_create_pseudo_p ()
17317 && (mode == DImode) && TARGET_64BIT
17318 && immediate_operand (op1, mode)
17319 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17320 && !register_operand (op0, mode)
17321 && optimize)
17322 op1 = copy_to_mode_reg (mode, op1);
17323
17324 if (can_create_pseudo_p ()
17325 && FLOAT_MODE_P (mode)
17326 && GET_CODE (op1) == CONST_DOUBLE)
17327 {
17328 	  /* If we are loading a floating point constant to a register,
17329 	     force the value to memory now, since we'll get better code
17330 	     out of the back end.  */
17331
17332 op1 = validize_mem (force_const_mem (mode, op1));
17333 if (!register_operand (op0, mode))
17334 {
17335 rtx temp = gen_reg_rtx (mode);
17336 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17337 emit_move_insn (op0, temp);
17338 return;
17339 }
17340 }
17341 }
17342
17343 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17344 }
17345
17346 void
17347 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17348 {
17349 rtx op0 = operands[0], op1 = operands[1];
17350 unsigned int align = GET_MODE_ALIGNMENT (mode);
17351
17352 if (push_operand (op0, VOIDmode))
17353 op0 = emit_move_resolve_push (mode, op0);
17354
17355 /* Force constants other than zero into memory. We do not know how
17356 the instructions used to build constants modify the upper 64 bits
17357      of the register; once we have that information we may be able
17358 to handle some of them more efficiently. */
17359 if (can_create_pseudo_p ()
17360 && register_operand (op0, mode)
17361 && (CONSTANT_P (op1)
17362 || (GET_CODE (op1) == SUBREG
17363 && CONSTANT_P (SUBREG_REG (op1))))
17364 && !standard_sse_constant_p (op1))
17365 op1 = validize_mem (force_const_mem (mode, op1));
17366
17367   /* We need to check memory alignment for SSE mode since attributes
17368      can make operands unaligned.  */
17369 if (can_create_pseudo_p ()
17370 && SSE_REG_MODE_P (mode)
17371 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17372 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17373 {
17374 rtx tmp[2];
17375
17376 /* ix86_expand_vector_move_misalign() does not like constants ... */
17377 if (CONSTANT_P (op1)
17378 || (GET_CODE (op1) == SUBREG
17379 && CONSTANT_P (SUBREG_REG (op1))))
17380 op1 = validize_mem (force_const_mem (mode, op1));
17381
17382 /* ... nor both arguments in memory. */
17383 if (!register_operand (op0, mode)
17384 && !register_operand (op1, mode))
17385 op1 = force_reg (mode, op1);
17386
17387 tmp[0] = op0; tmp[1] = op1;
17388 ix86_expand_vector_move_misalign (mode, tmp);
17389 return;
17390 }
17391
17392 /* Make operand1 a register if it isn't already. */
17393 if (can_create_pseudo_p ()
17394 && !register_operand (op0, mode)
17395 && !register_operand (op1, mode))
17396 {
17397 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17398 return;
17399 }
17400
17401 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17402 }
17403
17404 /* Split 32-byte AVX unaligned load and store if needed. */
17405
17406 static void
17407 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17408 {
17409 rtx m;
17410 rtx (*extract) (rtx, rtx, rtx);
17411 rtx (*load_unaligned) (rtx, rtx);
17412 rtx (*store_unaligned) (rtx, rtx);
17413 machine_mode mode;
17414
17415 switch (GET_MODE (op0))
17416 {
17417 default:
17418 gcc_unreachable ();
17419 case V32QImode:
17420 extract = gen_avx_vextractf128v32qi;
17421 load_unaligned = gen_avx_loaddquv32qi;
17422 store_unaligned = gen_avx_storedquv32qi;
17423 mode = V16QImode;
17424 break;
17425 case V8SFmode:
17426 extract = gen_avx_vextractf128v8sf;
17427 load_unaligned = gen_avx_loadups256;
17428 store_unaligned = gen_avx_storeups256;
17429 mode = V4SFmode;
17430 break;
17431 case V4DFmode:
17432 extract = gen_avx_vextractf128v4df;
17433 load_unaligned = gen_avx_loadupd256;
17434 store_unaligned = gen_avx_storeupd256;
17435 mode = V2DFmode;
17436 break;
17437 }
17438
17439 if (MEM_P (op1))
17440 {
17441 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17442 {
17443 rtx r = gen_reg_rtx (mode);
17444 m = adjust_address (op1, mode, 0);
17445 emit_move_insn (r, m);
17446 m = adjust_address (op1, mode, 16);
17447 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17448 emit_move_insn (op0, r);
17449 }
17450 /* Normal *mov<mode>_internal pattern will handle
17451 unaligned loads just fine if misaligned_operand
17452 is true, and without the UNSPEC it can be combined
17453 with arithmetic instructions. */
17454 else if (misaligned_operand (op1, GET_MODE (op1)))
17455 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17456 else
17457 emit_insn (load_unaligned (op0, op1));
17458 }
17459 else if (MEM_P (op0))
17460 {
17461 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17462 {
17463 m = adjust_address (op0, mode, 0);
17464 emit_insn (extract (m, op1, const0_rtx));
17465 m = adjust_address (op0, mode, 16);
17466 emit_insn (extract (m, op1, const1_rtx));
17467 }
17468 else
17469 emit_insn (store_unaligned (op0, op1));
17470 }
17471 else
17472 gcc_unreachable ();
17473 }
17474
17475 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17476 straight to ix86_expand_vector_move. */
17477 /* Code generation for scalar reg-reg moves of single and double precision data:
17478 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17479 movaps reg, reg
17480 else
17481 movss reg, reg
17482 if (x86_sse_partial_reg_dependency == true)
17483 movapd reg, reg
17484 else
17485 movsd reg, reg
17486
17487 Code generation for scalar loads of double precision data:
17488 if (x86_sse_split_regs == true)
17489 movlpd mem, reg (gas syntax)
17490 else
17491 movsd mem, reg
17492
17493 Code generation for unaligned packed loads of single precision data
17494 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17495 if (x86_sse_unaligned_move_optimal)
17496 movups mem, reg
17497
17498 if (x86_sse_partial_reg_dependency == true)
17499 {
17500 xorps reg, reg
17501 movlps mem, reg
17502 movhps mem+8, reg
17503 }
17504 else
17505 {
17506 movlps mem, reg
17507 movhps mem+8, reg
17508 }
17509
17510 Code generation for unaligned packed loads of double precision data
17511 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17512 if (x86_sse_unaligned_move_optimal)
17513 movupd mem, reg
17514
17515 if (x86_sse_split_regs == true)
17516 {
17517 movlpd mem, reg
17518 movhpd mem+8, reg
17519 }
17520 else
17521 {
17522 movsd mem, reg
17523 movhpd mem+8, reg
17524 }
17525 */
17526
17527 void
17528 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17529 {
17530 rtx op0, op1, orig_op0 = NULL_RTX, m;
17531 rtx (*load_unaligned) (rtx, rtx);
17532 rtx (*store_unaligned) (rtx, rtx);
17533
17534 op0 = operands[0];
17535 op1 = operands[1];
17536
17537 if (GET_MODE_SIZE (mode) == 64)
17538 {
17539 switch (GET_MODE_CLASS (mode))
17540 {
17541 case MODE_VECTOR_INT:
17542 case MODE_INT:
17543 if (GET_MODE (op0) != V16SImode)
17544 {
17545 if (!MEM_P (op0))
17546 {
17547 orig_op0 = op0;
17548 op0 = gen_reg_rtx (V16SImode);
17549 }
17550 else
17551 op0 = gen_lowpart (V16SImode, op0);
17552 }
17553 op1 = gen_lowpart (V16SImode, op1);
17554 /* FALLTHRU */
17555
17556 case MODE_VECTOR_FLOAT:
17557 switch (GET_MODE (op0))
17558 {
17559 default:
17560 gcc_unreachable ();
17561 case V16SImode:
17562 load_unaligned = gen_avx512f_loaddquv16si;
17563 store_unaligned = gen_avx512f_storedquv16si;
17564 break;
17565 case V16SFmode:
17566 load_unaligned = gen_avx512f_loadups512;
17567 store_unaligned = gen_avx512f_storeups512;
17568 break;
17569 case V8DFmode:
17570 load_unaligned = gen_avx512f_loadupd512;
17571 store_unaligned = gen_avx512f_storeupd512;
17572 break;
17573 }
17574
17575 if (MEM_P (op1))
17576 emit_insn (load_unaligned (op0, op1));
17577 else if (MEM_P (op0))
17578 emit_insn (store_unaligned (op0, op1));
17579 else
17580 gcc_unreachable ();
17581 if (orig_op0)
17582 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17583 break;
17584
17585 default:
17586 gcc_unreachable ();
17587 }
17588
17589 return;
17590 }
17591
17592 if (TARGET_AVX
17593 && GET_MODE_SIZE (mode) == 32)
17594 {
17595 switch (GET_MODE_CLASS (mode))
17596 {
17597 case MODE_VECTOR_INT:
17598 case MODE_INT:
17599 if (GET_MODE (op0) != V32QImode)
17600 {
17601 if (!MEM_P (op0))
17602 {
17603 orig_op0 = op0;
17604 op0 = gen_reg_rtx (V32QImode);
17605 }
17606 else
17607 op0 = gen_lowpart (V32QImode, op0);
17608 }
17609 op1 = gen_lowpart (V32QImode, op1);
17610 /* FALLTHRU */
17611
17612 case MODE_VECTOR_FLOAT:
17613 ix86_avx256_split_vector_move_misalign (op0, op1);
17614 if (orig_op0)
17615 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17616 break;
17617
17618 default:
17619 gcc_unreachable ();
17620 }
17621
17622 return;
17623 }
17624
17625 if (MEM_P (op1))
17626 {
17627 /* Normal *mov<mode>_internal pattern will handle
17628 unaligned loads just fine if misaligned_operand
17629 is true, and without the UNSPEC it can be combined
17630 with arithmetic instructions. */
17631 if (TARGET_AVX
17632 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17633 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17634 && misaligned_operand (op1, GET_MODE (op1)))
17635 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17636 /* ??? If we have typed data, then it would appear that using
17637 movdqu is the only way to get unaligned data loaded with
17638 integer type. */
17639 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17640 {
17641 if (GET_MODE (op0) != V16QImode)
17642 {
17643 orig_op0 = op0;
17644 op0 = gen_reg_rtx (V16QImode);
17645 }
17646 op1 = gen_lowpart (V16QImode, op1);
17647 /* We will eventually emit movups based on insn attributes. */
17648 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17649 if (orig_op0)
17650 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17651 }
17652 else if (TARGET_SSE2 && mode == V2DFmode)
17653 {
17654 rtx zero;
17655
17656 if (TARGET_AVX
17657 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17658 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17659 || optimize_insn_for_size_p ())
17660 {
17661 /* We will eventually emit movups based on insn attributes. */
17662 emit_insn (gen_sse2_loadupd (op0, op1));
17663 return;
17664 }
17665
17666 /* When SSE registers are split into halves, we can avoid
17667 writing to the top half twice. */
17668 if (TARGET_SSE_SPLIT_REGS)
17669 {
17670 emit_clobber (op0);
17671 zero = op0;
17672 }
17673 else
17674 {
17675 /* ??? Not sure about the best option for the Intel chips.
17676 The following would seem to satisfy; the register is
17677 entirely cleared, breaking the dependency chain. We
17678 then store to the upper half, with a dependency depth
17679 of one. A rumor has it that Intel recommends two movsd
17680 followed by an unpacklpd, but this is unconfirmed. And
17681 given that the dependency depth of the unpacklpd would
17682 still be one, I'm not sure why this would be better. */
17683 zero = CONST0_RTX (V2DFmode);
17684 }
17685
17686 m = adjust_address (op1, DFmode, 0);
17687 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17688 m = adjust_address (op1, DFmode, 8);
17689 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17690 }
17691 else
17692 {
17693 rtx t;
17694
17695 if (TARGET_AVX
17696 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17697 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17698 || optimize_insn_for_size_p ())
17699 {
17700 if (GET_MODE (op0) != V4SFmode)
17701 {
17702 orig_op0 = op0;
17703 op0 = gen_reg_rtx (V4SFmode);
17704 }
17705 op1 = gen_lowpart (V4SFmode, op1);
17706 emit_insn (gen_sse_loadups (op0, op1));
17707 if (orig_op0)
17708 emit_move_insn (orig_op0,
17709 gen_lowpart (GET_MODE (orig_op0), op0));
17710 return;
17711 }
17712
17713 if (mode != V4SFmode)
17714 t = gen_reg_rtx (V4SFmode);
17715 else
17716 t = op0;
17717
17718 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17719 emit_move_insn (t, CONST0_RTX (V4SFmode));
17720 else
17721 emit_clobber (t);
17722
17723 m = adjust_address (op1, V2SFmode, 0);
17724 emit_insn (gen_sse_loadlps (t, t, m));
17725 m = adjust_address (op1, V2SFmode, 8);
17726 emit_insn (gen_sse_loadhps (t, t, m));
17727 if (mode != V4SFmode)
17728 emit_move_insn (op0, gen_lowpart (mode, t));
17729 }
17730 }
17731 else if (MEM_P (op0))
17732 {
17733 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17734 {
17735 op0 = gen_lowpart (V16QImode, op0);
17736 op1 = gen_lowpart (V16QImode, op1);
17737 /* We will eventually emit movups based on insn attributes. */
17738 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17739 }
17740 else if (TARGET_SSE2 && mode == V2DFmode)
17741 {
17742 if (TARGET_AVX
17743 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17744 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17745 || optimize_insn_for_size_p ())
17746 /* We will eventually emit movups based on insn attributes. */
17747 emit_insn (gen_sse2_storeupd (op0, op1));
17748 else
17749 {
17750 m = adjust_address (op0, DFmode, 0);
17751 emit_insn (gen_sse2_storelpd (m, op1));
17752 m = adjust_address (op0, DFmode, 8);
17753 emit_insn (gen_sse2_storehpd (m, op1));
17754 }
17755 }
17756 else
17757 {
17758 if (mode != V4SFmode)
17759 op1 = gen_lowpart (V4SFmode, op1);
17760
17761 if (TARGET_AVX
17762 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17763 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17764 || optimize_insn_for_size_p ())
17765 {
17766 op0 = gen_lowpart (V4SFmode, op0);
17767 emit_insn (gen_sse_storeups (op0, op1));
17768 }
17769 else
17770 {
17771 m = adjust_address (op0, V2SFmode, 0);
17772 emit_insn (gen_sse_storelps (m, op1));
17773 m = adjust_address (op0, V2SFmode, 8);
17774 emit_insn (gen_sse_storehps (m, op1));
17775 }
17776 }
17777 }
17778 else
17779 gcc_unreachable ();
17780 }
17781
17782 /* Helper function of ix86_fixup_binary_operands to canonicalize
17783 operand order. Returns true if the operands should be swapped. */
17784
17785 static bool
17786 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17787 rtx operands[])
17788 {
17789 rtx dst = operands[0];
17790 rtx src1 = operands[1];
17791 rtx src2 = operands[2];
17792
17793 /* If the operation is not commutative, we can't do anything. */
17794 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17795 return false;
17796
17797 /* Highest priority is that src1 should match dst. */
17798 if (rtx_equal_p (dst, src1))
17799 return false;
17800 if (rtx_equal_p (dst, src2))
17801 return true;
17802
17803 /* Next highest priority is that immediate constants come second. */
17804 if (immediate_operand (src2, mode))
17805 return false;
17806 if (immediate_operand (src1, mode))
17807 return true;
17808
17809 /* Lowest priority is that memory references should come second. */
17810 if (MEM_P (src2))
17811 return false;
17812 if (MEM_P (src1))
17813 return true;
17814
17815 return false;
17816 }
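/* Illustrative example (hypothetical operands, editor's sketch): with a
   commutative CODE such as PLUS, operands like

	dst  = (reg:SI r1)
	src1 = (const_int 5)
	src2 = (reg:SI r1)

   are reported as needing a swap, since src2 already matches dst; likewise
   src1 = MEM, src2 = REG is swapped so that the memory reference ends up
   second.  */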
17817
17818
17819 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17820 destination to use for the operation. If different from the true
17821 destination in operands[0], a copy operation will be required. */
17822
17823 rtx
17824 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17825 rtx operands[])
17826 {
17827 rtx dst = operands[0];
17828 rtx src1 = operands[1];
17829 rtx src2 = operands[2];
17830
17831 /* Canonicalize operand order. */
17832 if (ix86_swap_binary_operands_p (code, mode, operands))
17833 {
17834 /* It is invalid to swap operands of different modes. */
17835 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17836
17837 std::swap (src1, src2);
17838 }
17839
17840 /* Both source operands cannot be in memory. */
17841 if (MEM_P (src1) && MEM_P (src2))
17842 {
17843 /* Optimization: Only read from memory once. */
17844 if (rtx_equal_p (src1, src2))
17845 {
17846 src2 = force_reg (mode, src2);
17847 src1 = src2;
17848 }
17849 else if (rtx_equal_p (dst, src1))
17850 src2 = force_reg (mode, src2);
17851 else
17852 src1 = force_reg (mode, src1);
17853 }
17854
17855 /* If the destination is memory, and we do not have matching source
17856 operands, do things in registers. */
17857 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17858 dst = gen_reg_rtx (mode);
17859
17860 /* Source 1 cannot be a constant. */
17861 if (CONSTANT_P (src1))
17862 src1 = force_reg (mode, src1);
17863
17864 /* Source 1 cannot be a non-matching memory. */
17865 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17866 src1 = force_reg (mode, src1);
17867
17868 /* Improve address combine. */
17869 if (code == PLUS
17870 && GET_MODE_CLASS (mode) == MODE_INT
17871 && MEM_P (src2))
17872 src2 = force_reg (mode, src2);
17873
17874 operands[1] = src1;
17875 operands[2] = src2;
17876 return dst;
17877 }
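/* Illustrative example (hypothetical operands, editor's sketch): for
   code == PLUS with operands[0] a MEM, operands[1] == (const_int 12) and
   operands[2] a REG, the sources are first swapped (immediates go second),
   and since the memory destination no longer matches src1 a fresh pseudo
   is returned as the destination; the caller is then expected to copy that
   pseudo back into operands[0].  */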
17878
17879 /* Similarly, but assume that the destination has already been
17880 set up properly. */
17881
17882 void
17883 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17884 machine_mode mode, rtx operands[])
17885 {
17886 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17887 gcc_assert (dst == operands[0]);
17888 }
17889
17890 /* Attempt to expand a binary operator. Make the expansion closer to the
17891 actual machine than just general_operand, which would allow 3 separate
17892 memory references (one output, two input) in a single insn. */
17893
17894 void
17895 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17896 rtx operands[])
17897 {
17898 rtx src1, src2, dst, op, clob;
17899
17900 dst = ix86_fixup_binary_operands (code, mode, operands);
17901 src1 = operands[1];
17902 src2 = operands[2];
17903
17904 /* Emit the instruction. */
17905
17906 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17907 if (reload_in_progress)
17908 {
17909 /* Reload doesn't know about the flags register, and doesn't know that
17910 it doesn't want to clobber it. We can only do this with PLUS. */
17911 gcc_assert (code == PLUS);
17912 emit_insn (op);
17913 }
17914 else if (reload_completed
17915 && code == PLUS
17916 && !rtx_equal_p (dst, src1))
17917 {
17918 /* This is going to be an LEA; avoid splitting it later. */
17919 emit_insn (op);
17920 }
17921 else
17922 {
17923 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17924 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17925 }
17926
17927 /* Fix up the destination if needed. */
17928 if (dst != operands[0])
17929 emit_move_insn (operands[0], dst);
17930 }
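/* Illustrative example (hypothetical RTL, editor's sketch): outside of
   reload, a simple register add is typically emitted as a PARALLEL such as

	(parallel [(set (reg:SI ax) (plus:SI (reg:SI ax) (reg:SI dx)))
		   (clobber (reg:CC flags))])

   whereas during reload, or for a post-reload PLUS with a non-matching
   destination, only the bare SET is emitted, since the flags clobber must
   be avoided there.  */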
17931
17932 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17933 the given OPERANDS. */
17934
17935 void
17936 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17937 rtx operands[])
17938 {
17939 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17940 if (GET_CODE (operands[1]) == SUBREG)
17941 {
17942 op1 = operands[1];
17943 op2 = operands[2];
17944 }
17945 else if (GET_CODE (operands[2]) == SUBREG)
17946 {
17947 op1 = operands[2];
17948 op2 = operands[1];
17949 }
17950 /* Optimize (__m128i) d | (__m128i) e and similar code
17951 when d and e are float vectors into float vector logical
17952 insn. In C/C++ without using intrinsics there is no other way
17953 to express vector logical operation on float vectors than
17954 to cast them temporarily to integer vectors. */
17955 if (op1
17956 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17957 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17958 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17959 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17960 && SUBREG_BYTE (op1) == 0
17961 && (GET_CODE (op2) == CONST_VECTOR
17962 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17963 && SUBREG_BYTE (op2) == 0))
17964 && can_create_pseudo_p ())
17965 {
17966 rtx dst;
17967 switch (GET_MODE (SUBREG_REG (op1)))
17968 {
17969 case V4SFmode:
17970 case V8SFmode:
17971 case V16SFmode:
17972 case V2DFmode:
17973 case V4DFmode:
17974 case V8DFmode:
17975 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17976 if (GET_CODE (op2) == CONST_VECTOR)
17977 {
17978 op2 = gen_lowpart (GET_MODE (dst), op2);
17979 op2 = force_reg (GET_MODE (dst), op2);
17980 }
17981 else
17982 {
17983 op1 = operands[1];
17984 op2 = SUBREG_REG (operands[2]);
17985 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17986 op2 = force_reg (GET_MODE (dst), op2);
17987 }
17988 op1 = SUBREG_REG (op1);
17989 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17990 op1 = force_reg (GET_MODE (dst), op1);
17991 emit_insn (gen_rtx_SET (VOIDmode, dst,
17992 gen_rtx_fmt_ee (code, GET_MODE (dst),
17993 op1, op2)));
17994 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17995 return;
17996 default:
17997 break;
17998 }
17999 }
18000 if (!nonimmediate_operand (operands[1], mode))
18001 operands[1] = force_reg (mode, operands[1]);
18002 if (!nonimmediate_operand (operands[2], mode))
18003 operands[2] = force_reg (mode, operands[2]);
18004 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18005 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18006 gen_rtx_fmt_ee (code, mode, operands[1],
18007 operands[2])));
18008 }
18009
18010 /* Return TRUE or FALSE depending on whether the binary operator meets the
18011 appropriate constraints. */
18012
18013 bool
18014 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18015 rtx operands[3])
18016 {
18017 rtx dst = operands[0];
18018 rtx src1 = operands[1];
18019 rtx src2 = operands[2];
18020
18021 /* Both source operands cannot be in memory. */
18022 if (MEM_P (src1) && MEM_P (src2))
18023 return false;
18024
18025 /* Canonicalize operand order for commutative operators. */
18026 if (ix86_swap_binary_operands_p (code, mode, operands))
18027 std::swap (src1, src2);
18028
18029 /* If the destination is memory, we must have a matching source operand. */
18030 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18031 return false;
18032
18033 /* Source 1 cannot be a constant. */
18034 if (CONSTANT_P (src1))
18035 return false;
18036
18037 /* Source 1 cannot be a non-matching memory. */
18038 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18039 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18040 return (code == AND
18041 && (mode == HImode
18042 || mode == SImode
18043 || (TARGET_64BIT && mode == DImode))
18044 && satisfies_constraint_L (src2));
18045
18046 return true;
18047 }
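/* Illustrative example (hypothetical operands, editor's sketch): an AND of
   a non-matching memory source with a small mask, e.g.

	operands[0] = (reg:SI r1)
	operands[1] = (mem:SI ...)
	operands[2] = (const_int 0xff)

   is still accepted, because such an "andsi" is really a zero-extending
   load; any other operation with a non-matching memory src1 is rejected.  */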
18048
18049 /* Attempt to expand a unary operator. Make the expansion closer to the
18050 actual machine than just general_operand, which would allow 2 separate
18051 memory references (one output, one input) in a single insn. */
18052
18053 void
18054 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18055 rtx operands[])
18056 {
18057 int matching_memory;
18058 rtx src, dst, op, clob;
18059
18060 dst = operands[0];
18061 src = operands[1];
18062
18063 /* If the destination is memory, and we do not have matching source
18064 operands, do things in registers. */
18065 matching_memory = 0;
18066 if (MEM_P (dst))
18067 {
18068 if (rtx_equal_p (dst, src))
18069 matching_memory = 1;
18070 else
18071 dst = gen_reg_rtx (mode);
18072 }
18073
18074 /* When source operand is memory, destination must match. */
18075 if (MEM_P (src) && !matching_memory)
18076 src = force_reg (mode, src);
18077
18078 /* Emit the instruction. */
18079
18080 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18081 if (reload_in_progress || code == NOT)
18082 {
18083 /* Reload doesn't know about the flags register, and doesn't know that
18084 it doesn't want to clobber it. */
18085 gcc_assert (code == NOT);
18086 emit_insn (op);
18087 }
18088 else
18089 {
18090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18091 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18092 }
18093
18094 /* Fix up the destination if needed. */
18095 if (dst != operands[0])
18096 emit_move_insn (operands[0], dst);
18097 }
18098
18099 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18100 divisor are within the range [0-255]. */
18101
18102 void
18103 ix86_split_idivmod (machine_mode mode, rtx operands[],
18104 bool signed_p)
18105 {
18106 rtx_code_label *end_label, *qimode_label;
18107 rtx insn, div, mod;
18108 rtx scratch, tmp0, tmp1, tmp2;
18109 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18110 rtx (*gen_zero_extend) (rtx, rtx);
18111 rtx (*gen_test_ccno_1) (rtx, rtx);
18112
18113 switch (mode)
18114 {
18115 case SImode:
18116 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18117 gen_test_ccno_1 = gen_testsi_ccno_1;
18118 gen_zero_extend = gen_zero_extendqisi2;
18119 break;
18120 case DImode:
18121 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18122 gen_test_ccno_1 = gen_testdi_ccno_1;
18123 gen_zero_extend = gen_zero_extendqidi2;
18124 break;
18125 default:
18126 gcc_unreachable ();
18127 }
18128
18129 end_label = gen_label_rtx ();
18130 qimode_label = gen_label_rtx ();
18131
18132 scratch = gen_reg_rtx (mode);
18133
18134 /* Use 8bit unsigned divmod if dividend and divisor are within
18135 the range [0-255]. */
18136 emit_move_insn (scratch, operands[2]);
18137 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18138 scratch, 1, OPTAB_DIRECT);
18139 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18140 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18141 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18142 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18143 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18144 pc_rtx);
18145 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18146 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18147 JUMP_LABEL (insn) = qimode_label;
18148
18149 /* Generate original signed/unsigned divmod.  */
18150 div = gen_divmod4_1 (operands[0], operands[1],
18151 operands[2], operands[3]);
18152 emit_insn (div);
18153
18154 /* Branch to the end. */
18155 emit_jump_insn (gen_jump (end_label));
18156 emit_barrier ();
18157
18158 /* Generate 8bit unsigned divide. */
18159 emit_label (qimode_label);
18160 /* Don't use operands[0] for result of 8bit divide since not all
18161 registers support QImode ZERO_EXTRACT. */
18162 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18163 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18164 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18165 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18166
18167 if (signed_p)
18168 {
18169 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18170 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18171 }
18172 else
18173 {
18174 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18175 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18176 }
18177
18178 /* Extract remainder from AH. */
18179 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18180 if (REG_P (operands[1]))
18181 insn = emit_move_insn (operands[1], tmp1);
18182 else
18183 {
18184 /* Need a new scratch register since the old one has result
18185 of 8bit divide. */
18186 scratch = gen_reg_rtx (mode);
18187 emit_move_insn (scratch, tmp1);
18188 insn = emit_move_insn (operands[1], scratch);
18189 }
18190 set_unique_reg_note (insn, REG_EQUAL, mod);
18191
18192 /* Zero extend quotient from AL. */
18193 tmp1 = gen_lowpart (QImode, tmp0);
18194 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18195 set_unique_reg_note (insn, REG_EQUAL, div);
18196
18197 emit_label (end_label);
18198 }
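/* Illustrative sketch (hypothetical registers, approximate rather than
   exact output): for an unsigned 32-bit division the split roughly gives

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch	; do both operands fit in 8 bits?
	je	.Lqimode
	...full 32-bit div/idiv sequence...
	jmp	.Ldone
   .Lqimode:
	...single 8-bit div; quotient in AL, remainder in AH...
   .Ldone:
 */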
18199
18200 #define LEA_MAX_STALL (3)
18201 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18202
18203 /* Increase given DISTANCE in half-cycles according to
18204 dependencies between PREV and NEXT instructions.
18205 Add 1 half-cycle if there is no dependency and
18206 go to the next cycle if there is some dependency.  */
18207
18208 static unsigned int
18209 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18210 {
18211 df_ref def, use;
18212
18213 if (!prev || !next)
18214 return distance + (distance & 1) + 2;
18215
18216 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18217 return distance + 1;
18218
18219 FOR_EACH_INSN_USE (use, next)
18220 FOR_EACH_INSN_DEF (def, prev)
18221 if (!DF_REF_IS_ARTIFICIAL (def)
18222 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18223 return distance + (distance & 1) + 2;
18224
18225 return distance + 1;
18226 }
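/* Worked example (illustrative): with DISTANCE == 3 half-cycles, a pair of
   instructions where PREV defines a register that NEXT uses yields
   3 + (3 & 1) + 2 == 6, i.e. the count is rounded up to the next full
   cycle and a whole cycle is added; an independent pair simply yields
   3 + 1 == 4.  */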
18227
18228 /* Function checks if instruction INSN defines register number
18229 REGNO1 or REGNO2. */
18230
18231 static bool
18232 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18233 rtx insn)
18234 {
18235 df_ref def;
18236
18237 FOR_EACH_INSN_DEF (def, insn)
18238 if (DF_REF_REG_DEF_P (def)
18239 && !DF_REF_IS_ARTIFICIAL (def)
18240 && (regno1 == DF_REF_REGNO (def)
18241 || regno2 == DF_REF_REGNO (def)))
18242 return true;
18243
18244 return false;
18245 }
18246
18247 /* Function checks if instruction INSN uses register number
18248 REGNO as a part of address expression. */
18249
18250 static bool
18251 insn_uses_reg_mem (unsigned int regno, rtx insn)
18252 {
18253 df_ref use;
18254
18255 FOR_EACH_INSN_USE (use, insn)
18256 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18257 return true;
18258
18259 return false;
18260 }
18261
18262 /* Search backward for non-agu definition of register number REGNO1
18263 or register number REGNO2 in basic block starting from instruction
18264 START up to head of basic block or instruction INSN.
18265
18266 Function puts true value into *FOUND var if definition was found
18267 and false otherwise.
18268
18269 Distance in half-cycles between START and found instruction or head
18270 of BB is added to DISTANCE and returned. */
18271
18272 static int
18273 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18274 rtx_insn *insn, int distance,
18275 rtx_insn *start, bool *found)
18276 {
18277 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18278 rtx_insn *prev = start;
18279 rtx_insn *next = NULL;
18280
18281 *found = false;
18282
18283 while (prev
18284 && prev != insn
18285 && distance < LEA_SEARCH_THRESHOLD)
18286 {
18287 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18288 {
18289 distance = increase_distance (prev, next, distance);
18290 if (insn_defines_reg (regno1, regno2, prev))
18291 {
18292 if (recog_memoized (prev) < 0
18293 || get_attr_type (prev) != TYPE_LEA)
18294 {
18295 *found = true;
18296 return distance;
18297 }
18298 }
18299
18300 next = prev;
18301 }
18302 if (prev == BB_HEAD (bb))
18303 break;
18304
18305 prev = PREV_INSN (prev);
18306 }
18307
18308 return distance;
18309 }
18310
18311 /* Search backward for non-agu definition of register number REGNO1
18312 or register number REGNO2 in INSN's basic block until
18313 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18314 2. Reach neighbour BBs boundary, or
18315 3. Reach agu definition.
18316 Returns the distance between the non-agu definition point and INSN.
18317 If no definition point, returns -1. */
18318
18319 static int
18320 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18321 rtx_insn *insn)
18322 {
18323 basic_block bb = BLOCK_FOR_INSN (insn);
18324 int distance = 0;
18325 bool found = false;
18326
18327 if (insn != BB_HEAD (bb))
18328 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18329 distance, PREV_INSN (insn),
18330 &found);
18331
18332 if (!found && distance < LEA_SEARCH_THRESHOLD)
18333 {
18334 edge e;
18335 edge_iterator ei;
18336 bool simple_loop = false;
18337
18338 FOR_EACH_EDGE (e, ei, bb->preds)
18339 if (e->src == bb)
18340 {
18341 simple_loop = true;
18342 break;
18343 }
18344
18345 if (simple_loop)
18346 distance = distance_non_agu_define_in_bb (regno1, regno2,
18347 insn, distance,
18348 BB_END (bb), &found);
18349 else
18350 {
18351 int shortest_dist = -1;
18352 bool found_in_bb = false;
18353
18354 FOR_EACH_EDGE (e, ei, bb->preds)
18355 {
18356 int bb_dist
18357 = distance_non_agu_define_in_bb (regno1, regno2,
18358 insn, distance,
18359 BB_END (e->src),
18360 &found_in_bb);
18361 if (found_in_bb)
18362 {
18363 if (shortest_dist < 0)
18364 shortest_dist = bb_dist;
18365 else if (bb_dist > 0)
18366 shortest_dist = MIN (bb_dist, shortest_dist);
18367
18368 found = true;
18369 }
18370 }
18371
18372 distance = shortest_dist;
18373 }
18374 }
18375
18376 /* get_attr_type may modify recog data. We want to make sure
18377 that recog data is valid for instruction INSN, on which
18378 distance_non_agu_define is called. INSN is unchanged here. */
18379 extract_insn_cached (insn);
18380
18381 if (!found)
18382 return -1;
18383
18384 return distance >> 1;
18385 }
18386
18387 /* Return the distance in half-cycles between INSN and the next
18388 insn that uses register number REGNO in a memory address, added
18389 to DISTANCE.  Return -1 if REGNO is set.
18390
18391 Put true value into *FOUND if register usage was found and
18392 false otherwise.
18393 Put true value into *REDEFINED if register redefinition was
18394 found and false otherwise. */
18395
18396 static int
18397 distance_agu_use_in_bb (unsigned int regno,
18398 rtx_insn *insn, int distance, rtx_insn *start,
18399 bool *found, bool *redefined)
18400 {
18401 basic_block bb = NULL;
18402 rtx_insn *next = start;
18403 rtx_insn *prev = NULL;
18404
18405 *found = false;
18406 *redefined = false;
18407
18408 if (start != NULL_RTX)
18409 {
18410 bb = BLOCK_FOR_INSN (start);
18411 if (start != BB_HEAD (bb))
18412 /* If insn and start belong to the same bb, set prev to insn,
18413 so the call to increase_distance will increase the distance
18414 between insns by 1. */
18415 prev = insn;
18416 }
18417
18418 while (next
18419 && next != insn
18420 && distance < LEA_SEARCH_THRESHOLD)
18421 {
18422 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18423 {
18424 distance = increase_distance(prev, next, distance);
18425 if (insn_uses_reg_mem (regno, next))
18426 {
18427 /* Return DISTANCE if OP0 is used in memory
18428 address in NEXT. */
18429 *found = true;
18430 return distance;
18431 }
18432
18433 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18434 {
18435 /* Return -1 if OP0 is set in NEXT. */
18436 *redefined = true;
18437 return -1;
18438 }
18439
18440 prev = next;
18441 }
18442
18443 if (next == BB_END (bb))
18444 break;
18445
18446 next = NEXT_INSN (next);
18447 }
18448
18449 return distance;
18450 }
18451
18452 /* Return the distance between INSN and the next insn that uses
18453 register number REGNO0 in a memory address.  Return -1 if no such
18454 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18455
18456 static int
18457 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18458 {
18459 basic_block bb = BLOCK_FOR_INSN (insn);
18460 int distance = 0;
18461 bool found = false;
18462 bool redefined = false;
18463
18464 if (insn != BB_END (bb))
18465 distance = distance_agu_use_in_bb (regno0, insn, distance,
18466 NEXT_INSN (insn),
18467 &found, &redefined);
18468
18469 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18470 {
18471 edge e;
18472 edge_iterator ei;
18473 bool simple_loop = false;
18474
18475 FOR_EACH_EDGE (e, ei, bb->succs)
18476 if (e->dest == bb)
18477 {
18478 simple_loop = true;
18479 break;
18480 }
18481
18482 if (simple_loop)
18483 distance = distance_agu_use_in_bb (regno0, insn,
18484 distance, BB_HEAD (bb),
18485 &found, &redefined);
18486 else
18487 {
18488 int shortest_dist = -1;
18489 bool found_in_bb = false;
18490 bool redefined_in_bb = false;
18491
18492 FOR_EACH_EDGE (e, ei, bb->succs)
18493 {
18494 int bb_dist
18495 = distance_agu_use_in_bb (regno0, insn,
18496 distance, BB_HEAD (e->dest),
18497 &found_in_bb, &redefined_in_bb);
18498 if (found_in_bb)
18499 {
18500 if (shortest_dist < 0)
18501 shortest_dist = bb_dist;
18502 else if (bb_dist > 0)
18503 shortest_dist = MIN (bb_dist, shortest_dist);
18504
18505 found = true;
18506 }
18507 }
18508
18509 distance = shortest_dist;
18510 }
18511 }
18512
18513 if (!found || redefined)
18514 return -1;
18515
18516 return distance >> 1;
18517 }
18518
18519 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18520 there is a dilemma of choosing LEA or ADD.
18521 Negative value: ADD is preferred over LEA.
18522 Zero: Neutral.
18523 Positive value: LEA is preferred over ADD.  */
18524 #define IX86_LEA_PRIORITY 0
18525
18526 /* Return true if using the lea INSN has a performance advantage
18527 over a sequence of instructions.  The instruction sequence has
18528 SPLIT_COST cycles higher latency than the lea latency.  */
18529
18530 static bool
18531 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18532 unsigned int regno2, int split_cost, bool has_scale)
18533 {
18534 int dist_define, dist_use;
18535
18536 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18537 non-destructive destination, or for the ability to use SCALE,
18538 the use of LEA is justified.  */
18539 if (TARGET_SILVERMONT || TARGET_INTEL)
18540 {
18541 if (has_scale)
18542 return true;
18543 if (split_cost < 1)
18544 return false;
18545 if (regno0 == regno1 || regno0 == regno2)
18546 return false;
18547 return true;
18548 }
18549
18550 dist_define = distance_non_agu_define (regno1, regno2, insn);
18551 dist_use = distance_agu_use (regno0, insn);
18552
18553 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18554 {
18555 /* If there is no non-AGU operand definition, no AGU
18556 operand usage and the split cost is 0, then both the lea
18557 and non-lea variants have the same priority.  Currently
18558 we prefer lea for 64-bit code and non-lea for 32-bit
18559 code.  */
18560 if (dist_use < 0 && split_cost == 0)
18561 return TARGET_64BIT || IX86_LEA_PRIORITY;
18562 else
18563 return true;
18564 }
18565
18566 /* With a longer definition distance, lea is preferable.
18567 Here we adjust the distance to take into account the splitting
18568 cost and lea priority.  */
18569 dist_define += split_cost + IX86_LEA_PRIORITY;
18570
18571 /* If there is no use in a memory address then we just check
18572 that the split cost exceeds the AGU stall.  */
18573 if (dist_use < 0)
18574 return dist_define > LEA_MAX_STALL;
18575
18576 /* If this insn has both backward non-agu dependence and forward
18577 agu dependence, the one with the shorter distance takes effect.  */
18578 return dist_define >= dist_use;
18579 }
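/* Worked example (illustrative, non-Silvermont path): with
   dist_define == 2, split_cost == 1 and IX86_LEA_PRIORITY == 0 the
   adjusted definition distance is 3; if the address is then used at
   dist_use == 2, the function returns true (3 >= 2) and the lea is kept.
   With dist_define == 1, split_cost == 0 and dist_use == 3 it returns
   false and the lea is split.  */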
18580
18581 /* Return true if it is legal to clobber flags by INSN and
18582 false otherwise. */
18583
18584 static bool
18585 ix86_ok_to_clobber_flags (rtx_insn *insn)
18586 {
18587 basic_block bb = BLOCK_FOR_INSN (insn);
18588 df_ref use;
18589 bitmap live;
18590
18591 while (insn)
18592 {
18593 if (NONDEBUG_INSN_P (insn))
18594 {
18595 FOR_EACH_INSN_USE (use, insn)
18596 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18597 return false;
18598
18599 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18600 return true;
18601 }
18602
18603 if (insn == BB_END (bb))
18604 break;
18605
18606 insn = NEXT_INSN (insn);
18607 }
18608
18609 live = df_get_live_out(bb);
18610 return !REGNO_REG_SET_P (live, FLAGS_REG);
18611 }
18612
18613 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18614 move and add to avoid AGU stalls. */
18615
18616 bool
18617 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18618 {
18619 unsigned int regno0, regno1, regno2;
18620
18621 /* Check if we need to optimize. */
18622 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18623 return false;
18624
18625 /* Check it is correct to split here. */
18626 if (!ix86_ok_to_clobber_flags(insn))
18627 return false;
18628
18629 regno0 = true_regnum (operands[0]);
18630 regno1 = true_regnum (operands[1]);
18631 regno2 = true_regnum (operands[2]);
18632
18633 /* We need to split only adds with a non-destructive
18634 destination operand.  */
18635 if (regno0 == regno1 || regno0 == regno2)
18636 return false;
18637 else
18638 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18639 }
18640
18641 /* Return true if we should emit an lea instruction instead of a mov
18642 instruction.  */
18643
18644 bool
18645 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18646 {
18647 unsigned int regno0, regno1;
18648
18649 /* Check if we need to optimize. */
18650 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18651 return false;
18652
18653 /* Use lea for reg to reg moves only. */
18654 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18655 return false;
18656
18657 regno0 = true_regnum (operands[0]);
18658 regno1 = true_regnum (operands[1]);
18659
18660 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18661 }
18662
18663 /* Return true if we need to split lea into a sequence of
18664 instructions to avoid AGU stalls. */
18665
18666 bool
18667 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18668 {
18669 unsigned int regno0, regno1, regno2;
18670 int split_cost;
18671 struct ix86_address parts;
18672 int ok;
18673
18674 /* Check we need to optimize. */
18675 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18676 return false;
18677
18678 /* The "at least two components" test below might not catch simple
18679 move or zero extension insns if parts.base is non-NULL and parts.disp
18680 is const0_rtx as the only components in the address, e.g. if the
18681 register is %rbp or %r13. As this test is much cheaper and moves or
18682 zero extensions are the common case, do this check first. */
18683 if (REG_P (operands[1])
18684 || (SImode_address_operand (operands[1], VOIDmode)
18685 && REG_P (XEXP (operands[1], 0))))
18686 return false;
18687
18688 /* Check if it is OK to split here. */
18689 if (!ix86_ok_to_clobber_flags (insn))
18690 return false;
18691
18692 ok = ix86_decompose_address (operands[1], &parts);
18693 gcc_assert (ok);
18694
18695 /* There should be at least two components in the address. */
18696 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18697 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18698 return false;
18699
18700 /* We should not split into add if a non-legitimate PIC
18701 operand is used as the displacement.  */
18702 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18703 return false;
18704
18705 regno0 = true_regnum (operands[0]);
18706 regno1 = INVALID_REGNUM;
18707 regno2 = INVALID_REGNUM;
18708
18709 if (parts.base)
18710 regno1 = true_regnum (parts.base);
18711 if (parts.index)
18712 regno2 = true_regnum (parts.index);
18713
18714 split_cost = 0;
18715
18716 /* Compute how many cycles we will add to execution time
18717 if we split the lea into a sequence of instructions.  */
18718 if (parts.base || parts.index)
18719 {
18720 /* Have to use a mov instruction if the non-destructive
18721 destination form is used.  */
18722 if (regno1 != regno0 && regno2 != regno0)
18723 split_cost += 1;
18724
18725 /* Have to add index to base if both exist. */
18726 if (parts.base && parts.index)
18727 split_cost += 1;
18728
18729 /* Have to use shift and adds if scale is 2 or greater. */
18730 if (parts.scale > 1)
18731 {
18732 if (regno0 != regno1)
18733 split_cost += 1;
18734 else if (regno2 == regno0)
18735 split_cost += 4;
18736 else
18737 split_cost += parts.scale;
18738 }
18739
18740 /* Have to use an add instruction with an immediate if
18741 disp is nonzero.  */
18742 if (parts.disp && parts.disp != const0_rtx)
18743 split_cost += 1;
18744
18745 /* Subtract the price of lea. */
18746 split_cost -= 1;
18747 }
18748
18749 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18750 parts.scale > 1);
18751 }
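/* Worked example (illustrative): for an address like 16(%rbx,%rcx,4) with
   a destination register distinct from both base and index, the split cost
   is 1 (mov for the non-destructive destination) + 1 (add index to base)
   + 1 (shift for the scale) + 1 (add for the displacement) - 1 (the lea
   itself) == 3, and ix86_lea_outperforms is then consulted with that cost
   and has_scale == true.  */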
18752
18753 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18754 matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
18755
18756 static void
18757 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18758 rtx dst, rtx src)
18759 {
18760 rtx op, clob;
18761
18762 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18763 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18764
18765 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18766 }
18767
18768 /* Return true if regno1 def is nearest to the insn. */
18769
18770 static bool
18771 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18772 {
18773 rtx_insn *prev = insn;
18774 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18775
18776 if (insn == start)
18777 return false;
18778 while (prev && prev != start)
18779 {
18780 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18781 {
18782 prev = PREV_INSN (prev);
18783 continue;
18784 }
18785 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18786 return true;
18787 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18788 return false;
18789 prev = PREV_INSN (prev);
18790 }
18791
18792 /* None of the regs is defined in the bb. */
18793 return false;
18794 }
18795
18796 /* Split lea instructions into a sequence of instructions
18797 which are executed on the ALU to avoid AGU stalls.
18798 It is assumed that it is allowed to clobber the flags register
18799 at the lea position.  */
18800
18801 void
18802 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18803 {
18804 unsigned int regno0, regno1, regno2;
18805 struct ix86_address parts;
18806 rtx target, tmp;
18807 int ok, adds;
18808
18809 ok = ix86_decompose_address (operands[1], &parts);
18810 gcc_assert (ok);
18811
18812 target = gen_lowpart (mode, operands[0]);
18813
18814 regno0 = true_regnum (target);
18815 regno1 = INVALID_REGNUM;
18816 regno2 = INVALID_REGNUM;
18817
18818 if (parts.base)
18819 {
18820 parts.base = gen_lowpart (mode, parts.base);
18821 regno1 = true_regnum (parts.base);
18822 }
18823
18824 if (parts.index)
18825 {
18826 parts.index = gen_lowpart (mode, parts.index);
18827 regno2 = true_regnum (parts.index);
18828 }
18829
18830 if (parts.disp)
18831 parts.disp = gen_lowpart (mode, parts.disp);
18832
18833 if (parts.scale > 1)
18834 {
18835 /* Case r1 = r1 + ... */
18836 if (regno1 == regno0)
18837 {
18838 /* If we have a case r1 = r1 + C * r2 then we
18839 would have to use multiplication, which is very
18840 expensive.  Assume the cost model is wrong if we
18841 have such a case here.  */
18842 gcc_assert (regno2 != regno0);
18843
18844 for (adds = parts.scale; adds > 0; adds--)
18845 ix86_emit_binop (PLUS, mode, target, parts.index);
18846 }
18847 else
18848 {
18849 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18850 if (regno0 != regno2)
18851 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18852
18853 /* Use shift for scaling. */
18854 ix86_emit_binop (ASHIFT, mode, target,
18855 GEN_INT (exact_log2 (parts.scale)));
18856
18857 if (parts.base)
18858 ix86_emit_binop (PLUS, mode, target, parts.base);
18859
18860 if (parts.disp && parts.disp != const0_rtx)
18861 ix86_emit_binop (PLUS, mode, target, parts.disp);
18862 }
18863 }
18864 else if (!parts.base && !parts.index)
18865 {
18866 gcc_assert(parts.disp);
18867 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18868 }
18869 else
18870 {
18871 if (!parts.base)
18872 {
18873 if (regno0 != regno2)
18874 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18875 }
18876 else if (!parts.index)
18877 {
18878 if (regno0 != regno1)
18879 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18880 }
18881 else
18882 {
18883 if (regno0 == regno1)
18884 tmp = parts.index;
18885 else if (regno0 == regno2)
18886 tmp = parts.base;
18887 else
18888 {
18889 rtx tmp1;
18890
18891 /* Find better operand for SET instruction, depending
18892 on which definition is farther from the insn. */
18893 if (find_nearest_reg_def (insn, regno1, regno2))
18894 tmp = parts.index, tmp1 = parts.base;
18895 else
18896 tmp = parts.base, tmp1 = parts.index;
18897
18898 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18899
18900 if (parts.disp && parts.disp != const0_rtx)
18901 ix86_emit_binop (PLUS, mode, target, parts.disp);
18902
18903 ix86_emit_binop (PLUS, mode, target, tmp1);
18904 return;
18905 }
18906
18907 ix86_emit_binop (PLUS, mode, target, tmp);
18908 }
18909
18910 if (parts.disp && parts.disp != const0_rtx)
18911 ix86_emit_binop (PLUS, mode, target, parts.disp);
18912 }
18913 }
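/* Illustrative sketch (hypothetical registers, approximate rather than
   exact output): an insn such as "leaq 16(%rbx,%rcx,4), %rax" would be
   split roughly into

	movq	%rcx, %rax
	salq	$2, %rax
	addq	%rbx, %rax
	addq	$16, %rax

   while "leaq (%rbx,%rcx), %rax" becomes a mov of whichever source
   register was defined farther from the lea, followed by an add of the
   other (and of the displacement, if any).  */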
18914
18915 /* Return true if it is ok to optimize an ADD operation to an LEA
18916 operation to avoid flag register consumption.  For most processors,
18917 ADD is faster than LEA.  For processors like BONNELL, if the
18918 destination register of the LEA holds an actual address which will be
18919 used soon, LEA is better; otherwise ADD is better.  */
18920
18921 bool
18922 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18923 {
18924 unsigned int regno0 = true_regnum (operands[0]);
18925 unsigned int regno1 = true_regnum (operands[1]);
18926 unsigned int regno2 = true_regnum (operands[2]);
18927
18928 /* If a = b + c, (a != b && a != c), we must use the lea form.  */
18929 if (regno0 != regno1 && regno0 != regno2)
18930 return true;
18931
18932 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18933 return false;
18934
18935 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18936 }
18937
18938 /* Return true if destination reg of SET_BODY is shift count of
18939 USE_BODY. */
18940
18941 static bool
18942 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18943 {
18944 rtx set_dest;
18945 rtx shift_rtx;
18946 int i;
18947
18948 /* Retrieve destination of SET_BODY. */
18949 switch (GET_CODE (set_body))
18950 {
18951 case SET:
18952 set_dest = SET_DEST (set_body);
18953 if (!set_dest || !REG_P (set_dest))
18954 return false;
18955 break;
18956 case PARALLEL:
18957 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18958 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18959 use_body))
18960 return true;
18961 default:
18962 return false;
18963 break;
18964 }
18965
18966 /* Retrieve shift count of USE_BODY. */
18967 switch (GET_CODE (use_body))
18968 {
18969 case SET:
18970 shift_rtx = XEXP (use_body, 1);
18971 break;
18972 case PARALLEL:
18973 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18974 if (ix86_dep_by_shift_count_body (set_body,
18975 XVECEXP (use_body, 0, i)))
18976 return true;
18977 default:
18978 return false;
18979 break;
18980 }
18981
18982 if (shift_rtx
18983 && (GET_CODE (shift_rtx) == ASHIFT
18984 || GET_CODE (shift_rtx) == LSHIFTRT
18985 || GET_CODE (shift_rtx) == ASHIFTRT
18986 || GET_CODE (shift_rtx) == ROTATE
18987 || GET_CODE (shift_rtx) == ROTATERT))
18988 {
18989 rtx shift_count = XEXP (shift_rtx, 1);
18990
18991 /* Return true if shift count is dest of SET_BODY. */
18992 if (REG_P (shift_count))
18993 {
18994 /* Add this check since this can be invoked before register
18995 allocation by the pre-reload scheduler.  */
18996 if (reload_completed
18997 && true_regnum (set_dest) == true_regnum (shift_count))
18998 return true;
18999 else if (REGNO(set_dest) == REGNO(shift_count))
19000 return true;
19001 }
19002 }
19003
19004 return false;
19005 }
19006
19007 /* Return true if destination reg of SET_INSN is shift count of
19008 USE_INSN. */
19009
19010 bool
19011 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19012 {
19013 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19014 PATTERN (use_insn));
19015 }
19016
19017 /* Return TRUE or FALSE depending on whether the unary operator meets the
19018 appropriate constraints. */
19019
19020 bool
19021 ix86_unary_operator_ok (enum rtx_code,
19022 machine_mode,
19023 rtx operands[2])
19024 {
19025 /* If one of the operands is memory, source and destination must match.  */
19026 if ((MEM_P (operands[0])
19027 || MEM_P (operands[1]))
19028 && ! rtx_equal_p (operands[0], operands[1]))
19029 return false;
19030 return true;
19031 }
19032
19033 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19034 are ok, keeping in mind the possible movddup alternative. */
19035
19036 bool
19037 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19038 {
19039 if (MEM_P (operands[0]))
19040 return rtx_equal_p (operands[0], operands[1 + high]);
19041 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19042 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19043 return true;
19044 }
19045
19046 /* Post-reload splitter for converting an SF or DFmode value in an
19047 SSE register into an unsigned SImode. */
19048
19049 void
19050 ix86_split_convert_uns_si_sse (rtx operands[])
19051 {
19052 machine_mode vecmode;
19053 rtx value, large, zero_or_two31, input, two31, x;
19054
19055 large = operands[1];
19056 zero_or_two31 = operands[2];
19057 input = operands[3];
19058 two31 = operands[4];
19059 vecmode = GET_MODE (large);
19060 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19061
19062 /* Load up the value into the low element. We must ensure that the other
19063 elements are valid floats -- zero is the easiest such value. */
19064 if (MEM_P (input))
19065 {
19066 if (vecmode == V4SFmode)
19067 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19068 else
19069 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19070 }
19071 else
19072 {
19073 input = gen_rtx_REG (vecmode, REGNO (input));
19074 emit_move_insn (value, CONST0_RTX (vecmode));
19075 if (vecmode == V4SFmode)
19076 emit_insn (gen_sse_movss (value, value, input));
19077 else
19078 emit_insn (gen_sse2_movsd (value, value, input));
19079 }
19080
19081 emit_move_insn (large, two31);
19082 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19083
19084 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19085 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19086
19087 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19088 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19089
19090 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19091 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19092
19093 large = gen_rtx_REG (V4SImode, REGNO (large));
19094 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19095
19096 x = gen_rtx_REG (V4SImode, REGNO (value));
19097 if (vecmode == V4SFmode)
19098 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19099 else
19100 emit_insn (gen_sse2_cvttpd2dq (x, value));
19101 value = x;
19102
19103 emit_insn (gen_xorv4si3 (value, value, large));
19104 }
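/* Worked example (illustrative): converting the DFmode value 3000000000.0,
   which is >= 0x1p31, the compare mask selects 0x1p31, the subtraction
   leaves 852516352.0, the truncating conversion yields 0x32d05e00, and the
   final xor with the shifted mask (0x80000000) restores 0xb2d05e00, i.e.
   3000000000.  Values below 0x1p31 pass through unchanged apart from the
   plain truncating conversion.  */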
19105
19106 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19107 Expects the 64-bit DImode to be supplied in a pair of integral
19108 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19109 -mfpmath=sse, !optimize_size only. */
19110
19111 void
19112 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19113 {
19114 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19115 rtx int_xmm, fp_xmm;
19116 rtx biases, exponents;
19117 rtx x;
19118
19119 int_xmm = gen_reg_rtx (V4SImode);
19120 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19121 emit_insn (gen_movdi_to_sse (int_xmm, input));
19122 else if (TARGET_SSE_SPLIT_REGS)
19123 {
19124 emit_clobber (int_xmm);
19125 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19126 }
19127 else
19128 {
19129 x = gen_reg_rtx (V2DImode);
19130 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19131 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19132 }
19133
19134 x = gen_rtx_CONST_VECTOR (V4SImode,
19135 gen_rtvec (4, GEN_INT (0x43300000UL),
19136 GEN_INT (0x45300000UL),
19137 const0_rtx, const0_rtx));
19138 exponents = validize_mem (force_const_mem (V4SImode, x));
19139
19140 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19141 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19142
19143 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19144 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19145 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19146 (0x1.0p84 + double(fp_value_hi_xmm)).
19147 Note these exponents differ by 32. */
19148
19149 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19150
19151 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19152 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19153 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19154 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19155 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19156 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19157 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19158 biases = validize_mem (force_const_mem (V2DFmode, biases));
19159 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19160
19161 /* Add the upper and lower DFmode values together. */
19162 if (TARGET_SSE3)
19163 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19164 else
19165 {
19166 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19167 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19168 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19169 }
19170
19171 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19172 }
19173
19174 /* Not used, but eases macroization of patterns. */
19175 void
19176 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19177 {
19178 gcc_unreachable ();
19179 }
19180
19181 /* Convert an unsigned SImode value into a DFmode. Only currently used
19182 for SSE, but applicable anywhere. */
19183
19184 void
19185 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19186 {
19187 REAL_VALUE_TYPE TWO31r;
19188 rtx x, fp;
19189
19190 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19191 NULL, 1, OPTAB_DIRECT);
19192
19193 fp = gen_reg_rtx (DFmode);
19194 emit_insn (gen_floatsidf2 (fp, x));
19195
19196 real_ldexp (&TWO31r, &dconst1, 31);
19197 x = const_double_from_real_value (TWO31r, DFmode);
19198
19199 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19200 if (x != target)
19201 emit_move_insn (target, x);
19202 }
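/* Worked example (illustrative): for the unsigned input 3000000000 the
   initial PLUS of -0x80000000 wraps to the signed value 852516352, which
   floatsidf converts exactly; adding the DFmode constant 0x1p31 then gives
   3000000000.0.  For a small input such as 5 the intermediate value is
   -2147483643, and adding 0x1p31 again recovers 5.0.  */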
19203
19204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19205 32-bit mode; otherwise we have a direct convert instruction. */
19206
19207 void
19208 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19209 {
19210 REAL_VALUE_TYPE TWO32r;
19211 rtx fp_lo, fp_hi, x;
19212
19213 fp_lo = gen_reg_rtx (DFmode);
19214 fp_hi = gen_reg_rtx (DFmode);
19215
19216 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19217
19218 real_ldexp (&TWO32r, &dconst1, 32);
19219 x = const_double_from_real_value (TWO32r, DFmode);
19220 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19221
19222 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19223
19224 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19225 0, OPTAB_DIRECT);
19226 if (x != target)
19227 emit_move_insn (target, x);
19228 }
19229
19230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19231 For x86_32, -mfpmath=sse, !optimize_size only. */
19232 void
19233 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19234 {
19235 REAL_VALUE_TYPE ONE16r;
19236 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19237
19238 real_ldexp (&ONE16r, &dconst1, 16);
19239 x = const_double_from_real_value (ONE16r, SFmode);
19240 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19241 NULL, 0, OPTAB_DIRECT);
19242 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19243 NULL, 0, OPTAB_DIRECT);
19244 fp_hi = gen_reg_rtx (SFmode);
19245 fp_lo = gen_reg_rtx (SFmode);
19246 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19247 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19248 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19249 0, OPTAB_DIRECT);
19250 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19251 0, OPTAB_DIRECT);
19252 if (!rtx_equal_p (target, fp_hi))
19253 emit_move_insn (target, fp_hi);
19254 }
19255
19256 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19257 a vector of unsigned ints VAL to vector of floats TARGET. */
19258
19259 void
19260 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19261 {
19262 rtx tmp[8];
19263 REAL_VALUE_TYPE TWO16r;
19264 machine_mode intmode = GET_MODE (val);
19265 machine_mode fltmode = GET_MODE (target);
19266 rtx (*cvt) (rtx, rtx);
19267
19268 if (intmode == V4SImode)
19269 cvt = gen_floatv4siv4sf2;
19270 else
19271 cvt = gen_floatv8siv8sf2;
19272 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19273 tmp[0] = force_reg (intmode, tmp[0]);
19274 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19275 OPTAB_DIRECT);
19276 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19277 NULL_RTX, 1, OPTAB_DIRECT);
19278 tmp[3] = gen_reg_rtx (fltmode);
19279 emit_insn (cvt (tmp[3], tmp[1]));
19280 tmp[4] = gen_reg_rtx (fltmode);
19281 emit_insn (cvt (tmp[4], tmp[2]));
19282 real_ldexp (&TWO16r, &dconst1, 16);
19283 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19284 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19285 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19286 OPTAB_DIRECT);
19287 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19288 OPTAB_DIRECT);
19289 if (tmp[7] != target)
19290 emit_move_insn (target, tmp[7]);
19291 }
19292
19293 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19294 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19295 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19296 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19297
19298 rtx
19299 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19300 {
19301 REAL_VALUE_TYPE TWO31r;
19302 rtx two31r, tmp[4];
19303 machine_mode mode = GET_MODE (val);
19304 machine_mode scalarmode = GET_MODE_INNER (mode);
19305 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19306 rtx (*cmp) (rtx, rtx, rtx, rtx);
19307 int i;
19308
19309 for (i = 0; i < 3; i++)
19310 tmp[i] = gen_reg_rtx (mode);
19311 real_ldexp (&TWO31r, &dconst1, 31);
19312 two31r = const_double_from_real_value (TWO31r, scalarmode);
19313 two31r = ix86_build_const_vector (mode, 1, two31r);
19314 two31r = force_reg (mode, two31r);
19315 switch (mode)
19316 {
19317 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19318 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19319 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19320 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19321 default: gcc_unreachable ();
19322 }
19323 tmp[3] = gen_rtx_LE (mode, two31r, val);
19324 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19325 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19326 0, OPTAB_DIRECT);
19327 if (intmode == V4SImode || TARGET_AVX2)
19328 *xorp = expand_simple_binop (intmode, ASHIFT,
19329 gen_lowpart (intmode, tmp[0]),
19330 GEN_INT (31), NULL_RTX, 0,
19331 OPTAB_DIRECT);
19332 else
19333 {
19334 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19335 two31 = ix86_build_const_vector (intmode, 1, two31);
19336 *xorp = expand_simple_binop (intmode, AND,
19337 gen_lowpart (intmode, tmp[0]),
19338 two31, NULL_RTX, 0,
19339 OPTAB_DIRECT);
19340 }
19341 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19342 0, OPTAB_DIRECT);
19343 }
19344
19345 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19346 then replicate the value for all elements of the vector
19347 register. */
19348
19349 rtx
19350 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19351 {
19352 int i, n_elt;
19353 rtvec v;
19354 machine_mode scalar_mode;
19355
19356 switch (mode)
19357 {
19358 case V64QImode:
19359 case V32QImode:
19360 case V16QImode:
19361 case V32HImode:
19362 case V16HImode:
19363 case V8HImode:
19364 case V16SImode:
19365 case V8SImode:
19366 case V4SImode:
19367 case V8DImode:
19368 case V4DImode:
19369 case V2DImode:
19370 gcc_assert (vect);
19371 case V16SFmode:
19372 case V8SFmode:
19373 case V4SFmode:
19374 case V8DFmode:
19375 case V4DFmode:
19376 case V2DFmode:
19377 n_elt = GET_MODE_NUNITS (mode);
19378 v = rtvec_alloc (n_elt);
19379 scalar_mode = GET_MODE_INNER (mode);
19380
19381 RTVEC_ELT (v, 0) = value;
19382
19383 for (i = 1; i < n_elt; ++i)
19384 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19385
19386 return gen_rtx_CONST_VECTOR (mode, v);
19387
19388 default:
19389 gcc_unreachable ();
19390 }
19391 }
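/* Usage example (illustrative): ix86_build_const_vector (V4SImode, true,
   GEN_INT (1)) yields (const_vector:V4SI [1 1 1 1]); with VECT false only
   the floating-point vector modes are accepted (the integer cases assert
   VECT) and the value is placed in element 0 with the remaining elements
   set to zero.  */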
19392
19393 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19394 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19395 for an SSE register. If VECT is true, then replicate the mask for
19396 all elements of the vector register. If INVERT is true, then create
19397 a mask excluding the sign bit. */
19398
19399 rtx
19400 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19401 {
19402 machine_mode vec_mode, imode;
19403 HOST_WIDE_INT hi, lo;
19404 int shift = 63;
19405 rtx v;
19406 rtx mask;
19407
19408 /* Find the sign bit, sign extended to 2*HWI. */
19409 switch (mode)
19410 {
19411 case V16SImode:
19412 case V16SFmode:
19413 case V8SImode:
19414 case V4SImode:
19415 case V8SFmode:
19416 case V4SFmode:
19417 vec_mode = mode;
19418 mode = GET_MODE_INNER (mode);
19419 imode = SImode;
19420 lo = 0x80000000, hi = lo < 0;
19421 break;
19422
19423 case V8DImode:
19424 case V4DImode:
19425 case V2DImode:
19426 case V8DFmode:
19427 case V4DFmode:
19428 case V2DFmode:
19429 vec_mode = mode;
19430 mode = GET_MODE_INNER (mode);
19431 imode = DImode;
19432 if (HOST_BITS_PER_WIDE_INT >= 64)
19433 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19434 else
19435 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19436 break;
19437
19438 case TImode:
19439 case TFmode:
19440 vec_mode = VOIDmode;
19441 if (HOST_BITS_PER_WIDE_INT >= 64)
19442 {
19443 imode = TImode;
19444 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19445 }
19446 else
19447 {
19448 rtvec vec;
19449
19450 imode = DImode;
19451 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19452
19453 if (invert)
19454 {
19455 lo = ~lo, hi = ~hi;
19456 v = constm1_rtx;
19457 }
19458 else
19459 v = const0_rtx;
19460
19461 mask = immed_double_const (lo, hi, imode);
19462
19463 vec = gen_rtvec (2, v, mask);
19464 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19465 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19466
19467 return v;
19468 }
19469 break;
19470
19471 default:
19472 gcc_unreachable ();
19473 }
19474
19475 if (invert)
19476 lo = ~lo, hi = ~hi;
19477
19478 /* Force this value into the low part of a fp vector constant. */
19479 mask = immed_double_const (lo, hi, imode);
19480 mask = gen_lowpart (mode, mask);
19481
19482 if (vec_mode == VOIDmode)
19483 return force_reg (mode, mask);
19484
19485 v = ix86_build_const_vector (vec_mode, vect, mask);
19486 return force_reg (vec_mode, v);
19487 }
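/* Informal example: for V4SFmode this builds a vector constant with
   0x80000000 in every element (just the sign bits), or 0x7fffffff in
   every element when INVERT is true, forced into a register.  */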
19488
19489 /* Generate code for floating point ABS or NEG. */
19490
19491 void
19492 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19493 rtx operands[])
19494 {
19495 rtx mask, set, dst, src;
19496 bool use_sse = false;
19497 bool vector_mode = VECTOR_MODE_P (mode);
19498 machine_mode vmode = mode;
19499
19500 if (vector_mode)
19501 use_sse = true;
19502 else if (mode == TFmode)
19503 use_sse = true;
19504 else if (TARGET_SSE_MATH)
19505 {
19506 use_sse = SSE_FLOAT_MODE_P (mode);
19507 if (mode == SFmode)
19508 vmode = V4SFmode;
19509 else if (mode == DFmode)
19510 vmode = V2DFmode;
19511 }
19512
19513 /* NEG and ABS performed with SSE use bitwise mask operations.
19514 Create the appropriate mask now. */
19515 if (use_sse)
19516 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19517 else
19518 mask = NULL_RTX;
19519
19520 dst = operands[0];
19521 src = operands[1];
19522
19523 set = gen_rtx_fmt_e (code, mode, src);
19524 set = gen_rtx_SET (VOIDmode, dst, set);
19525
19526 if (mask)
19527 {
19528 rtx use, clob;
19529 rtvec par;
19530
19531 use = gen_rtx_USE (VOIDmode, mask);
19532 if (vector_mode)
19533 par = gen_rtvec (2, set, use);
19534 else
19535 {
19536 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19537 par = gen_rtvec (3, set, use, clob);
19538 }
19539 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19540 }
19541 else
19542 emit_insn (set);
19543 }
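/* Informally, the SSE path above ends up as a bitwise operation on the
   value: NEG becomes an XOR with the sign-bit mask and ABS becomes an
   AND with the inverted mask, e.g. for DFmode roughly

     xorpd .LC_signbit(%rip), %xmm0    # negation
     andpd .LC_notsign(%rip), %xmm0    # absolute value

   where the .LC_* labels are only illustrative names for the masks
   built by ix86_build_signbit_mask.  */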
19544
19545 /* Expand a copysign operation. Special case operand 0 being a constant. */
19546
19547 void
19548 ix86_expand_copysign (rtx operands[])
19549 {
19550 machine_mode mode, vmode;
19551 rtx dest, op0, op1, mask, nmask;
19552
19553 dest = operands[0];
19554 op0 = operands[1];
19555 op1 = operands[2];
19556
19557 mode = GET_MODE (dest);
19558
19559 if (mode == SFmode)
19560 vmode = V4SFmode;
19561 else if (mode == DFmode)
19562 vmode = V2DFmode;
19563 else
19564 vmode = mode;
19565
19566 if (GET_CODE (op0) == CONST_DOUBLE)
19567 {
19568 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19569
19570 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19571 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19572
19573 if (mode == SFmode || mode == DFmode)
19574 {
19575 if (op0 == CONST0_RTX (mode))
19576 op0 = CONST0_RTX (vmode);
19577 else
19578 {
19579 rtx v = ix86_build_const_vector (vmode, false, op0);
19580
19581 op0 = force_reg (vmode, v);
19582 }
19583 }
19584 else if (op0 != CONST0_RTX (mode))
19585 op0 = force_reg (mode, op0);
19586
19587 mask = ix86_build_signbit_mask (vmode, 0, 0);
19588
19589 if (mode == SFmode)
19590 copysign_insn = gen_copysignsf3_const;
19591 else if (mode == DFmode)
19592 copysign_insn = gen_copysigndf3_const;
19593 else
19594 copysign_insn = gen_copysigntf3_const;
19595
19596 emit_insn (copysign_insn (dest, op0, op1, mask));
19597 }
19598 else
19599 {
19600 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19601
19602 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19603 mask = ix86_build_signbit_mask (vmode, 0, 0);
19604
19605 if (mode == SFmode)
19606 copysign_insn = gen_copysignsf3_var;
19607 else if (mode == DFmode)
19608 copysign_insn = gen_copysigndf3_var;
19609 else
19610 copysign_insn = gen_copysigntf3_var;
19611
19612 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19613 }
19614 }
19615
19616 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19617 be a constant, and so has already been expanded into a vector constant. */
19618
19619 void
19620 ix86_split_copysign_const (rtx operands[])
19621 {
19622 machine_mode mode, vmode;
19623 rtx dest, op0, mask, x;
19624
19625 dest = operands[0];
19626 op0 = operands[1];
19627 mask = operands[3];
19628
19629 mode = GET_MODE (dest);
19630 vmode = GET_MODE (mask);
19631
19632 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19633 x = gen_rtx_AND (vmode, dest, mask);
19634 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19635
19636 if (op0 != CONST0_RTX (vmode))
19637 {
19638 x = gen_rtx_IOR (vmode, dest, op0);
19639 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19640 }
19641 }
19642
19643 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19644 so we have to do two masks. */
19645
19646 void
19647 ix86_split_copysign_var (rtx operands[])
19648 {
19649 machine_mode mode, vmode;
19650 rtx dest, scratch, op0, op1, mask, nmask, x;
19651
19652 dest = operands[0];
19653 scratch = operands[1];
19654 op0 = operands[2];
19655 op1 = operands[3];
19656 nmask = operands[4];
19657 mask = operands[5];
19658
19659 mode = GET_MODE (dest);
19660 vmode = GET_MODE (mask);
19661
19662 if (rtx_equal_p (op0, op1))
19663 {
19664 /* Shouldn't happen often (it's useless, obviously), but when it does
19665 we'd generate incorrect code if we continue below. */
19666 emit_move_insn (dest, op0);
19667 return;
19668 }
19669
19670 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19671 {
19672 gcc_assert (REGNO (op1) == REGNO (scratch));
19673
19674 x = gen_rtx_AND (vmode, scratch, mask);
19675 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19676
19677 dest = mask;
19678 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19679 x = gen_rtx_NOT (vmode, dest);
19680 x = gen_rtx_AND (vmode, x, op0);
19681 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19682 }
19683 else
19684 {
19685 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19686 {
19687 x = gen_rtx_AND (vmode, scratch, mask);
19688 }
19689 else /* alternative 2,4 */
19690 {
19691 gcc_assert (REGNO (mask) == REGNO (scratch));
19692 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19693 x = gen_rtx_AND (vmode, scratch, op1);
19694 }
19695 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19696
19697 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19698 {
19699 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19700 x = gen_rtx_AND (vmode, dest, nmask);
19701 }
19702 else /* alternative 3,4 */
19703 {
19704 gcc_assert (REGNO (nmask) == REGNO (dest));
19705 dest = nmask;
19706 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19707 x = gen_rtx_AND (vmode, dest, op0);
19708 }
19709 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19710 }
19711
19712 x = gen_rtx_IOR (vmode, dest, scratch);
19713 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19714 }
19715
19716 /* Return TRUE or FALSE depending on whether the first SET in INSN
19717 has source and destination with matching CC modes and whether the
19718 CC mode is at least as constrained as REQ_MODE. */
19719
19720 bool
19721 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19722 {
19723 rtx set;
19724 machine_mode set_mode;
19725
19726 set = PATTERN (insn);
19727 if (GET_CODE (set) == PARALLEL)
19728 set = XVECEXP (set, 0, 0);
19729 gcc_assert (GET_CODE (set) == SET);
19730 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19731
19732 set_mode = GET_MODE (SET_DEST (set));
19733 switch (set_mode)
19734 {
19735 case CCNOmode:
19736 if (req_mode != CCNOmode
19737 && (req_mode != CCmode
19738 || XEXP (SET_SRC (set), 1) != const0_rtx))
19739 return false;
19740 break;
19741 case CCmode:
19742 if (req_mode == CCGCmode)
19743 return false;
19744 /* FALLTHRU */
19745 case CCGCmode:
19746 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19747 return false;
19748 /* FALLTHRU */
19749 case CCGOCmode:
19750 if (req_mode == CCZmode)
19751 return false;
19752 /* FALLTHRU */
19753 case CCZmode:
19754 break;
19755
19756 case CCAmode:
19757 case CCCmode:
19758 case CCOmode:
19759 case CCSmode:
19760 if (set_mode != req_mode)
19761 return false;
19762 break;
19763
19764 default:
19765 gcc_unreachable ();
19766 }
19767
19768 return GET_MODE (SET_SRC (set)) == set_mode;
19769 }
19770
19771 /* Generate insn patterns to do an integer compare of OPERANDS. */
19772
19773 static rtx
19774 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19775 {
19776 machine_mode cmpmode;
19777 rtx tmp, flags;
19778
19779 cmpmode = SELECT_CC_MODE (code, op0, op1);
19780 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19781
19782 /* This is very simple, but making the interface the same as in the
19783 FP case makes the rest of the code easier. */
19784 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19785 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19786
19787 /* Return the test that should be put into the flags user, i.e.
19788 the bcc, scc, or cmov instruction. */
19789 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19790 }
19791
19792 /* Figure out whether to use ordered or unordered fp comparisons.
19793 Return the appropriate mode to use. */
19794
19795 machine_mode
19796 ix86_fp_compare_mode (enum rtx_code)
19797 {
19798 /* ??? In order to make all comparisons reversible, we do all comparisons
19799 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19800 between trapping and non-trapping forms of comparisons, we can make inequality
19801 comparisons trapping again, since that results in better code when using
19802 FCOM based compares. */
19803 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19804 }
19805
19806 machine_mode
19807 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19808 {
19809 machine_mode mode = GET_MODE (op0);
19810
19811 if (SCALAR_FLOAT_MODE_P (mode))
19812 {
19813 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19814 return ix86_fp_compare_mode (code);
19815 }
19816
19817 switch (code)
19818 {
19819 /* Only zero flag is needed. */
19820 case EQ: /* ZF=0 */
19821 case NE: /* ZF!=0 */
19822 return CCZmode;
19823 /* Codes needing carry flag. */
19824 case GEU: /* CF=0 */
19825 case LTU: /* CF=1 */
19826 /* Detect overflow checks. They need just the carry flag. */
19827 if (GET_CODE (op0) == PLUS
19828 && rtx_equal_p (op1, XEXP (op0, 0)))
19829 return CCCmode;
19830 else
19831 return CCmode;
19832 case GTU: /* CF=0 & ZF=0 */
19833 case LEU: /* CF=1 | ZF=1 */
19834 return CCmode;
19835 /* Codes possibly doable only with sign flag when
19836 comparing against zero. */
19837 case GE: /* SF=OF or SF=0 */
19838 case LT: /* SF<>OF or SF=1 */
19839 if (op1 == const0_rtx)
19840 return CCGOCmode;
19841 else
19842 /* For other cases the carry flag is not required. */
19843 return CCGCmode;
19844 /* Codes doable only with the sign flag when comparing
19845 against zero, but for which we lack a jump instruction,
19846 so we need to use relational tests against overflow,
19847 which thus needs to be zero. */
19848 case GT: /* ZF=0 & SF=OF */
19849 case LE: /* ZF=1 | SF<>OF */
19850 if (op1 == const0_rtx)
19851 return CCNOmode;
19852 else
19853 return CCGCmode;
19854 /* The strcmp pattern does (use flags), and combine may ask us for the
19855 proper mode. */
19856 case USE:
19857 return CCmode;
19858 default:
19859 gcc_unreachable ();
19860 }
19861 }
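/* Informal examples of the mapping above: a plain equality test such as
   (a == b) only needs CCZmode; an unsigned overflow check written as
   (a + b < a) is recognized by the PLUS case and gets CCCmode; a signed
   test against zero such as (a < 0) gets CCGOCmode, since only the sign
   flag matters there.  */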
19862
19863 /* Return the fixed registers used for condition codes. */
19864
19865 static bool
19866 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19867 {
19868 *p1 = FLAGS_REG;
19869 *p2 = FPSR_REG;
19870 return true;
19871 }
19872
19873 /* If two condition code modes are compatible, return a condition code
19874 mode which is compatible with both. Otherwise, return
19875 VOIDmode. */
19876
19877 static machine_mode
19878 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19879 {
19880 if (m1 == m2)
19881 return m1;
19882
19883 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19884 return VOIDmode;
19885
19886 if ((m1 == CCGCmode && m2 == CCGOCmode)
19887 || (m1 == CCGOCmode && m2 == CCGCmode))
19888 return CCGCmode;
19889
19890 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19891 return m2;
19892 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19893 return m1;
19894
19895 switch (m1)
19896 {
19897 default:
19898 gcc_unreachable ();
19899
19900 case CCmode:
19901 case CCGCmode:
19902 case CCGOCmode:
19903 case CCNOmode:
19904 case CCAmode:
19905 case CCCmode:
19906 case CCOmode:
19907 case CCSmode:
19908 case CCZmode:
19909 switch (m2)
19910 {
19911 default:
19912 return VOIDmode;
19913
19914 case CCmode:
19915 case CCGCmode:
19916 case CCGOCmode:
19917 case CCNOmode:
19918 case CCAmode:
19919 case CCCmode:
19920 case CCOmode:
19921 case CCSmode:
19922 case CCZmode:
19923 return CCmode;
19924 }
19925
19926 case CCFPmode:
19927 case CCFPUmode:
19928 /* These are only compatible with themselves, which we already
19929 checked above. */
19930 return VOIDmode;
19931 }
19932 }
19933
19934
19935 /* Return a comparison we can do that is equivalent to
19936 swap_condition (code), except possibly for orderedness.
19937 But never change orderedness if TARGET_IEEE_FP, returning
19938 UNKNOWN in that case if necessary. */
19939
19940 static enum rtx_code
19941 ix86_fp_swap_condition (enum rtx_code code)
19942 {
19943 switch (code)
19944 {
19945 case GT: /* GTU - CF=0 & ZF=0 */
19946 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19947 case GE: /* GEU - CF=0 */
19948 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19949 case UNLT: /* LTU - CF=1 */
19950 return TARGET_IEEE_FP ? UNKNOWN : GT;
19951 case UNLE: /* LEU - CF=1 | ZF=1 */
19952 return TARGET_IEEE_FP ? UNKNOWN : GE;
19953 default:
19954 return swap_condition (code);
19955 }
19956 }
19957
19958 /* Return the cost of comparison CODE using the best strategy for performance.
19959 All of the following functions use the number of instructions as their cost metric.
19960 In the future this should be tweaked to compute bytes for optimize_size and
19961 to take into account the performance of various instructions on various CPUs. */
19962
19963 static int
19964 ix86_fp_comparison_cost (enum rtx_code code)
19965 {
19966 int arith_cost;
19967
19968 /* The cost of code using bit-twiddling on %ah. */
19969 switch (code)
19970 {
19971 case UNLE:
19972 case UNLT:
19973 case LTGT:
19974 case GT:
19975 case GE:
19976 case UNORDERED:
19977 case ORDERED:
19978 case UNEQ:
19979 arith_cost = 4;
19980 break;
19981 case LT:
19982 case NE:
19983 case EQ:
19984 case UNGE:
19985 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19986 break;
19987 case LE:
19988 case UNGT:
19989 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19990 break;
19991 default:
19992 gcc_unreachable ();
19993 }
19994
19995 switch (ix86_fp_comparison_strategy (code))
19996 {
19997 case IX86_FPCMP_COMI:
19998 return arith_cost > 4 ? 3 : 2;
19999 case IX86_FPCMP_SAHF:
20000 return arith_cost > 4 ? 4 : 3;
20001 default:
20002 return arith_cost;
20003 }
20004 }
20005
20006 /* Return the strategy to use for a floating-point comparison. We assume that
20007 fcomi is always preferable where available, since that is also true when looking
20008 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20009
20010 enum ix86_fpcmp_strategy
20011 ix86_fp_comparison_strategy (enum rtx_code)
20012 {
20013 /* Do fcomi/sahf based test when profitable. */
20014
20015 if (TARGET_CMOVE)
20016 return IX86_FPCMP_COMI;
20017
20018 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20019 return IX86_FPCMP_SAHF;
20020
20021 return IX86_FPCMP_ARITH;
20022 }
20023
20024 /* Swap, force into registers, or otherwise massage the two operands
20025 to a fp comparison. The operands are updated in place; the new
20026 comparison code is returned. */
20027
20028 static enum rtx_code
20029 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20030 {
20031 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20032 rtx op0 = *pop0, op1 = *pop1;
20033 machine_mode op_mode = GET_MODE (op0);
20034 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20035
20036 /* All of the unordered compare instructions only work on registers.
20037 The same is true of the fcomi compare instructions. The XFmode
20038 compare instructions require registers except when comparing
20039 against zero or when converting operand 1 from fixed point to
20040 floating point. */
20041
20042 if (!is_sse
20043 && (fpcmp_mode == CCFPUmode
20044 || (op_mode == XFmode
20045 && ! (standard_80387_constant_p (op0) == 1
20046 || standard_80387_constant_p (op1) == 1)
20047 && GET_CODE (op1) != FLOAT)
20048 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20049 {
20050 op0 = force_reg (op_mode, op0);
20051 op1 = force_reg (op_mode, op1);
20052 }
20053 else
20054 {
20055 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20056 things around if they appear profitable, otherwise force op0
20057 into a register. */
20058
20059 if (standard_80387_constant_p (op0) == 0
20060 || (MEM_P (op0)
20061 && ! (standard_80387_constant_p (op1) == 0
20062 || MEM_P (op1))))
20063 {
20064 enum rtx_code new_code = ix86_fp_swap_condition (code);
20065 if (new_code != UNKNOWN)
20066 {
20067 std::swap (op0, op1);
20068 code = new_code;
20069 }
20070 }
20071
20072 if (!REG_P (op0))
20073 op0 = force_reg (op_mode, op0);
20074
20075 if (CONSTANT_P (op1))
20076 {
20077 int tmp = standard_80387_constant_p (op1);
20078 if (tmp == 0)
20079 op1 = validize_mem (force_const_mem (op_mode, op1));
20080 else if (tmp == 1)
20081 {
20082 if (TARGET_CMOVE)
20083 op1 = force_reg (op_mode, op1);
20084 }
20085 else
20086 op1 = force_reg (op_mode, op1);
20087 }
20088 }
20089
20090 /* Try to rearrange the comparison to make it cheaper. */
20091 if (ix86_fp_comparison_cost (code)
20092 > ix86_fp_comparison_cost (swap_condition (code))
20093 && (REG_P (op1) || can_create_pseudo_p ()))
20094 {
20095 std::swap (op0, op1);
20096 code = swap_condition (code);
20097 if (!REG_P (op0))
20098 op0 = force_reg (op_mode, op0);
20099 }
20100
20101 *pop0 = op0;
20102 *pop1 = op1;
20103 return code;
20104 }
20105
20106 /* Convert the comparison codes we use to represent an FP comparison to the
20107 integer code that will result in a proper branch. Return UNKNOWN if no such
20108 code is available. */
20109
20110 enum rtx_code
20111 ix86_fp_compare_code_to_integer (enum rtx_code code)
20112 {
20113 switch (code)
20114 {
20115 case GT:
20116 return GTU;
20117 case GE:
20118 return GEU;
20119 case ORDERED:
20120 case UNORDERED:
20121 return code;
20122 break;
20123 case UNEQ:
20124 return EQ;
20125 break;
20126 case UNLT:
20127 return LTU;
20128 break;
20129 case UNLE:
20130 return LEU;
20131 break;
20132 case LTGT:
20133 return NE;
20134 break;
20135 default:
20136 return UNKNOWN;
20137 }
20138 }
20139
20140 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20141
20142 static rtx
20143 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20144 {
20145 machine_mode fpcmp_mode, intcmp_mode;
20146 rtx tmp, tmp2;
20147
20148 fpcmp_mode = ix86_fp_compare_mode (code);
20149 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20150
20151 /* Do fcomi/sahf based test when profitable. */
20152 switch (ix86_fp_comparison_strategy (code))
20153 {
20154 case IX86_FPCMP_COMI:
20155 intcmp_mode = fpcmp_mode;
20156 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20157 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20158 tmp);
20159 emit_insn (tmp);
20160 break;
20161
20162 case IX86_FPCMP_SAHF:
20163 intcmp_mode = fpcmp_mode;
20164 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20165 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20166 tmp);
20167
20168 if (!scratch)
20169 scratch = gen_reg_rtx (HImode);
20170 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20171 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20172 break;
20173
20174 case IX86_FPCMP_ARITH:
20175 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20176 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20177 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20178 if (!scratch)
20179 scratch = gen_reg_rtx (HImode);
20180 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20181
20182 /* In the unordered case, we have to check C2 for NaNs, which
20183 doesn't happen to work out to anything nice combination-wise.
20184 So do some bit twiddling on the value we've got in AH to come
20185 up with an appropriate set of condition codes. */
20186
20187 intcmp_mode = CCNOmode;
20188 switch (code)
20189 {
20190 case GT:
20191 case UNGT:
20192 if (code == GT || !TARGET_IEEE_FP)
20193 {
20194 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20195 code = EQ;
20196 }
20197 else
20198 {
20199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20200 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20201 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20202 intcmp_mode = CCmode;
20203 code = GEU;
20204 }
20205 break;
20206 case LT:
20207 case UNLT:
20208 if (code == LT && TARGET_IEEE_FP)
20209 {
20210 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20211 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20212 intcmp_mode = CCmode;
20213 code = EQ;
20214 }
20215 else
20216 {
20217 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20218 code = NE;
20219 }
20220 break;
20221 case GE:
20222 case UNGE:
20223 if (code == GE || !TARGET_IEEE_FP)
20224 {
20225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20226 code = EQ;
20227 }
20228 else
20229 {
20230 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20231 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20232 code = NE;
20233 }
20234 break;
20235 case LE:
20236 case UNLE:
20237 if (code == LE && TARGET_IEEE_FP)
20238 {
20239 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20240 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20241 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20242 intcmp_mode = CCmode;
20243 code = LTU;
20244 }
20245 else
20246 {
20247 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20248 code = NE;
20249 }
20250 break;
20251 case EQ:
20252 case UNEQ:
20253 if (code == EQ && TARGET_IEEE_FP)
20254 {
20255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20256 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20257 intcmp_mode = CCmode;
20258 code = EQ;
20259 }
20260 else
20261 {
20262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20263 code = NE;
20264 }
20265 break;
20266 case NE:
20267 case LTGT:
20268 if (code == NE && TARGET_IEEE_FP)
20269 {
20270 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20271 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20272 GEN_INT (0x40)));
20273 code = NE;
20274 }
20275 else
20276 {
20277 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20278 code = EQ;
20279 }
20280 break;
20281
20282 case UNORDERED:
20283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20284 code = NE;
20285 break;
20286 case ORDERED:
20287 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20288 code = EQ;
20289 break;
20290
20291 default:
20292 gcc_unreachable ();
20293 }
20294 break;
20295
20296 default:
20297 gcc_unreachable();
20298 }
20299
20300 /* Return the test that should be put into the flags user, i.e.
20301 the bcc, scc, or cmov instruction. */
20302 return gen_rtx_fmt_ee (code, VOIDmode,
20303 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20304 const0_rtx);
20305 }
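/* An informal reading of the magic constants tested above: the x87
   status word, as it appears in AH after an fnstsw-style sequence, has
   C0 at 0x01, C2 at 0x04 and C3 at 0x40, so 0x45 tests C0|C2|C3,
   0x05 tests C0|C2 and 0x40 tests C3 alone.  */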
20306
20307 static rtx
20308 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20309 {
20310 rtx ret;
20311
20312 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20313 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20314
20315 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20316 {
20317 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20318 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20319 }
20320 else
20321 ret = ix86_expand_int_compare (code, op0, op1);
20322
20323 return ret;
20324 }
20325
20326 void
20327 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20328 {
20329 machine_mode mode = GET_MODE (op0);
20330 rtx tmp;
20331
20332 switch (mode)
20333 {
20334 case SFmode:
20335 case DFmode:
20336 case XFmode:
20337 case QImode:
20338 case HImode:
20339 case SImode:
20340 simple:
20341 tmp = ix86_expand_compare (code, op0, op1);
20342 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20343 gen_rtx_LABEL_REF (VOIDmode, label),
20344 pc_rtx);
20345 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20346 return;
20347
20348 case DImode:
20349 if (TARGET_64BIT)
20350 goto simple;
20351 case TImode:
20352 /* Expand DImode branch into multiple compare+branch. */
20353 {
20354 rtx lo[2], hi[2];
20355 rtx_code_label *label2;
20356 enum rtx_code code1, code2, code3;
20357 machine_mode submode;
20358
20359 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20360 {
20361 std::swap (op0, op1);
20362 code = swap_condition (code);
20363 }
20364
20365 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20366 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20367
20368 submode = mode == DImode ? SImode : DImode;
20369
20370 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20371 avoid two branches. This costs one extra insn, so disable when
20372 optimizing for size. */
20373
20374 if ((code == EQ || code == NE)
20375 && (!optimize_insn_for_size_p ()
20376 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20377 {
20378 rtx xor0, xor1;
20379
20380 xor1 = hi[0];
20381 if (hi[1] != const0_rtx)
20382 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20383 NULL_RTX, 0, OPTAB_WIDEN);
20384
20385 xor0 = lo[0];
20386 if (lo[1] != const0_rtx)
20387 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20388 NULL_RTX, 0, OPTAB_WIDEN);
20389
20390 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20391 NULL_RTX, 0, OPTAB_WIDEN);
20392
20393 ix86_expand_branch (code, tmp, const0_rtx, label);
20394 return;
20395 }
20396
20397 /* Otherwise, if we are doing a less-than or greater-or-equal-than
20398 comparison, op1 is a constant and its low word is zero, then we can
20399 just examine the high word. Similarly for a low word of -1 and
20400 less-or-equal-than or greater-than. */
20401
20402 if (CONST_INT_P (hi[1]))
20403 switch (code)
20404 {
20405 case LT: case LTU: case GE: case GEU:
20406 if (lo[1] == const0_rtx)
20407 {
20408 ix86_expand_branch (code, hi[0], hi[1], label);
20409 return;
20410 }
20411 break;
20412 case LE: case LEU: case GT: case GTU:
20413 if (lo[1] == constm1_rtx)
20414 {
20415 ix86_expand_branch (code, hi[0], hi[1], label);
20416 return;
20417 }
20418 break;
20419 default:
20420 break;
20421 }
20422
20423 /* Otherwise, we need two or three jumps. */
20424
20425 label2 = gen_label_rtx ();
20426
20427 code1 = code;
20428 code2 = swap_condition (code);
20429 code3 = unsigned_condition (code);
20430
20431 switch (code)
20432 {
20433 case LT: case GT: case LTU: case GTU:
20434 break;
20435
20436 case LE: code1 = LT; code2 = GT; break;
20437 case GE: code1 = GT; code2 = LT; break;
20438 case LEU: code1 = LTU; code2 = GTU; break;
20439 case GEU: code1 = GTU; code2 = LTU; break;
20440
20441 case EQ: code1 = UNKNOWN; code2 = NE; break;
20442 case NE: code2 = UNKNOWN; break;
20443
20444 default:
20445 gcc_unreachable ();
20446 }
20447
20448 /*
20449 * a < b =>
20450 * if (hi(a) < hi(b)) goto true;
20451 * if (hi(a) > hi(b)) goto false;
20452 * if (lo(a) < lo(b)) goto true;
20453 * false:
20454 */
20455
20456 if (code1 != UNKNOWN)
20457 ix86_expand_branch (code1, hi[0], hi[1], label);
20458 if (code2 != UNKNOWN)
20459 ix86_expand_branch (code2, hi[0], hi[1], label2);
20460
20461 ix86_expand_branch (code3, lo[0], lo[1], label);
20462
20463 if (code2 != UNKNOWN)
20464 emit_label (label2);
20465 return;
20466 }
20467
20468 default:
20469 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20470 goto simple;
20471 }
20472 }
20473
20474 /* Split branch based on floating point condition. */
20475 void
20476 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20477 rtx target1, rtx target2, rtx tmp)
20478 {
20479 rtx condition;
20480 rtx i;
20481
20482 if (target2 != pc_rtx)
20483 {
20484 rtx tmp = target2;
20485 code = reverse_condition_maybe_unordered (code);
20486 target2 = target1;
20487 target1 = tmp;
20488 }
20489
20490 condition = ix86_expand_fp_compare (code, op1, op2,
20491 tmp);
20492
20493 i = emit_jump_insn (gen_rtx_SET
20494 (VOIDmode, pc_rtx,
20495 gen_rtx_IF_THEN_ELSE (VOIDmode,
20496 condition, target1, target2)));
20497 if (split_branch_probability >= 0)
20498 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20499 }
20500
20501 void
20502 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20503 {
20504 rtx ret;
20505
20506 gcc_assert (GET_MODE (dest) == QImode);
20507
20508 ret = ix86_expand_compare (code, op0, op1);
20509 PUT_MODE (ret, QImode);
20510 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20511 }
20512
20513 /* Expand a comparison that sets or clears the carry flag. Return true when
20514 successful and set *POP to the resulting comparison. */
20515 static bool
20516 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20517 {
20518 machine_mode mode =
20519 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20520
20521 /* Do not handle double-mode compares that go through special path. */
20522 if (mode == (TARGET_64BIT ? TImode : DImode))
20523 return false;
20524
20525 if (SCALAR_FLOAT_MODE_P (mode))
20526 {
20527 rtx compare_op;
20528 rtx_insn *compare_seq;
20529
20530 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20531
20532 /* Shortcut: the following common codes never translate
20533 into carry flag compares. */
20534 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20535 || code == ORDERED || code == UNORDERED)
20536 return false;
20537
20538 /* These comparisons require the zero flag; swap operands so they won't need it. */
20539 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20540 && !TARGET_IEEE_FP)
20541 {
20542 std::swap (op0, op1);
20543 code = swap_condition (code);
20544 }
20545
20546 /* Try to expand the comparison and verify that we end up with
20547 a carry flag based comparison. This fails to be true only when
20548 we decide to expand the comparison using arithmetic, which is not
20549 a common scenario. */
20550 start_sequence ();
20551 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20552 compare_seq = get_insns ();
20553 end_sequence ();
20554
20555 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20556 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20557 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20558 else
20559 code = GET_CODE (compare_op);
20560
20561 if (code != LTU && code != GEU)
20562 return false;
20563
20564 emit_insn (compare_seq);
20565 *pop = compare_op;
20566 return true;
20567 }
20568
20569 if (!INTEGRAL_MODE_P (mode))
20570 return false;
20571
20572 switch (code)
20573 {
20574 case LTU:
20575 case GEU:
20576 break;
20577
20578 /* Convert a==0 into (unsigned)a<1. */
20579 case EQ:
20580 case NE:
20581 if (op1 != const0_rtx)
20582 return false;
20583 op1 = const1_rtx;
20584 code = (code == EQ ? LTU : GEU);
20585 break;
20586
20587 /* Convert a>b into b<a or a>=b+1. */
20588 case GTU:
20589 case LEU:
20590 if (CONST_INT_P (op1))
20591 {
20592 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20593 /* Bail out on overflow. We could still swap the operands, but that
20594 would force loading the constant into a register. */
20595 if (op1 == const0_rtx
20596 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20597 return false;
20598 code = (code == GTU ? GEU : LTU);
20599 }
20600 else
20601 {
20602 std::swap (op1, op0);
20603 code = (code == GTU ? LTU : GEU);
20604 }
20605 break;
20606
20607 /* Convert a>=0 into (unsigned)a<0x80000000. */
20608 case LT:
20609 case GE:
20610 if (mode == DImode || op1 != const0_rtx)
20611 return false;
20612 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20613 code = (code == LT ? GEU : LTU);
20614 break;
20615 case LE:
20616 case GT:
20617 if (mode == DImode || op1 != constm1_rtx)
20618 return false;
20619 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20620 code = (code == LE ? GEU : LTU);
20621 break;
20622
20623 default:
20624 return false;
20625 }
20626 /* Swapping operands may cause a constant to appear as the first operand. */
20627 if (!nonimmediate_operand (op0, VOIDmode))
20628 {
20629 if (!can_create_pseudo_p ())
20630 return false;
20631 op0 = force_reg (mode, op0);
20632 }
20633 *pop = ix86_expand_compare (code, op0, op1);
20634 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20635 return true;
20636 }
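/* Informal examples of the rewrites above: (a == 0) becomes the unsigned
   test (a < 1), (a > 41U) with a constant operand becomes (a >= 42U),
   and a signed (a >= 0) becomes the unsigned (a < 0x80000000); all of
   these end up as LTU/GEU, i.e. pure carry-flag tests.  */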
20637
20638 bool
20639 ix86_expand_int_movcc (rtx operands[])
20640 {
20641 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20642 rtx_insn *compare_seq;
20643 rtx compare_op;
20644 machine_mode mode = GET_MODE (operands[0]);
20645 bool sign_bit_compare_p = false;
20646 rtx op0 = XEXP (operands[1], 0);
20647 rtx op1 = XEXP (operands[1], 1);
20648
20649 if (GET_MODE (op0) == TImode
20650 || (GET_MODE (op0) == DImode
20651 && !TARGET_64BIT))
20652 return false;
20653
20654 start_sequence ();
20655 compare_op = ix86_expand_compare (code, op0, op1);
20656 compare_seq = get_insns ();
20657 end_sequence ();
20658
20659 compare_code = GET_CODE (compare_op);
20660
20661 if ((op1 == const0_rtx && (code == GE || code == LT))
20662 || (op1 == constm1_rtx && (code == GT || code == LE)))
20663 sign_bit_compare_p = true;
20664
20665 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20666 HImode insns, we'd be swallowed in word prefix ops. */
20667
20668 if ((mode != HImode || TARGET_FAST_PREFIX)
20669 && (mode != (TARGET_64BIT ? TImode : DImode))
20670 && CONST_INT_P (operands[2])
20671 && CONST_INT_P (operands[3]))
20672 {
20673 rtx out = operands[0];
20674 HOST_WIDE_INT ct = INTVAL (operands[2]);
20675 HOST_WIDE_INT cf = INTVAL (operands[3]);
20676 HOST_WIDE_INT diff;
20677
20678 diff = ct - cf;
20679 /* Sign bit compares are better done using shifts than by using
20680 sbb. */
20681 if (sign_bit_compare_p
20682 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20683 {
20684 /* Detect overlap between destination and compare sources. */
20685 rtx tmp = out;
20686
20687 if (!sign_bit_compare_p)
20688 {
20689 rtx flags;
20690 bool fpcmp = false;
20691
20692 compare_code = GET_CODE (compare_op);
20693
20694 flags = XEXP (compare_op, 0);
20695
20696 if (GET_MODE (flags) == CCFPmode
20697 || GET_MODE (flags) == CCFPUmode)
20698 {
20699 fpcmp = true;
20700 compare_code
20701 = ix86_fp_compare_code_to_integer (compare_code);
20702 }
20703
20704 /* To simplify rest of code, restrict to the GEU case. */
20705 if (compare_code == LTU)
20706 {
20707 HOST_WIDE_INT tmp = ct;
20708 ct = cf;
20709 cf = tmp;
20710 compare_code = reverse_condition (compare_code);
20711 code = reverse_condition (code);
20712 }
20713 else
20714 {
20715 if (fpcmp)
20716 PUT_CODE (compare_op,
20717 reverse_condition_maybe_unordered
20718 (GET_CODE (compare_op)));
20719 else
20720 PUT_CODE (compare_op,
20721 reverse_condition (GET_CODE (compare_op)));
20722 }
20723 diff = ct - cf;
20724
20725 if (reg_overlap_mentioned_p (out, op0)
20726 || reg_overlap_mentioned_p (out, op1))
20727 tmp = gen_reg_rtx (mode);
20728
20729 if (mode == DImode)
20730 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20731 else
20732 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20733 flags, compare_op));
20734 }
20735 else
20736 {
20737 if (code == GT || code == GE)
20738 code = reverse_condition (code);
20739 else
20740 {
20741 HOST_WIDE_INT tmp = ct;
20742 ct = cf;
20743 cf = tmp;
20744 diff = ct - cf;
20745 }
20746 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20747 }
20748
20749 if (diff == 1)
20750 {
20751 /*
20752 * cmpl op0,op1
20753 * sbbl dest,dest
20754 * [addl dest, ct]
20755 *
20756 * Size 5 - 8.
20757 */
20758 if (ct)
20759 tmp = expand_simple_binop (mode, PLUS,
20760 tmp, GEN_INT (ct),
20761 copy_rtx (tmp), 1, OPTAB_DIRECT);
20762 }
20763 else if (cf == -1)
20764 {
20765 /*
20766 * cmpl op0,op1
20767 * sbbl dest,dest
20768 * orl $ct, dest
20769 *
20770 * Size 8.
20771 */
20772 tmp = expand_simple_binop (mode, IOR,
20773 tmp, GEN_INT (ct),
20774 copy_rtx (tmp), 1, OPTAB_DIRECT);
20775 }
20776 else if (diff == -1 && ct)
20777 {
20778 /*
20779 * cmpl op0,op1
20780 * sbbl dest,dest
20781 * notl dest
20782 * [addl dest, cf]
20783 *
20784 * Size 8 - 11.
20785 */
20786 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20787 if (cf)
20788 tmp = expand_simple_binop (mode, PLUS,
20789 copy_rtx (tmp), GEN_INT (cf),
20790 copy_rtx (tmp), 1, OPTAB_DIRECT);
20791 }
20792 else
20793 {
20794 /*
20795 * cmpl op0,op1
20796 * sbbl dest,dest
20797 * [notl dest]
20798 * andl cf - ct, dest
20799 * [addl dest, ct]
20800 *
20801 * Size 8 - 11.
20802 */
20803
20804 if (cf == 0)
20805 {
20806 cf = ct;
20807 ct = 0;
20808 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20809 }
20810
20811 tmp = expand_simple_binop (mode, AND,
20812 copy_rtx (tmp),
20813 gen_int_mode (cf - ct, mode),
20814 copy_rtx (tmp), 1, OPTAB_DIRECT);
20815 if (ct)
20816 tmp = expand_simple_binop (mode, PLUS,
20817 copy_rtx (tmp), GEN_INT (ct),
20818 copy_rtx (tmp), 1, OPTAB_DIRECT);
20819 }
20820
20821 if (!rtx_equal_p (tmp, out))
20822 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20823
20824 return true;
20825 }
20826
20827 if (diff < 0)
20828 {
20829 machine_mode cmp_mode = GET_MODE (op0);
20830
20831 std::swap (ct, cf);
20832 diff = -diff;
20833
20834 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20835 {
20836 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20837
20838 /* We may be reversing an unordered compare to a normal compare, which
20839 is not valid in general (we may convert a non-trapping condition
20840 to a trapping one); however, on i386 we currently emit all
20841 comparisons unordered. */
20842 compare_code = reverse_condition_maybe_unordered (compare_code);
20843 code = reverse_condition_maybe_unordered (code);
20844 }
20845 else
20846 {
20847 compare_code = reverse_condition (compare_code);
20848 code = reverse_condition (code);
20849 }
20850 }
20851
20852 compare_code = UNKNOWN;
20853 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20854 && CONST_INT_P (op1))
20855 {
20856 if (op1 == const0_rtx
20857 && (code == LT || code == GE))
20858 compare_code = code;
20859 else if (op1 == constm1_rtx)
20860 {
20861 if (code == LE)
20862 compare_code = LT;
20863 else if (code == GT)
20864 compare_code = GE;
20865 }
20866 }
20867
20868 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20869 if (compare_code != UNKNOWN
20870 && GET_MODE (op0) == GET_MODE (out)
20871 && (cf == -1 || ct == -1))
20872 {
20873 /* If lea code below could be used, only optimize
20874 if it results in a 2 insn sequence. */
20875
20876 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20877 || diff == 3 || diff == 5 || diff == 9)
20878 || (compare_code == LT && ct == -1)
20879 || (compare_code == GE && cf == -1))
20880 {
20881 /*
20882 * notl op1 (if necessary)
20883 * sarl $31, op1
20884 * orl cf, op1
20885 */
20886 if (ct != -1)
20887 {
20888 cf = ct;
20889 ct = -1;
20890 code = reverse_condition (code);
20891 }
20892
20893 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20894
20895 out = expand_simple_binop (mode, IOR,
20896 out, GEN_INT (cf),
20897 out, 1, OPTAB_DIRECT);
20898 if (out != operands[0])
20899 emit_move_insn (operands[0], out);
20900
20901 return true;
20902 }
20903 }
20904
20905
20906 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20907 || diff == 3 || diff == 5 || diff == 9)
20908 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20909 && (mode != DImode
20910 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20911 {
20912 /*
20913 * xorl dest,dest
20914 * cmpl op1,op2
20915 * setcc dest
20916 * lea cf(dest*(ct-cf)),dest
20917 *
20918 * Size 14.
20919 *
20920 * This also catches the degenerate setcc-only case.
20921 */
20922
20923 rtx tmp;
20924 int nops;
20925
20926 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20927
20928 nops = 0;
20929 /* On x86_64 the lea instruction operates on Pmode, so we need
20930 to get the arithmetic done in the proper mode to match. */
20931 if (diff == 1)
20932 tmp = copy_rtx (out);
20933 else
20934 {
20935 rtx out1;
20936 out1 = copy_rtx (out);
20937 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20938 nops++;
20939 if (diff & 1)
20940 {
20941 tmp = gen_rtx_PLUS (mode, tmp, out1);
20942 nops++;
20943 }
20944 }
20945 if (cf != 0)
20946 {
20947 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20948 nops++;
20949 }
20950 if (!rtx_equal_p (tmp, out))
20951 {
20952 if (nops == 1)
20953 out = force_operand (tmp, copy_rtx (out));
20954 else
20955 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20956 }
20957 if (!rtx_equal_p (out, operands[0]))
20958 emit_move_insn (operands[0], copy_rtx (out));
20959
20960 return true;
20961 }
20962
20963 /*
20964 * General case: Jumpful:
20965 * xorl dest,dest cmpl op1, op2
20966 * cmpl op1, op2 movl ct, dest
20967 * setcc dest jcc 1f
20968 * decl dest movl cf, dest
20969 * andl (cf-ct),dest 1:
20970 * addl ct,dest
20971 *
20972 * Size 20. Size 14.
20973 *
20974 * This is reasonably steep, but branch mispredict costs are
20975 * high on modern cpus, so consider failing only if optimizing
20976 * for space.
20977 */
20978
20979 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20980 && BRANCH_COST (optimize_insn_for_speed_p (),
20981 false) >= 2)
20982 {
20983 if (cf == 0)
20984 {
20985 machine_mode cmp_mode = GET_MODE (op0);
20986
20987 cf = ct;
20988 ct = 0;
20989
20990 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20991 {
20992 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20993
20994 /* We may be reversing an unordered compare to a normal compare,
20995 which is not valid in general (we may convert a non-trapping
20996 condition to a trapping one); however, on i386 we currently
20997 emit all comparisons unordered. */
20998 code = reverse_condition_maybe_unordered (code);
20999 }
21000 else
21001 {
21002 code = reverse_condition (code);
21003 if (compare_code != UNKNOWN)
21004 compare_code = reverse_condition (compare_code);
21005 }
21006 }
21007
21008 if (compare_code != UNKNOWN)
21009 {
21010 /* notl op1 (if needed)
21011 sarl $31, op1
21012 andl (cf-ct), op1
21013 addl ct, op1
21014
21015 For x < 0 (resp. x <= -1) there will be no notl,
21016 so if possible swap the constants to get rid of the
21017 complement.
21018 True/false will be -1/0 while the code below (store flag
21019 followed by decrement) gives 0/-1, so the constants need
21020 to be exchanged once more. */
21021
21022 if (compare_code == GE || !cf)
21023 {
21024 code = reverse_condition (code);
21025 compare_code = LT;
21026 }
21027 else
21028 std::swap (cf, ct);
21029
21030 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21031 }
21032 else
21033 {
21034 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21035
21036 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21037 constm1_rtx,
21038 copy_rtx (out), 1, OPTAB_DIRECT);
21039 }
21040
21041 out = expand_simple_binop (mode, AND, copy_rtx (out),
21042 gen_int_mode (cf - ct, mode),
21043 copy_rtx (out), 1, OPTAB_DIRECT);
21044 if (ct)
21045 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21046 copy_rtx (out), 1, OPTAB_DIRECT);
21047 if (!rtx_equal_p (out, operands[0]))
21048 emit_move_insn (operands[0], copy_rtx (out));
21049
21050 return true;
21051 }
21052 }
21053
21054 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21055 {
21056 /* Try a few more things with specific constants and a variable. */
21057
21058 optab op;
21059 rtx var, orig_out, out, tmp;
21060
21061 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21062 return false;
21063
21064 /* If one of the two operands is an interesting constant, load a
21065 constant with the above and mask it in with a logical operation. */
21066
21067 if (CONST_INT_P (operands[2]))
21068 {
21069 var = operands[3];
21070 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21071 operands[3] = constm1_rtx, op = and_optab;
21072 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21073 operands[3] = const0_rtx, op = ior_optab;
21074 else
21075 return false;
21076 }
21077 else if (CONST_INT_P (operands[3]))
21078 {
21079 var = operands[2];
21080 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21081 operands[2] = constm1_rtx, op = and_optab;
21082 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21083 operands[2] = const0_rtx, op = ior_optab;
21084 else
21085 return false;
21086 }
21087 else
21088 return false;
21089
21090 orig_out = operands[0];
21091 tmp = gen_reg_rtx (mode);
21092 operands[0] = tmp;
21093
21094 /* Recurse to get the constant loaded. */
21095 if (ix86_expand_int_movcc (operands) == 0)
21096 return false;
21097
21098 /* Mask in the interesting variable. */
21099 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21100 OPTAB_WIDEN);
21101 if (!rtx_equal_p (out, orig_out))
21102 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21103
21104 return true;
21105 }
21106
21107 /*
21108 * For comparison with above,
21109 *
21110 * movl cf,dest
21111 * movl ct,tmp
21112 * cmpl op1,op2
21113 * cmovcc tmp,dest
21114 *
21115 * Size 15.
21116 */
21117
21118 if (! nonimmediate_operand (operands[2], mode))
21119 operands[2] = force_reg (mode, operands[2]);
21120 if (! nonimmediate_operand (operands[3], mode))
21121 operands[3] = force_reg (mode, operands[3]);
21122
21123 if (! register_operand (operands[2], VOIDmode)
21124 && (mode == QImode
21125 || ! register_operand (operands[3], VOIDmode)))
21126 operands[2] = force_reg (mode, operands[2]);
21127
21128 if (mode == QImode
21129 && ! register_operand (operands[3], VOIDmode))
21130 operands[3] = force_reg (mode, operands[3]);
21131
21132 emit_insn (compare_seq);
21133 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21134 gen_rtx_IF_THEN_ELSE (mode,
21135 compare_op, operands[2],
21136 operands[3])));
21137 return true;
21138 }
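/* A rough sketch of the sbb-based sequences used above (illustrative
   assembly, not taken verbatim from the compiler output):

     cmpl  %esi, %edi        # sets CF from the unsigned compare
     sbbl  %eax, %eax        # %eax = CF ? -1 : 0
     andl  $(cf-ct), %eax
     addl  $ct, %eax         # selects ct or cf without a branch

   where ct and cf stand for the two constant arms of the conditional
   move, as in the comments inside the function.  */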
21139
21140 /* Swap, force into registers, or otherwise massage the two operands
21141 to an sse comparison with a mask result. Thus we differ a bit from
21142 ix86_prepare_fp_compare_args which expects to produce a flags result.
21143
21144 The DEST operand exists to help determine whether to commute commutative
21145 operators. The POP0/POP1 operands are updated in place. The new
21146 comparison code is returned, or UNKNOWN if not implementable. */
21147
21148 static enum rtx_code
21149 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21150 rtx *pop0, rtx *pop1)
21151 {
21152 switch (code)
21153 {
21154 case LTGT:
21155 case UNEQ:
21156 /* AVX supports all the needed comparisons. */
21157 if (TARGET_AVX)
21158 break;
21159 /* We have no LTGT as an operator. We could implement it with
21160 NE & ORDERED, but this requires an extra temporary. It's
21161 not clear that it's worth it. */
21162 return UNKNOWN;
21163
21164 case LT:
21165 case LE:
21166 case UNGT:
21167 case UNGE:
21168 /* These are supported directly. */
21169 break;
21170
21171 case EQ:
21172 case NE:
21173 case UNORDERED:
21174 case ORDERED:
21175 /* AVX has 3 operand comparisons, no need to swap anything. */
21176 if (TARGET_AVX)
21177 break;
21178 /* For commutative operators, try to canonicalize the destination
21179 operand to be first in the comparison - this helps reload to
21180 avoid extra moves. */
21181 if (!dest || !rtx_equal_p (dest, *pop1))
21182 break;
21183 /* FALLTHRU */
21184
21185 case GE:
21186 case GT:
21187 case UNLE:
21188 case UNLT:
21189 /* These are not supported directly before AVX, and furthermore
21190 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21191 comparison operands to transform into something that is
21192 supported. */
21193 std::swap (*pop0, *pop1);
21194 code = swap_condition (code);
21195 break;
21196
21197 default:
21198 gcc_unreachable ();
21199 }
21200
21201 return code;
21202 }
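/* For illustration: before AVX the SSE compare instructions provide
   lt/le/unge/ungt style predicates but not gt/ge, so a comparison such
   as GT (a, b) is rewritten above as LT (b, a) by swapping the
   operands; with AVX the full predicate set exists and no swap is
   needed.  */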
21203
21204 /* Detect conditional moves that exactly match min/max operational
21205 semantics. Note that this is IEEE safe, as long as we don't
21206 interchange the operands.
21207
21208 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21209 and TRUE if the operation is successful and instructions are emitted. */
21210
21211 static bool
21212 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21213 rtx cmp_op1, rtx if_true, rtx if_false)
21214 {
21215 machine_mode mode;
21216 bool is_min;
21217 rtx tmp;
21218
21219 if (code == LT)
21220 ;
21221 else if (code == UNGE)
21222 std::swap (if_true, if_false);
21223 else
21224 return false;
21225
21226 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21227 is_min = true;
21228 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21229 is_min = false;
21230 else
21231 return false;
21232
21233 mode = GET_MODE (dest);
21234
21235 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21236 but MODE may be a vector mode and thus not appropriate. */
21237 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21238 {
21239 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21240 rtvec v;
21241
21242 if_true = force_reg (mode, if_true);
21243 v = gen_rtvec (2, if_true, if_false);
21244 tmp = gen_rtx_UNSPEC (mode, v, u);
21245 }
21246 else
21247 {
21248 code = is_min ? SMIN : SMAX;
21249 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21250 }
21251
21252 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21253 return true;
21254 }
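/* Informal note: SSE min and max return the second source operand when
   the operands are unordered (or both zero), which is why the operands
   are never interchanged here when IEEE semantics must be preserved;
   the UNSPEC_IEEE_MIN/MAX path keeps that ordering explicit for the
   later patterns.  */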
21255
21256 /* Expand an sse vector comparison. Return the register with the result. */
21257
21258 static rtx
21259 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21260 rtx op_true, rtx op_false)
21261 {
21262 machine_mode mode = GET_MODE (dest);
21263 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21264
21265 /* In the general case the result of the comparison can differ from the operands' type. */
21266 machine_mode cmp_mode;
21267
21268 /* In AVX512F the result of comparison is an integer mask. */
21269 bool maskcmp = false;
21270 rtx x;
21271
21272 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21273 {
21274 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21275 gcc_assert (cmp_mode != BLKmode);
21276
21277 maskcmp = true;
21278 }
21279 else
21280 cmp_mode = cmp_ops_mode;
21281
21282
21283 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21284 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21285 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21286
21287 if (optimize
21288 || reg_overlap_mentioned_p (dest, op_true)
21289 || reg_overlap_mentioned_p (dest, op_false))
21290 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21291
21292 /* Compare patterns for int modes are unspec in AVX512F only. */
21293 if (maskcmp && (code == GT || code == EQ))
21294 {
21295 rtx (*gen)(rtx, rtx, rtx);
21296
21297 switch (cmp_ops_mode)
21298 {
21299 case V16SImode:
21300 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21301 break;
21302 case V8DImode:
21303 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21304 break;
21305 default:
21306 gen = NULL;
21307 }
21308
21309 if (gen)
21310 {
21311 emit_insn (gen (dest, cmp_op0, cmp_op1));
21312 return dest;
21313 }
21314 }
21315 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21316
21317 if (cmp_mode != mode && !maskcmp)
21318 {
21319 x = force_reg (cmp_ops_mode, x);
21320 convert_move (dest, x, false);
21321 }
21322 else
21323 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21324
21325 return dest;
21326 }
21327
21328 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21329 operations. This is used for both scalar and vector conditional moves. */
21330
21331 static void
21332 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21333 {
21334 machine_mode mode = GET_MODE (dest);
21335 machine_mode cmpmode = GET_MODE (cmp);
21336
21337 /* In AVX512F the result of comparison is an integer mask. */
21338 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21339
21340 rtx t2, t3, x;
21341
21342 if (vector_all_ones_operand (op_true, mode)
21343 && rtx_equal_p (op_false, CONST0_RTX (mode))
21344 && !maskcmp)
21345 {
21346 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21347 }
21348 else if (op_false == CONST0_RTX (mode)
21349 && !maskcmp)
21350 {
21351 op_true = force_reg (mode, op_true);
21352 x = gen_rtx_AND (mode, cmp, op_true);
21353 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21354 }
21355 else if (op_true == CONST0_RTX (mode)
21356 && !maskcmp)
21357 {
21358 op_false = force_reg (mode, op_false);
21359 x = gen_rtx_NOT (mode, cmp);
21360 x = gen_rtx_AND (mode, x, op_false);
21361 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21362 }
21363 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21364 && !maskcmp)
21365 {
21366 op_false = force_reg (mode, op_false);
21367 x = gen_rtx_IOR (mode, cmp, op_false);
21368 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21369 }
21370 else if (TARGET_XOP
21371 && !maskcmp)
21372 {
21373 op_true = force_reg (mode, op_true);
21374
21375 if (!nonimmediate_operand (op_false, mode))
21376 op_false = force_reg (mode, op_false);
21377
21378 emit_insn (gen_rtx_SET (mode, dest,
21379 gen_rtx_IF_THEN_ELSE (mode, cmp,
21380 op_true,
21381 op_false)));
21382 }
21383 else
21384 {
21385 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21386 rtx d = dest;
21387
21388 if (!nonimmediate_operand (op_true, mode))
21389 op_true = force_reg (mode, op_true);
21390
21391 op_false = force_reg (mode, op_false);
21392
21393 switch (mode)
21394 {
21395 case V4SFmode:
21396 if (TARGET_SSE4_1)
21397 gen = gen_sse4_1_blendvps;
21398 break;
21399 case V2DFmode:
21400 if (TARGET_SSE4_1)
21401 gen = gen_sse4_1_blendvpd;
21402 break;
21403 case V16QImode:
21404 case V8HImode:
21405 case V4SImode:
21406 case V2DImode:
21407 if (TARGET_SSE4_1)
21408 {
21409 gen = gen_sse4_1_pblendvb;
21410 if (mode != V16QImode)
21411 d = gen_reg_rtx (V16QImode);
21412 op_false = gen_lowpart (V16QImode, op_false);
21413 op_true = gen_lowpart (V16QImode, op_true);
21414 cmp = gen_lowpart (V16QImode, cmp);
21415 }
21416 break;
21417 case V8SFmode:
21418 if (TARGET_AVX)
21419 gen = gen_avx_blendvps256;
21420 break;
21421 case V4DFmode:
21422 if (TARGET_AVX)
21423 gen = gen_avx_blendvpd256;
21424 break;
21425 case V32QImode:
21426 case V16HImode:
21427 case V8SImode:
21428 case V4DImode:
21429 if (TARGET_AVX2)
21430 {
21431 gen = gen_avx2_pblendvb;
21432 if (mode != V32QImode)
21433 d = gen_reg_rtx (V32QImode);
21434 op_false = gen_lowpart (V32QImode, op_false);
21435 op_true = gen_lowpart (V32QImode, op_true);
21436 cmp = gen_lowpart (V32QImode, cmp);
21437 }
21438 break;
21439
21440 case V64QImode:
21441 gen = gen_avx512bw_blendmv64qi;
21442 break;
21443 case V32HImode:
21444 gen = gen_avx512bw_blendmv32hi;
21445 break;
21446 case V16SImode:
21447 gen = gen_avx512f_blendmv16si;
21448 break;
21449 case V8DImode:
21450 gen = gen_avx512f_blendmv8di;
21451 break;
21452 case V8DFmode:
21453 gen = gen_avx512f_blendmv8df;
21454 break;
21455 case V16SFmode:
21456 gen = gen_avx512f_blendmv16sf;
21457 break;
21458
21459 default:
21460 break;
21461 }
21462
21463 if (gen != NULL)
21464 {
21465 emit_insn (gen (d, op_false, op_true, cmp));
21466 if (d != dest)
21467 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21468 }
21469 else
21470 {
21471 op_true = force_reg (mode, op_true);
21472
21473 t2 = gen_reg_rtx (mode);
21474 if (optimize)
21475 t3 = gen_reg_rtx (mode);
21476 else
21477 t3 = dest;
21478
21479 x = gen_rtx_AND (mode, op_true, cmp);
21480 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21481
21482 x = gen_rtx_NOT (mode, cmp);
21483 x = gen_rtx_AND (mode, x, op_false);
21484 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21485
21486 x = gen_rtx_IOR (mode, t3, t2);
21487 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21488 }
21489 }
21490 }
21491
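/* Illustrative note (not part of the original source): when no blend
   instruction applies, the fallback sequence emitted above is the
   classic bitwise select, roughly

       dest = (op_true & cmp) | (op_false & ~cmp);

   assuming CMP holds all-ones in lanes where the condition is true and
   all-zeros elsewhere, which is what the SSE compare insns produce.  */
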
21492 /* Expand a floating-point conditional move. Return true if successful. */
21493
21494 bool
21495 ix86_expand_fp_movcc (rtx operands[])
21496 {
21497 machine_mode mode = GET_MODE (operands[0]);
21498 enum rtx_code code = GET_CODE (operands[1]);
21499 rtx tmp, compare_op;
21500 rtx op0 = XEXP (operands[1], 0);
21501 rtx op1 = XEXP (operands[1], 1);
21502
21503 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21504 {
21505 machine_mode cmode;
21506
21507 /* Since we've no cmove for sse registers, don't force bad register
21508 allocation just to gain access to it. Deny movcc when the
21509 comparison mode doesn't match the move mode. */
21510 cmode = GET_MODE (op0);
21511 if (cmode == VOIDmode)
21512 cmode = GET_MODE (op1);
21513 if (cmode != mode)
21514 return false;
21515
21516 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21517 if (code == UNKNOWN)
21518 return false;
21519
21520 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21521 operands[2], operands[3]))
21522 return true;
21523
21524 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21525 operands[2], operands[3]);
21526 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21527 return true;
21528 }
21529
21530 if (GET_MODE (op0) == TImode
21531 || (GET_MODE (op0) == DImode
21532 && !TARGET_64BIT))
21533 return false;
21534
21535 /* The floating point conditional move instructions don't directly
21536 support conditions resulting from a signed integer comparison. */
21537
21538 compare_op = ix86_expand_compare (code, op0, op1);
21539 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21540 {
21541 tmp = gen_reg_rtx (QImode);
21542 ix86_expand_setcc (tmp, code, op0, op1);
21543
21544 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21545 }
21546
21547 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21548 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21549 operands[2], operands[3])));
21550
21551 return true;
21552 }
21553
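/* Illustrative example (an assumption, not taken from the source): with
   SSE math a scalar selection such as

       double r = a < b ? x : y;

   goes through the ix86_expand_sse_cmp / ix86_expand_sse_movcc path
   above, i.e. a CMPSD producing an all-ones/all-zeros mask followed by a
   bitwise or blend select, while the non-SSE path below relies on FCMOV
   for the flag conditions it can represent.  */
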
21554 /* Expand a floating-point vector conditional move; a vcond operation
21555 rather than a movcc operation. */
21556
21557 bool
21558 ix86_expand_fp_vcond (rtx operands[])
21559 {
21560 enum rtx_code code = GET_CODE (operands[3]);
21561 rtx cmp;
21562
21563 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21564 &operands[4], &operands[5]);
21565 if (code == UNKNOWN)
21566 {
21567 rtx temp;
21568 switch (GET_CODE (operands[3]))
21569 {
21570 case LTGT:
21571 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21572 operands[5], operands[0], operands[0]);
21573 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21574 operands[5], operands[1], operands[2]);
21575 code = AND;
21576 break;
21577 case UNEQ:
21578 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21579 operands[5], operands[0], operands[0]);
21580 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21581 operands[5], operands[1], operands[2]);
21582 code = IOR;
21583 break;
21584 default:
21585 gcc_unreachable ();
21586 }
21587 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21588 OPTAB_DIRECT);
21589 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21590 return true;
21591 }
21592
21593 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21594 operands[5], operands[1], operands[2]))
21595 return true;
21596
21597 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21598 operands[1], operands[2]);
21599 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21600 return true;
21601 }
21602
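/* Worked example (illustrative only): LTGT has no single SSE compare, so
   it is synthesized above from two compares that do exist,

       a <> b   ==   ORDERED (a, b)  AND  (a != b)

   and dually UNEQ == UNORDERED OR EQ; the two mask vectors are combined
   with a vector AND/IOR before being fed to the movcc expansion.  */
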
21603 /* Expand a signed/unsigned integral vector conditional move. */
21604
21605 bool
21606 ix86_expand_int_vcond (rtx operands[])
21607 {
21608 machine_mode data_mode = GET_MODE (operands[0]);
21609 machine_mode mode = GET_MODE (operands[4]);
21610 enum rtx_code code = GET_CODE (operands[3]);
21611 bool negate = false;
21612 rtx x, cop0, cop1;
21613
21614 cop0 = operands[4];
21615 cop1 = operands[5];
21616
21617 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21618 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21619 if ((code == LT || code == GE)
21620 && data_mode == mode
21621 && cop1 == CONST0_RTX (mode)
21622 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21623 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21624 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21625 && (GET_MODE_SIZE (data_mode) == 16
21626 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21627 {
21628 rtx negop = operands[2 - (code == LT)];
21629 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21630 if (negop == CONST1_RTX (data_mode))
21631 {
21632 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21633 operands[0], 1, OPTAB_DIRECT);
21634 if (res != operands[0])
21635 emit_move_insn (operands[0], res);
21636 return true;
21637 }
21638 else if (GET_MODE_INNER (data_mode) != DImode
21639 && vector_all_ones_operand (negop, data_mode))
21640 {
21641 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21642 operands[0], 0, OPTAB_DIRECT);
21643 if (res != operands[0])
21644 emit_move_insn (operands[0], res);
21645 return true;
21646 }
21647 }
21648
21649 if (!nonimmediate_operand (cop1, mode))
21650 cop1 = force_reg (mode, cop1);
21651 if (!general_operand (operands[1], data_mode))
21652 operands[1] = force_reg (data_mode, operands[1]);
21653 if (!general_operand (operands[2], data_mode))
21654 operands[2] = force_reg (data_mode, operands[2]);
21655
21656 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21657 if (TARGET_XOP
21658 && (mode == V16QImode || mode == V8HImode
21659 || mode == V4SImode || mode == V2DImode))
21660 ;
21661 else
21662 {
21663 /* Canonicalize the comparison to EQ, GT, GTU. */
21664 switch (code)
21665 {
21666 case EQ:
21667 case GT:
21668 case GTU:
21669 break;
21670
21671 case NE:
21672 case LE:
21673 case LEU:
21674 code = reverse_condition (code);
21675 negate = true;
21676 break;
21677
21678 case GE:
21679 case GEU:
21680 code = reverse_condition (code);
21681 negate = true;
21682 /* FALLTHRU */
21683
21684 case LT:
21685 case LTU:
21686 std::swap (cop0, cop1);
21687 code = swap_condition (code);
21688 break;
21689
21690 default:
21691 gcc_unreachable ();
21692 }
21693
21694 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21695 if (mode == V2DImode)
21696 {
21697 switch (code)
21698 {
21699 case EQ:
21700 /* SSE4.1 supports EQ. */
21701 if (!TARGET_SSE4_1)
21702 return false;
21703 break;
21704
21705 case GT:
21706 case GTU:
21707 /* SSE4.2 supports GT/GTU. */
21708 if (!TARGET_SSE4_2)
21709 return false;
21710 break;
21711
21712 default:
21713 gcc_unreachable ();
21714 }
21715 }
21716
21717 /* Unsigned parallel compare is not supported by the hardware.
21718 Play some tricks to turn this into a signed comparison
21719 against 0. */
21720 if (code == GTU)
21721 {
21722 cop0 = force_reg (mode, cop0);
21723
21724 switch (mode)
21725 {
21726 case V16SImode:
21727 case V8DImode:
21728 case V8SImode:
21729 case V4DImode:
21730 case V4SImode:
21731 case V2DImode:
21732 {
21733 rtx t1, t2, mask;
21734 rtx (*gen_sub3) (rtx, rtx, rtx);
21735
21736 switch (mode)
21737 {
21738 case V16SImode: gen_sub3 = gen_subv16si3; break;
21739 case V8DImode: gen_sub3 = gen_subv8di3; break;
21740 case V8SImode: gen_sub3 = gen_subv8si3; break;
21741 case V4DImode: gen_sub3 = gen_subv4di3; break;
21742 case V4SImode: gen_sub3 = gen_subv4si3; break;
21743 case V2DImode: gen_sub3 = gen_subv2di3; break;
21744 default:
21745 gcc_unreachable ();
21746 }
21747 /* Subtract (-(INT MAX) - 1) from both operands to make
21748 them signed. */
21749 mask = ix86_build_signbit_mask (mode, true, false);
21750 t1 = gen_reg_rtx (mode);
21751 emit_insn (gen_sub3 (t1, cop0, mask));
21752
21753 t2 = gen_reg_rtx (mode);
21754 emit_insn (gen_sub3 (t2, cop1, mask));
21755
21756 cop0 = t1;
21757 cop1 = t2;
21758 code = GT;
21759 }
21760 break;
21761
21762 case V64QImode:
21763 case V32HImode:
21764 case V32QImode:
21765 case V16HImode:
21766 case V16QImode:
21767 case V8HImode:
21768 /* Perform a parallel unsigned saturating subtraction. */
21769 x = gen_reg_rtx (mode);
21770 emit_insn (gen_rtx_SET (VOIDmode, x,
21771 gen_rtx_US_MINUS (mode, cop0, cop1)));
21772
21773 cop0 = x;
21774 cop1 = CONST0_RTX (mode);
21775 code = EQ;
21776 negate = !negate;
21777 break;
21778
21779 default:
21780 gcc_unreachable ();
21781 }
21782 }
21783 }
21784
21785 /* Allow the comparison to be done in one mode, but the movcc to
21786 happen in another mode. */
21787 if (data_mode == mode)
21788 {
21789 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21790 operands[1+negate], operands[2-negate]);
21791 }
21792 else
21793 {
21794 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21795 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21796 operands[1+negate], operands[2-negate]);
21797 if (GET_MODE (x) == mode)
21798 x = gen_lowpart (data_mode, x);
21799 }
21800
21801 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21802 operands[2-negate]);
21803 return true;
21804 }
21805
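/* Illustrative sketch of the GTU handling above (not from the original
   source): PCMPGT* is a signed compare, so an unsigned a > b on dword
   lanes is rewritten by flipping the sign bit of both sides,

       (a - 0x80000000) >signed (b - 0x80000000)   ==   a >unsigned b

   which the subtraction of the sign-bit mask implements; for the byte
   and word cases the saturating subtract turns a >u b into
   (a -us b) != 0, i.e. an EQ compare with the result negated.  */
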
21806 /* AVX512F does support 64-byte integer vector operations,
21807 thus the longest vector we are faced with is V64QImode. */
21808 #define MAX_VECT_LEN 64
21809
21810 struct expand_vec_perm_d
21811 {
21812 rtx target, op0, op1;
21813 unsigned char perm[MAX_VECT_LEN];
21814 machine_mode vmode;
21815 unsigned char nelt;
21816 bool one_operand_p;
21817 bool testing_p;
21818 };
21819
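/* Example layout (illustrative, an assumption): a V4SImode permutation
   selecting { op0[1], op0[3], op1[0], op1[2] } would be described by
   vmode = V4SImode, nelt = 4, one_operand_p = false and
   perm = { 1, 3, 4, 6 }, i.e. indices >= nelt refer to elements of OP1.  */
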
21820 static bool
21821 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21822 struct expand_vec_perm_d *d)
21823 {
21824 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21825 expander, so args are either in d, or in op0, op1 etc. */
21826 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21827 machine_mode maskmode = mode;
21828 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21829
21830 switch (mode)
21831 {
21832 case V8HImode:
21833 if (TARGET_AVX512VL && TARGET_AVX512BW)
21834 gen = gen_avx512vl_vpermi2varv8hi3;
21835 break;
21836 case V16HImode:
21837 if (TARGET_AVX512VL && TARGET_AVX512BW)
21838 gen = gen_avx512vl_vpermi2varv16hi3;
21839 break;
21840 case V32HImode:
21841 if (TARGET_AVX512BW)
21842 gen = gen_avx512bw_vpermi2varv32hi3;
21843 break;
21844 case V4SImode:
21845 if (TARGET_AVX512VL)
21846 gen = gen_avx512vl_vpermi2varv4si3;
21847 break;
21848 case V8SImode:
21849 if (TARGET_AVX512VL)
21850 gen = gen_avx512vl_vpermi2varv8si3;
21851 break;
21852 case V16SImode:
21853 if (TARGET_AVX512F)
21854 gen = gen_avx512f_vpermi2varv16si3;
21855 break;
21856 case V4SFmode:
21857 if (TARGET_AVX512VL)
21858 {
21859 gen = gen_avx512vl_vpermi2varv4sf3;
21860 maskmode = V4SImode;
21861 }
21862 break;
21863 case V8SFmode:
21864 if (TARGET_AVX512VL)
21865 {
21866 gen = gen_avx512vl_vpermi2varv8sf3;
21867 maskmode = V8SImode;
21868 }
21869 break;
21870 case V16SFmode:
21871 if (TARGET_AVX512F)
21872 {
21873 gen = gen_avx512f_vpermi2varv16sf3;
21874 maskmode = V16SImode;
21875 }
21876 break;
21877 case V2DImode:
21878 if (TARGET_AVX512VL)
21879 gen = gen_avx512vl_vpermi2varv2di3;
21880 break;
21881 case V4DImode:
21882 if (TARGET_AVX512VL)
21883 gen = gen_avx512vl_vpermi2varv4di3;
21884 break;
21885 case V8DImode:
21886 if (TARGET_AVX512F)
21887 gen = gen_avx512f_vpermi2varv8di3;
21888 break;
21889 case V2DFmode:
21890 if (TARGET_AVX512VL)
21891 {
21892 gen = gen_avx512vl_vpermi2varv2df3;
21893 maskmode = V2DImode;
21894 }
21895 break;
21896 case V4DFmode:
21897 if (TARGET_AVX512VL)
21898 {
21899 gen = gen_avx512vl_vpermi2varv4df3;
21900 maskmode = V4DImode;
21901 }
21902 break;
21903 case V8DFmode:
21904 if (TARGET_AVX512F)
21905 {
21906 gen = gen_avx512f_vpermi2varv8df3;
21907 maskmode = V8DImode;
21908 }
21909 break;
21910 default:
21911 break;
21912 }
21913
21914 if (gen == NULL)
21915 return false;
21916
21917 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21918 expander, so args are either in d, or in op0, op1 etc. */
21919 if (d)
21920 {
21921 rtx vec[64];
21922 target = d->target;
21923 op0 = d->op0;
21924 op1 = d->op1;
21925 for (int i = 0; i < d->nelt; ++i)
21926 vec[i] = GEN_INT (d->perm[i]);
21927 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21928 }
21929
21930 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21931 return true;
21932 }
21933
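/* Usage note (a sketch, not guaranteed by the source): the vpermi2var
   patterns used above perform a full two-source table lookup, roughly

       for (i = 0; i < nelt; i++)
         dest[i] = (mask[i] & nelt) ? op1[mask[i] & (nelt - 1)]
                                    : op0[mask[i] & (nelt - 1)];

   so no separate merge step is needed, unlike the AVX2/SSSE3 fallbacks
   in ix86_expand_vec_perm below.  */
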
21934 /* Expand a variable vector permutation. */
21935
21936 void
21937 ix86_expand_vec_perm (rtx operands[])
21938 {
21939 rtx target = operands[0];
21940 rtx op0 = operands[1];
21941 rtx op1 = operands[2];
21942 rtx mask = operands[3];
21943 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21944 machine_mode mode = GET_MODE (op0);
21945 machine_mode maskmode = GET_MODE (mask);
21946 int w, e, i;
21947 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21948
21949 /* Number of elements in the vector. */
21950 w = GET_MODE_NUNITS (mode);
21951 e = GET_MODE_UNIT_SIZE (mode);
21952 gcc_assert (w <= 64);
21953
21954 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21955 return;
21956
21957 if (TARGET_AVX2)
21958 {
21959 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21960 {
21961 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21962 a constant shuffle operand. With a tiny bit of effort we can
21963 use VPERMD instead. A re-interpretation stall for V4DFmode is
21964 unfortunate but there's no avoiding it.
21965 Similarly for V16HImode we don't have instructions for variable
21966 shuffling, while for V32QImode we can, after preparing suitable
21967 masks, use vpshufb; vpshufb; vpermq; vpor. */
21968
21969 if (mode == V16HImode)
21970 {
21971 maskmode = mode = V32QImode;
21972 w = 32;
21973 e = 1;
21974 }
21975 else
21976 {
21977 maskmode = mode = V8SImode;
21978 w = 8;
21979 e = 4;
21980 }
21981 t1 = gen_reg_rtx (maskmode);
21982
21983 /* Replicate the low bits of the V4DImode mask into V8SImode:
21984 mask = { A B C D }
21985 t1 = { A A B B C C D D }. */
21986 for (i = 0; i < w / 2; ++i)
21987 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21988 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21989 vt = force_reg (maskmode, vt);
21990 mask = gen_lowpart (maskmode, mask);
21991 if (maskmode == V8SImode)
21992 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21993 else
21994 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21995
21996 /* Multiply the shuffle indices by two. */
21997 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21998 OPTAB_DIRECT);
21999
22000 /* Add one to the odd shuffle indices:
22001 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22002 for (i = 0; i < w / 2; ++i)
22003 {
22004 vec[i * 2] = const0_rtx;
22005 vec[i * 2 + 1] = const1_rtx;
22006 }
22007 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22008 vt = validize_mem (force_const_mem (maskmode, vt));
22009 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22010 OPTAB_DIRECT);
22011
22012 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22013 operands[3] = mask = t1;
22014 target = gen_reg_rtx (mode);
22015 op0 = gen_lowpart (mode, op0);
22016 op1 = gen_lowpart (mode, op1);
22017 }
22018
22019 switch (mode)
22020 {
22021 case V8SImode:
22022 /* The VPERMD and VPERMPS instructions already properly ignore
22023 the high bits of the shuffle elements. No need for us to
22024 perform an AND ourselves. */
22025 if (one_operand_shuffle)
22026 {
22027 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22028 if (target != operands[0])
22029 emit_move_insn (operands[0],
22030 gen_lowpart (GET_MODE (operands[0]), target));
22031 }
22032 else
22033 {
22034 t1 = gen_reg_rtx (V8SImode);
22035 t2 = gen_reg_rtx (V8SImode);
22036 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22037 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22038 goto merge_two;
22039 }
22040 return;
22041
22042 case V8SFmode:
22043 mask = gen_lowpart (V8SImode, mask);
22044 if (one_operand_shuffle)
22045 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22046 else
22047 {
22048 t1 = gen_reg_rtx (V8SFmode);
22049 t2 = gen_reg_rtx (V8SFmode);
22050 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22051 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22052 goto merge_two;
22053 }
22054 return;
22055
22056 case V4SImode:
22057 /* By combining the two 128-bit input vectors into one 256-bit
22058 input vector, we can use VPERMD and VPERMPS for the full
22059 two-operand shuffle. */
22060 t1 = gen_reg_rtx (V8SImode);
22061 t2 = gen_reg_rtx (V8SImode);
22062 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22063 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22064 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22065 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22066 return;
22067
22068 case V4SFmode:
22069 t1 = gen_reg_rtx (V8SFmode);
22070 t2 = gen_reg_rtx (V8SImode);
22071 mask = gen_lowpart (V4SImode, mask);
22072 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22073 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22074 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22075 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22076 return;
22077
22078 case V32QImode:
22079 t1 = gen_reg_rtx (V32QImode);
22080 t2 = gen_reg_rtx (V32QImode);
22081 t3 = gen_reg_rtx (V32QImode);
22082 vt2 = GEN_INT (-128);
22083 for (i = 0; i < 32; i++)
22084 vec[i] = vt2;
22085 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22086 vt = force_reg (V32QImode, vt);
22087 for (i = 0; i < 32; i++)
22088 vec[i] = i < 16 ? vt2 : const0_rtx;
22089 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22090 vt2 = force_reg (V32QImode, vt2);
22091 /* From mask create two adjusted masks, which contain the same
22092 bits as mask in the low 7 bits of each vector element.
22093 The first mask will have the most significant bit clear
22094 if it requests element from the same 128-bit lane
22095 and MSB set if it requests element from the other 128-bit lane.
22096 The second mask will have the opposite values of the MSB,
22097 and additionally will have its 128-bit lanes swapped.
22098 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22099 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22100 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22101 stands for other 12 bytes. */
22102 /* The bit telling whether an element is from the same lane or the other
22103 lane is bit 4, so shift it up by 3 to the MSB position. */
22104 t5 = gen_reg_rtx (V4DImode);
22105 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22106 GEN_INT (3)));
22107 /* Clear MSB bits from the mask just in case it had them set. */
22108 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22109 /* After this t1 will have MSB set for elements from other lane. */
22110 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22111 /* Clear bits other than MSB. */
22112 emit_insn (gen_andv32qi3 (t1, t1, vt));
22113 /* Or in the lower bits from mask into t3. */
22114 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22115 /* And invert MSB bits in t1, so MSB is set for elements from the same
22116 lane. */
22117 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22118 /* Swap 128-bit lanes in t3. */
22119 t6 = gen_reg_rtx (V4DImode);
22120 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22121 const2_rtx, GEN_INT (3),
22122 const0_rtx, const1_rtx));
22123 /* And or in the lower bits from mask into t1. */
22124 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22125 if (one_operand_shuffle)
22126 {
22127 /* Each of these shuffles will put 0s in places where
22128 element from the other 128-bit lane is needed, otherwise
22129 will shuffle in the requested value. */
22130 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22131 gen_lowpart (V32QImode, t6)));
22132 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22133 /* For t3 the 128-bit lanes are swapped again. */
22134 t7 = gen_reg_rtx (V4DImode);
22135 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22136 const2_rtx, GEN_INT (3),
22137 const0_rtx, const1_rtx));
22138 /* And oring both together leads to the result. */
22139 emit_insn (gen_iorv32qi3 (target, t1,
22140 gen_lowpart (V32QImode, t7)));
22141 if (target != operands[0])
22142 emit_move_insn (operands[0],
22143 gen_lowpart (GET_MODE (operands[0]), target));
22144 return;
22145 }
22146
22147 t4 = gen_reg_rtx (V32QImode);
22148 /* Similarly to the above one_operand_shuffle code,
22149 just repeated twice for each operand. The merge_two:
22150 code will merge the two results together. */
22151 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22152 gen_lowpart (V32QImode, t6)));
22153 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22154 gen_lowpart (V32QImode, t6)));
22155 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22156 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22157 t7 = gen_reg_rtx (V4DImode);
22158 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22159 const2_rtx, GEN_INT (3),
22160 const0_rtx, const1_rtx));
22161 t8 = gen_reg_rtx (V4DImode);
22162 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22163 const2_rtx, GEN_INT (3),
22164 const0_rtx, const1_rtx));
22165 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22166 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22167 t1 = t4;
22168 t2 = t3;
22169 goto merge_two;
22170
22171 default:
22172 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22173 break;
22174 }
22175 }
22176
22177 if (TARGET_XOP)
22178 {
22179 /* The XOP VPPERM insn supports three inputs. By ignoring the
22180 one_operand_shuffle special case, we avoid creating another
22181 set of constant vectors in memory. */
22182 one_operand_shuffle = false;
22183
22184 /* mask = mask & {2*w-1, ...} */
22185 vt = GEN_INT (2*w - 1);
22186 }
22187 else
22188 {
22189 /* mask = mask & {w-1, ...} */
22190 vt = GEN_INT (w - 1);
22191 }
22192
22193 for (i = 0; i < w; i++)
22194 vec[i] = vt;
22195 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22196 mask = expand_simple_binop (maskmode, AND, mask, vt,
22197 NULL_RTX, 0, OPTAB_DIRECT);
22198
22199 /* For non-QImode operations, convert the word permutation control
22200 into a byte permutation control. */
22201 if (mode != V16QImode)
22202 {
22203 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22204 GEN_INT (exact_log2 (e)),
22205 NULL_RTX, 0, OPTAB_DIRECT);
22206
22207 /* Convert mask to vector of chars. */
22208 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22209
22210 /* Replicate each of the input bytes into byte positions:
22211 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22212 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22213 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22214 for (i = 0; i < 16; ++i)
22215 vec[i] = GEN_INT (i/e * e);
22216 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22217 vt = validize_mem (force_const_mem (V16QImode, vt));
22218 if (TARGET_XOP)
22219 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22220 else
22221 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22222
22223 /* Convert it into the byte positions by doing
22224 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22225 for (i = 0; i < 16; ++i)
22226 vec[i] = GEN_INT (i % e);
22227 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22228 vt = validize_mem (force_const_mem (V16QImode, vt));
22229 emit_insn (gen_addv16qi3 (mask, mask, vt));
22230 }
22231
22232 /* The actual shuffle operations all operate on V16QImode. */
22233 op0 = gen_lowpart (V16QImode, op0);
22234 op1 = gen_lowpart (V16QImode, op1);
22235
22236 if (TARGET_XOP)
22237 {
22238 if (GET_MODE (target) != V16QImode)
22239 target = gen_reg_rtx (V16QImode);
22240 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22241 if (target != operands[0])
22242 emit_move_insn (operands[0],
22243 gen_lowpart (GET_MODE (operands[0]), target));
22244 }
22245 else if (one_operand_shuffle)
22246 {
22247 if (GET_MODE (target) != V16QImode)
22248 target = gen_reg_rtx (V16QImode);
22249 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22250 if (target != operands[0])
22251 emit_move_insn (operands[0],
22252 gen_lowpart (GET_MODE (operands[0]), target));
22253 }
22254 else
22255 {
22256 rtx xops[6];
22257 bool ok;
22258
22259 /* Shuffle the two input vectors independently. */
22260 t1 = gen_reg_rtx (V16QImode);
22261 t2 = gen_reg_rtx (V16QImode);
22262 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22263 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22264
22265 merge_two:
22266 /* Then merge them together. The key is whether any given control
22267 element contained a bit set that indicates the second word. */
22268 mask = operands[3];
22269 vt = GEN_INT (w);
22270 if (maskmode == V2DImode && !TARGET_SSE4_1)
22271 {
22272 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22273 more shuffle to convert the V2DI input mask into a V4SI
22274 input mask. At that point the masking that expand_int_vcond
22275 does will work as desired. */
22276 rtx t3 = gen_reg_rtx (V4SImode);
22277 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22278 const0_rtx, const0_rtx,
22279 const2_rtx, const2_rtx));
22280 mask = t3;
22281 maskmode = V4SImode;
22282 e = w = 4;
22283 }
22284
22285 for (i = 0; i < w; i++)
22286 vec[i] = vt;
22287 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22288 vt = force_reg (maskmode, vt);
22289 mask = expand_simple_binop (maskmode, AND, mask, vt,
22290 NULL_RTX, 0, OPTAB_DIRECT);
22291
22292 if (GET_MODE (target) != mode)
22293 target = gen_reg_rtx (mode);
22294 xops[0] = target;
22295 xops[1] = gen_lowpart (mode, t2);
22296 xops[2] = gen_lowpart (mode, t1);
22297 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22298 xops[4] = mask;
22299 xops[5] = vt;
22300 ok = ix86_expand_int_vcond (xops);
22301 gcc_assert (ok);
22302 if (target != operands[0])
22303 emit_move_insn (operands[0],
22304 gen_lowpart (GET_MODE (operands[0]), target));
22305 }
22306 }
22307
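/* Illustrative sketch of the generic SSSE3 two-operand path above (not
   from the original source): each input is shuffled independently with
   pshufb and the results are merged by a vector select keyed on the
   "second operand" bit of the control, roughly

       t1   = pshufb (op0, mask & (w - 1));
       t2   = pshufb (op1, mask & (w - 1));
       dest = (mask & w) ? t2 : t1;   (done via ix86_expand_int_vcond)

   after the word-sized control has first been expanded to byte
   positions for the non-QImode element sizes.  */
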
22308 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22309 true if we should do zero extension, else sign extension. HIGH_P is
22310 true if we want the N/2 high elements, else the low elements. */
22311
22312 void
22313 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22314 {
22315 machine_mode imode = GET_MODE (src);
22316 rtx tmp;
22317
22318 if (TARGET_SSE4_1)
22319 {
22320 rtx (*unpack)(rtx, rtx);
22321 rtx (*extract)(rtx, rtx) = NULL;
22322 machine_mode halfmode = BLKmode;
22323
22324 switch (imode)
22325 {
22326 case V64QImode:
22327 if (unsigned_p)
22328 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22329 else
22330 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22331 halfmode = V32QImode;
22332 extract
22333 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22334 break;
22335 case V32QImode:
22336 if (unsigned_p)
22337 unpack = gen_avx2_zero_extendv16qiv16hi2;
22338 else
22339 unpack = gen_avx2_sign_extendv16qiv16hi2;
22340 halfmode = V16QImode;
22341 extract
22342 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22343 break;
22344 case V32HImode:
22345 if (unsigned_p)
22346 unpack = gen_avx512f_zero_extendv16hiv16si2;
22347 else
22348 unpack = gen_avx512f_sign_extendv16hiv16si2;
22349 halfmode = V16HImode;
22350 extract
22351 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22352 break;
22353 case V16HImode:
22354 if (unsigned_p)
22355 unpack = gen_avx2_zero_extendv8hiv8si2;
22356 else
22357 unpack = gen_avx2_sign_extendv8hiv8si2;
22358 halfmode = V8HImode;
22359 extract
22360 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22361 break;
22362 case V16SImode:
22363 if (unsigned_p)
22364 unpack = gen_avx512f_zero_extendv8siv8di2;
22365 else
22366 unpack = gen_avx512f_sign_extendv8siv8di2;
22367 halfmode = V8SImode;
22368 extract
22369 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22370 break;
22371 case V8SImode:
22372 if (unsigned_p)
22373 unpack = gen_avx2_zero_extendv4siv4di2;
22374 else
22375 unpack = gen_avx2_sign_extendv4siv4di2;
22376 halfmode = V4SImode;
22377 extract
22378 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22379 break;
22380 case V16QImode:
22381 if (unsigned_p)
22382 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22383 else
22384 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22385 break;
22386 case V8HImode:
22387 if (unsigned_p)
22388 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22389 else
22390 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22391 break;
22392 case V4SImode:
22393 if (unsigned_p)
22394 unpack = gen_sse4_1_zero_extendv2siv2di2;
22395 else
22396 unpack = gen_sse4_1_sign_extendv2siv2di2;
22397 break;
22398 default:
22399 gcc_unreachable ();
22400 }
22401
22402 if (GET_MODE_SIZE (imode) >= 32)
22403 {
22404 tmp = gen_reg_rtx (halfmode);
22405 emit_insn (extract (tmp, src));
22406 }
22407 else if (high_p)
22408 {
22409 /* Shift higher 8 bytes to lower 8 bytes. */
22410 tmp = gen_reg_rtx (V1TImode);
22411 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22412 GEN_INT (64)));
22413 tmp = gen_lowpart (imode, tmp);
22414 }
22415 else
22416 tmp = src;
22417
22418 emit_insn (unpack (dest, tmp));
22419 }
22420 else
22421 {
22422 rtx (*unpack)(rtx, rtx, rtx);
22423
22424 switch (imode)
22425 {
22426 case V16QImode:
22427 if (high_p)
22428 unpack = gen_vec_interleave_highv16qi;
22429 else
22430 unpack = gen_vec_interleave_lowv16qi;
22431 break;
22432 case V8HImode:
22433 if (high_p)
22434 unpack = gen_vec_interleave_highv8hi;
22435 else
22436 unpack = gen_vec_interleave_lowv8hi;
22437 break;
22438 case V4SImode:
22439 if (high_p)
22440 unpack = gen_vec_interleave_highv4si;
22441 else
22442 unpack = gen_vec_interleave_lowv4si;
22443 break;
22444 default:
22445 gcc_unreachable ();
22446 }
22447
22448 if (unsigned_p)
22449 tmp = force_reg (imode, CONST0_RTX (imode));
22450 else
22451 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22452 src, pc_rtx, pc_rtx);
22453
22454 rtx tmp2 = gen_reg_rtx (imode);
22455 emit_insn (unpack (tmp2, src, tmp));
22456 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22457 }
22458 }
22459
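/* Illustrative example (an assumption): without SSE4.1, sign extension
   is done above by interleaving the source with its own sign mask, e.g.
   for the low V8HImode -> V4SImode case

       tmp  = (src < 0) ? 0xffff : 0x0000    per 16-bit element
       dest = punpcklwd (src, tmp)

   so each element ends up adjacent to sixteen copies of its sign bit,
   which is exactly the sign-extended 32-bit value.  */
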
22460 /* Expand conditional increment or decrement using adc/sbb instructions.
22461 The default case using setcc followed by the conditional move can be
22462 done by generic code. */
22463 bool
22464 ix86_expand_int_addcc (rtx operands[])
22465 {
22466 enum rtx_code code = GET_CODE (operands[1]);
22467 rtx flags;
22468 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22469 rtx compare_op;
22470 rtx val = const0_rtx;
22471 bool fpcmp = false;
22472 machine_mode mode;
22473 rtx op0 = XEXP (operands[1], 0);
22474 rtx op1 = XEXP (operands[1], 1);
22475
22476 if (operands[3] != const1_rtx
22477 && operands[3] != constm1_rtx)
22478 return false;
22479 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22480 return false;
22481 code = GET_CODE (compare_op);
22482
22483 flags = XEXP (compare_op, 0);
22484
22485 if (GET_MODE (flags) == CCFPmode
22486 || GET_MODE (flags) == CCFPUmode)
22487 {
22488 fpcmp = true;
22489 code = ix86_fp_compare_code_to_integer (code);
22490 }
22491
22492 if (code != LTU)
22493 {
22494 val = constm1_rtx;
22495 if (fpcmp)
22496 PUT_CODE (compare_op,
22497 reverse_condition_maybe_unordered
22498 (GET_CODE (compare_op)));
22499 else
22500 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22501 }
22502
22503 mode = GET_MODE (operands[0]);
22504
22505 /* Construct either adc or sbb insn. */
22506 if ((code == LTU) == (operands[3] == constm1_rtx))
22507 {
22508 switch (mode)
22509 {
22510 case QImode:
22511 insn = gen_subqi3_carry;
22512 break;
22513 case HImode:
22514 insn = gen_subhi3_carry;
22515 break;
22516 case SImode:
22517 insn = gen_subsi3_carry;
22518 break;
22519 case DImode:
22520 insn = gen_subdi3_carry;
22521 break;
22522 default:
22523 gcc_unreachable ();
22524 }
22525 }
22526 else
22527 {
22528 switch (mode)
22529 {
22530 case QImode:
22531 insn = gen_addqi3_carry;
22532 break;
22533 case HImode:
22534 insn = gen_addhi3_carry;
22535 break;
22536 case SImode:
22537 insn = gen_addsi3_carry;
22538 break;
22539 case DImode:
22540 insn = gen_adddi3_carry;
22541 break;
22542 default:
22543 gcc_unreachable ();
22544 }
22545 }
22546 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22547
22548 return true;
22549 }
22550
22551
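/* Worked example (illustrative only): a conditional increment such as
   x += (a < b) for unsigned operands is expanded above as

       cmp a, b
       adc x, 0

   because the comparison can be arranged to leave the condition in the
   carry flag (LTU), avoiding the setcc + cmov sequence the generic code
   would otherwise emit; the decrement case uses sbb instead.  */
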
22552 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22553 but works for floating point parameters and non-offsettable memories.
22554 For pushes, it returns just stack offsets; the values will be saved
22555 in the right order. Maximally three parts are generated. */
22556
22557 static int
22558 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22559 {
22560 int size;
22561
22562 if (!TARGET_64BIT)
22563 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22564 else
22565 size = (GET_MODE_SIZE (mode) + 4) / 8;
22566
22567 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22568 gcc_assert (size >= 2 && size <= 4);
22569
22570 /* Optimize constant pool reference to immediates. This is used by fp
22571 moves, which force all constants to memory to allow combining. */
22572 if (MEM_P (operand) && MEM_READONLY_P (operand))
22573 {
22574 rtx tmp = maybe_get_pool_constant (operand);
22575 if (tmp)
22576 operand = tmp;
22577 }
22578
22579 if (MEM_P (operand) && !offsettable_memref_p (operand))
22580 {
22581 /* The only non-offsetable memories we handle are pushes. */
22582 int ok = push_operand (operand, VOIDmode);
22583
22584 gcc_assert (ok);
22585
22586 operand = copy_rtx (operand);
22587 PUT_MODE (operand, word_mode);
22588 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22589 return size;
22590 }
22591
22592 if (GET_CODE (operand) == CONST_VECTOR)
22593 {
22594 machine_mode imode = int_mode_for_mode (mode);
22595 /* Caution: if we looked through a constant pool memory above,
22596 the operand may actually have a different mode now. That's
22597 ok, since we want to pun this all the way back to an integer. */
22598 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22599 gcc_assert (operand != NULL);
22600 mode = imode;
22601 }
22602
22603 if (!TARGET_64BIT)
22604 {
22605 if (mode == DImode)
22606 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22607 else
22608 {
22609 int i;
22610
22611 if (REG_P (operand))
22612 {
22613 gcc_assert (reload_completed);
22614 for (i = 0; i < size; i++)
22615 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22616 }
22617 else if (offsettable_memref_p (operand))
22618 {
22619 operand = adjust_address (operand, SImode, 0);
22620 parts[0] = operand;
22621 for (i = 1; i < size; i++)
22622 parts[i] = adjust_address (operand, SImode, 4 * i);
22623 }
22624 else if (GET_CODE (operand) == CONST_DOUBLE)
22625 {
22626 REAL_VALUE_TYPE r;
22627 long l[4];
22628
22629 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22630 switch (mode)
22631 {
22632 case TFmode:
22633 real_to_target (l, &r, mode);
22634 parts[3] = gen_int_mode (l[3], SImode);
22635 parts[2] = gen_int_mode (l[2], SImode);
22636 break;
22637 case XFmode:
22638 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22639 long double may not be 80-bit. */
22640 real_to_target (l, &r, mode);
22641 parts[2] = gen_int_mode (l[2], SImode);
22642 break;
22643 case DFmode:
22644 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22645 break;
22646 default:
22647 gcc_unreachable ();
22648 }
22649 parts[1] = gen_int_mode (l[1], SImode);
22650 parts[0] = gen_int_mode (l[0], SImode);
22651 }
22652 else
22653 gcc_unreachable ();
22654 }
22655 }
22656 else
22657 {
22658 if (mode == TImode)
22659 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22660 if (mode == XFmode || mode == TFmode)
22661 {
22662 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22663 if (REG_P (operand))
22664 {
22665 gcc_assert (reload_completed);
22666 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22667 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22668 }
22669 else if (offsettable_memref_p (operand))
22670 {
22671 operand = adjust_address (operand, DImode, 0);
22672 parts[0] = operand;
22673 parts[1] = adjust_address (operand, upper_mode, 8);
22674 }
22675 else if (GET_CODE (operand) == CONST_DOUBLE)
22676 {
22677 REAL_VALUE_TYPE r;
22678 long l[4];
22679
22680 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22681 real_to_target (l, &r, mode);
22682
22683 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22684 if (HOST_BITS_PER_WIDE_INT >= 64)
22685 parts[0]
22686 = gen_int_mode
22687 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22688 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22689 DImode);
22690 else
22691 parts[0] = immed_double_const (l[0], l[1], DImode);
22692
22693 if (upper_mode == SImode)
22694 parts[1] = gen_int_mode (l[2], SImode);
22695 else if (HOST_BITS_PER_WIDE_INT >= 64)
22696 parts[1]
22697 = gen_int_mode
22698 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22699 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22700 DImode);
22701 else
22702 parts[1] = immed_double_const (l[2], l[3], DImode);
22703 }
22704 else
22705 gcc_unreachable ();
22706 }
22707 }
22708
22709 return size;
22710 }
22711
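/* Illustrative example (an assumption): on a 32-bit target an XFmode
   (80-bit long double) operand in memory is returned as three SImode
   parts,

       parts[0] = (mem:SI op)        low word of the mantissa
       parts[1] = (mem:SI op + 4)    high word of the mantissa
       parts[2] = (mem:SI op + 8)    sign and exponent

   which is the shape ix86_split_long_move below consumes.  */
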
22712 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22713 Return false when normal moves are needed; true when all required
22714 insns have been emitted. Operands 2-4 contain the input values
22715 int the correct order; operands 5-7 contain the output values. */
22716
22717 void
22718 ix86_split_long_move (rtx operands[])
22719 {
22720 rtx part[2][4];
22721 int nparts, i, j;
22722 int push = 0;
22723 int collisions = 0;
22724 machine_mode mode = GET_MODE (operands[0]);
22725 bool collisionparts[4];
22726
22727 /* The DFmode expanders may ask us to move double.
22728 For a 64bit target this is a single move. By hiding the fact
22729 here we simplify i386.md splitters. */
22730 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22731 {
22732 /* Optimize constant pool reference to immediates. This is used by
22733 fp moves, which force all constants to memory to allow combining. */
22734
22735 if (MEM_P (operands[1])
22736 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22737 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22738 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22739 if (push_operand (operands[0], VOIDmode))
22740 {
22741 operands[0] = copy_rtx (operands[0]);
22742 PUT_MODE (operands[0], word_mode);
22743 }
22744 else
22745 operands[0] = gen_lowpart (DImode, operands[0]);
22746 operands[1] = gen_lowpart (DImode, operands[1]);
22747 emit_move_insn (operands[0], operands[1]);
22748 return;
22749 }
22750
22751 /* The only non-offsettable memory we handle is push. */
22752 if (push_operand (operands[0], VOIDmode))
22753 push = 1;
22754 else
22755 gcc_assert (!MEM_P (operands[0])
22756 || offsettable_memref_p (operands[0]));
22757
22758 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22759 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22760
22761 /* When emitting push, take care for source operands on the stack. */
22762 if (push && MEM_P (operands[1])
22763 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22764 {
22765 rtx src_base = XEXP (part[1][nparts - 1], 0);
22766
22767 /* Compensate for the stack decrement by 4. */
22768 if (!TARGET_64BIT && nparts == 3
22769 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22770 src_base = plus_constant (Pmode, src_base, 4);
22771
22772 /* src_base refers to the stack pointer and is
22773 automatically decreased by emitted push. */
22774 for (i = 0; i < nparts; i++)
22775 part[1][i] = change_address (part[1][i],
22776 GET_MODE (part[1][i]), src_base);
22777 }
22778
22779 /* We need to do the copy in the right order in case an address register
22780 of the source overlaps the destination. */
22781 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22782 {
22783 rtx tmp;
22784
22785 for (i = 0; i < nparts; i++)
22786 {
22787 collisionparts[i]
22788 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22789 if (collisionparts[i])
22790 collisions++;
22791 }
22792
22793 /* Collision in the middle part can be handled by reordering. */
22794 if (collisions == 1 && nparts == 3 && collisionparts [1])
22795 {
22796 std::swap (part[0][1], part[0][2]);
22797 std::swap (part[1][1], part[1][2]);
22798 }
22799 else if (collisions == 1
22800 && nparts == 4
22801 && (collisionparts [1] || collisionparts [2]))
22802 {
22803 if (collisionparts [1])
22804 {
22805 std::swap (part[0][1], part[0][2]);
22806 std::swap (part[1][1], part[1][2]);
22807 }
22808 else
22809 {
22810 std::swap (part[0][2], part[0][3]);
22811 std::swap (part[1][2], part[1][3]);
22812 }
22813 }
22814
22815 /* If there are more collisions, we can't handle it by reordering.
22816 Do an lea to the last part and use only one colliding move. */
22817 else if (collisions > 1)
22818 {
22819 rtx base;
22820
22821 collisions = 1;
22822
22823 base = part[0][nparts - 1];
22824
22825 /* Handle the case when the last part isn't valid for lea.
22826 Happens in 64-bit mode storing the 12-byte XFmode. */
22827 if (GET_MODE (base) != Pmode)
22828 base = gen_rtx_REG (Pmode, REGNO (base));
22829
22830 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22831 part[1][0] = replace_equiv_address (part[1][0], base);
22832 for (i = 1; i < nparts; i++)
22833 {
22834 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22835 part[1][i] = replace_equiv_address (part[1][i], tmp);
22836 }
22837 }
22838 }
22839
22840 if (push)
22841 {
22842 if (!TARGET_64BIT)
22843 {
22844 if (nparts == 3)
22845 {
22846 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22847 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22848 stack_pointer_rtx, GEN_INT (-4)));
22849 emit_move_insn (part[0][2], part[1][2]);
22850 }
22851 else if (nparts == 4)
22852 {
22853 emit_move_insn (part[0][3], part[1][3]);
22854 emit_move_insn (part[0][2], part[1][2]);
22855 }
22856 }
22857 else
22858 {
22859 /* In 64bit mode we don't have a 32bit push available. In case this is
22860 a register, it is OK - we will just use the larger counterpart. We also
22861 retype memory - this comes from an attempt to avoid the REX prefix on
22862 moving the second half of a TFmode value. */
22863 if (GET_MODE (part[1][1]) == SImode)
22864 {
22865 switch (GET_CODE (part[1][1]))
22866 {
22867 case MEM:
22868 part[1][1] = adjust_address (part[1][1], DImode, 0);
22869 break;
22870
22871 case REG:
22872 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22873 break;
22874
22875 default:
22876 gcc_unreachable ();
22877 }
22878
22879 if (GET_MODE (part[1][0]) == SImode)
22880 part[1][0] = part[1][1];
22881 }
22882 }
22883 emit_move_insn (part[0][1], part[1][1]);
22884 emit_move_insn (part[0][0], part[1][0]);
22885 return;
22886 }
22887
22888 /* Choose correct order to not overwrite the source before it is copied. */
22889 if ((REG_P (part[0][0])
22890 && REG_P (part[1][1])
22891 && (REGNO (part[0][0]) == REGNO (part[1][1])
22892 || (nparts == 3
22893 && REGNO (part[0][0]) == REGNO (part[1][2]))
22894 || (nparts == 4
22895 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22896 || (collisions > 0
22897 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22898 {
22899 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22900 {
22901 operands[2 + i] = part[0][j];
22902 operands[6 + i] = part[1][j];
22903 }
22904 }
22905 else
22906 {
22907 for (i = 0; i < nparts; i++)
22908 {
22909 operands[2 + i] = part[0][i];
22910 operands[6 + i] = part[1][i];
22911 }
22912 }
22913
22914 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22915 if (optimize_insn_for_size_p ())
22916 {
22917 for (j = 0; j < nparts - 1; j++)
22918 if (CONST_INT_P (operands[6 + j])
22919 && operands[6 + j] != const0_rtx
22920 && REG_P (operands[2 + j]))
22921 for (i = j; i < nparts - 1; i++)
22922 if (CONST_INT_P (operands[7 + i])
22923 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22924 operands[7 + i] = operands[2 + j];
22925 }
22926
22927 for (i = 0; i < nparts; i++)
22928 emit_move_insn (operands[2 + i], operands[6 + i]);
22929
22930 return;
22931 }
22932
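/* Usage example (illustrative): a DImode store on a 32-bit target is
   split above into two SImode moves, e.g. moving edx:eax into the
   memory at 4(%ecx) becomes

       movl %eax, 4(%ecx)
       movl %edx, 8(%ecx)

   with the loop over the parts reversed whenever a destination register
   is also used to address the source, so that nothing is clobbered
   before it is read.  */
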
22933 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22934 left shift by a constant, either using a single shift or
22935 a sequence of add instructions. */
22936
22937 static void
22938 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22939 {
22940 rtx (*insn)(rtx, rtx, rtx);
22941
22942 if (count == 1
22943 || (count * ix86_cost->add <= ix86_cost->shift_const
22944 && !optimize_insn_for_size_p ()))
22945 {
22946 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22947 while (count-- > 0)
22948 emit_insn (insn (operand, operand, operand));
22949 }
22950 else
22951 {
22952 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22953 emit_insn (insn (operand, operand, GEN_INT (count)));
22954 }
22955 }
22956
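/* Example (illustrative, the cost numbers are an assumption): on a core
   where two adds are no more expensive than a constant shift, a left
   shift of one half-word by 2 is emitted as

       addl %eax, %eax
       addl %eax, %eax

   when not optimizing for size, and as a single  shll $2, %eax
   otherwise.  */
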
22957 void
22958 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22959 {
22960 rtx (*gen_ashl3)(rtx, rtx, rtx);
22961 rtx (*gen_shld)(rtx, rtx, rtx);
22962 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22963
22964 rtx low[2], high[2];
22965 int count;
22966
22967 if (CONST_INT_P (operands[2]))
22968 {
22969 split_double_mode (mode, operands, 2, low, high);
22970 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22971
22972 if (count >= half_width)
22973 {
22974 emit_move_insn (high[0], low[1]);
22975 emit_move_insn (low[0], const0_rtx);
22976
22977 if (count > half_width)
22978 ix86_expand_ashl_const (high[0], count - half_width, mode);
22979 }
22980 else
22981 {
22982 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22983
22984 if (!rtx_equal_p (operands[0], operands[1]))
22985 emit_move_insn (operands[0], operands[1]);
22986
22987 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22988 ix86_expand_ashl_const (low[0], count, mode);
22989 }
22990 return;
22991 }
22992
22993 split_double_mode (mode, operands, 1, low, high);
22994
22995 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22996
22997 if (operands[1] == const1_rtx)
22998 {
22999 /* Assuming we've chosen QImode capable registers, then 1 << N
23000 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23001 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23002 {
23003 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23004
23005 ix86_expand_clear (low[0]);
23006 ix86_expand_clear (high[0]);
23007 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23008
23009 d = gen_lowpart (QImode, low[0]);
23010 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23011 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23012 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23013
23014 d = gen_lowpart (QImode, high[0]);
23015 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23016 s = gen_rtx_NE (QImode, flags, const0_rtx);
23017 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23018 }
23019
23020 /* Otherwise, we can get the same results by manually performing
23021 a bit extract operation on bit 5/6, and then performing the two
23022 shifts. The two methods of getting 0/1 into low/high are exactly
23023 the same size. Avoiding the shift in the bit extract case helps
23024 pentium4 a bit; no one else seems to care much either way. */
23025 else
23026 {
23027 machine_mode half_mode;
23028 rtx (*gen_lshr3)(rtx, rtx, rtx);
23029 rtx (*gen_and3)(rtx, rtx, rtx);
23030 rtx (*gen_xor3)(rtx, rtx, rtx);
23031 HOST_WIDE_INT bits;
23032 rtx x;
23033
23034 if (mode == DImode)
23035 {
23036 half_mode = SImode;
23037 gen_lshr3 = gen_lshrsi3;
23038 gen_and3 = gen_andsi3;
23039 gen_xor3 = gen_xorsi3;
23040 bits = 5;
23041 }
23042 else
23043 {
23044 half_mode = DImode;
23045 gen_lshr3 = gen_lshrdi3;
23046 gen_and3 = gen_anddi3;
23047 gen_xor3 = gen_xordi3;
23048 bits = 6;
23049 }
23050
23051 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23052 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23053 else
23054 x = gen_lowpart (half_mode, operands[2]);
23055 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23056
23057 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23058 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23059 emit_move_insn (low[0], high[0]);
23060 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23061 }
23062
23063 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23064 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23065 return;
23066 }
23067
23068 if (operands[1] == constm1_rtx)
23069 {
23070 /* For -1 << N, we can avoid the shld instruction, because we
23071 know that we're shifting 0...31/63 ones into a -1. */
23072 emit_move_insn (low[0], constm1_rtx);
23073 if (optimize_insn_for_size_p ())
23074 emit_move_insn (high[0], low[0]);
23075 else
23076 emit_move_insn (high[0], constm1_rtx);
23077 }
23078 else
23079 {
23080 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23081
23082 if (!rtx_equal_p (operands[0], operands[1]))
23083 emit_move_insn (operands[0], operands[1]);
23084
23085 split_double_mode (mode, operands, 1, low, high);
23086 emit_insn (gen_shld (high[0], low[0], operands[2]));
23087 }
23088
23089 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23090
23091 if (TARGET_CMOVE && scratch)
23092 {
23093 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23094 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23095
23096 ix86_expand_clear (scratch);
23097 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23098 }
23099 else
23100 {
23101 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23102 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23103
23104 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23105 }
23106 }
23107
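/* Illustrative sketch (an assumption): a DImode variable left shift on
   a 32-bit target is expanded above roughly as

       shldl %cl, %eax, %edx     high = high:low << (cl & 31)
       shll  %cl, %eax           low  = low << (cl & 31)

   followed by a fixup for cl & 32 - copy low into high and clear low -
   done either with cmov (the x86_shift*_adj_1 patterns) or with a short
   branch; the constant-count, 1 << N and -1 << N cases above avoid this
   fixup entirely.  */
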
23108 void
23109 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23110 {
23111 rtx (*gen_ashr3)(rtx, rtx, rtx)
23112 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23113 rtx (*gen_shrd)(rtx, rtx, rtx);
23114 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23115
23116 rtx low[2], high[2];
23117 int count;
23118
23119 if (CONST_INT_P (operands[2]))
23120 {
23121 split_double_mode (mode, operands, 2, low, high);
23122 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23123
23124 if (count == GET_MODE_BITSIZE (mode) - 1)
23125 {
23126 emit_move_insn (high[0], high[1]);
23127 emit_insn (gen_ashr3 (high[0], high[0],
23128 GEN_INT (half_width - 1)));
23129 emit_move_insn (low[0], high[0]);
23130
23131 }
23132 else if (count >= half_width)
23133 {
23134 emit_move_insn (low[0], high[1]);
23135 emit_move_insn (high[0], low[0]);
23136 emit_insn (gen_ashr3 (high[0], high[0],
23137 GEN_INT (half_width - 1)));
23138
23139 if (count > half_width)
23140 emit_insn (gen_ashr3 (low[0], low[0],
23141 GEN_INT (count - half_width)));
23142 }
23143 else
23144 {
23145 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23146
23147 if (!rtx_equal_p (operands[0], operands[1]))
23148 emit_move_insn (operands[0], operands[1]);
23149
23150 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23151 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23152 }
23153 }
23154 else
23155 {
23156 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23157
23158 if (!rtx_equal_p (operands[0], operands[1]))
23159 emit_move_insn (operands[0], operands[1]);
23160
23161 split_double_mode (mode, operands, 1, low, high);
23162
23163 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23164 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23165
23166 if (TARGET_CMOVE && scratch)
23167 {
23168 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23169 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23170
23171 emit_move_insn (scratch, high[0]);
23172 emit_insn (gen_ashr3 (scratch, scratch,
23173 GEN_INT (half_width - 1)));
23174 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23175 scratch));
23176 }
23177 else
23178 {
23179 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23180 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23181
23182 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23183 }
23184 }
23185 }
23186
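/* Example (illustrative): the count == bitsize-1 special case above
   turns x >> 63 of a DImode value on a 32-bit target into

       sarl $31, %edx
       movl %edx, %eax

   i.e. both halves become a copy of the sign, with no shrd required.  */
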
23187 void
23188 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23189 {
23190 rtx (*gen_lshr3)(rtx, rtx, rtx)
23191 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23192 rtx (*gen_shrd)(rtx, rtx, rtx);
23193 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23194
23195 rtx low[2], high[2];
23196 int count;
23197
23198 if (CONST_INT_P (operands[2]))
23199 {
23200 split_double_mode (mode, operands, 2, low, high);
23201 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23202
23203 if (count >= half_width)
23204 {
23205 emit_move_insn (low[0], high[1]);
23206 ix86_expand_clear (high[0]);
23207
23208 if (count > half_width)
23209 emit_insn (gen_lshr3 (low[0], low[0],
23210 GEN_INT (count - half_width)));
23211 }
23212 else
23213 {
23214 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23215
23216 if (!rtx_equal_p (operands[0], operands[1]))
23217 emit_move_insn (operands[0], operands[1]);
23218
23219 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23220 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23221 }
23222 }
23223 else
23224 {
23225 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23226
23227 if (!rtx_equal_p (operands[0], operands[1]))
23228 emit_move_insn (operands[0], operands[1]);
23229
23230 split_double_mode (mode, operands, 1, low, high);
23231
23232 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23233 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23234
23235 if (TARGET_CMOVE && scratch)
23236 {
23237 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23238 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23239
23240 ix86_expand_clear (scratch);
23241 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23242 scratch));
23243 }
23244 else
23245 {
23246 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23247 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23248
23249 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23250 }
23251 }
23252 }
23253
23254 /* Predict just emitted jump instruction to be taken with probability PROB. */
23255 static void
23256 predict_jump (int prob)
23257 {
23258 rtx insn = get_last_insn ();
23259 gcc_assert (JUMP_P (insn));
23260 add_int_reg_note (insn, REG_BR_PROB, prob);
23261 }
23262
23263 /* Helper function for the string operations below. Test VARIABLE whether
23264 it is aligned to VALUE bytes. If true, jump to the label. */
23265 static rtx_code_label *
23266 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23267 {
23268 rtx_code_label *label = gen_label_rtx ();
23269 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23270 if (GET_MODE (variable) == DImode)
23271 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23272 else
23273 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23274 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23275 1, label);
23276 if (epilogue)
23277 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23278 else
23279 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23280 return label;
23281 }
23282
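/* Usage sketch (illustrative, the surrounding code is an assumption):
   the string expanders below call this bit by bit, e.g.

       label = ix86_expand_aligntest (destptr, 4, false);
       ... emit the code handling the case where bit 2 is set ...
       emit_label (label);

   the emitted test is (destptr & 4) == 0 with the jump to LABEL
   predicted taken with 90% probability in prologues and 50% in
   epilogues.  */
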
23283 /* Adjust COUNTER by the VALUE. */
23284 static void
23285 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23286 {
23287 rtx (*gen_add)(rtx, rtx, rtx)
23288 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23289
23290 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23291 }
23292
23293 /* Zero extend possibly SImode EXP to Pmode register. */
23294 rtx
23295 ix86_zero_extend_to_Pmode (rtx exp)
23296 {
23297 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23298 }
23299
23300 /* Divide COUNTREG by SCALE. */
23301 static rtx
23302 scale_counter (rtx countreg, int scale)
23303 {
23304 rtx sc;
23305
23306 if (scale == 1)
23307 return countreg;
23308 if (CONST_INT_P (countreg))
23309 return GEN_INT (INTVAL (countreg) / scale);
23310 gcc_assert (REG_P (countreg));
23311
23312 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23313 GEN_INT (exact_log2 (scale)),
23314 NULL, 1, OPTAB_DIRECT);
23315 return sc;
23316 }
23317
23318 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23319 DImode for constant loop counts. */
23320
23321 static machine_mode
23322 counter_mode (rtx count_exp)
23323 {
23324 if (GET_MODE (count_exp) != VOIDmode)
23325 return GET_MODE (count_exp);
23326 if (!CONST_INT_P (count_exp))
23327 return Pmode;
23328 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23329 return DImode;
23330 return SImode;
23331 }
23332
23333 /* Copy the address to a Pmode register. This is used for x32 to
23334 truncate DImode TLS address to a SImode register. */
23335
23336 static rtx
23337 ix86_copy_addr_to_reg (rtx addr)
23338 {
23339 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23340 return copy_addr_to_reg (addr);
23341 else
23342 {
23343 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23344 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23345 }
23346 }
23347
23348 /* When ISSETMEM is FALSE, output a simple loop moving memory pointed to by SRCPTR
23349 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is COUNT
23350 bytes. When ISSETMEM is TRUE, output the equivalent loop setting
23351 memory to VALUE (assumed to be in MODE).
23352
23353 The size is rounded down to a whole number of chunks moved at once.
23354 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
23355
23356
23357 static void
23358 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23359 rtx destptr, rtx srcptr, rtx value,
23360 rtx count, machine_mode mode, int unroll,
23361 int expected_size, bool issetmem)
23362 {
23363 rtx_code_label *out_label, *top_label;
23364 rtx iter, tmp;
23365 machine_mode iter_mode = counter_mode (count);
23366 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23367 rtx piece_size = GEN_INT (piece_size_n);
23368 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23369 rtx size;
23370 int i;
23371
23372 top_label = gen_label_rtx ();
23373 out_label = gen_label_rtx ();
23374 iter = gen_reg_rtx (iter_mode);
23375
23376 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23377 NULL, 1, OPTAB_DIRECT);
23378 /* Those two should combine. */
23379 if (piece_size == const1_rtx)
23380 {
23381 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23382 true, out_label);
23383 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23384 }
23385 emit_move_insn (iter, const0_rtx);
23386
23387 emit_label (top_label);
23388
23389 tmp = convert_modes (Pmode, iter_mode, iter, true);
23390
23391 /* This assert could be relaxed - in that case we'd need to compute the
23392 smallest power of two containing PIECE_SIZE_N and pass it to
23393 offset_address. */
23394 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23395 destmem = offset_address (destmem, tmp, piece_size_n);
23396 destmem = adjust_address (destmem, mode, 0);
23397
23398 if (!issetmem)
23399 {
23400 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23401 srcmem = adjust_address (srcmem, mode, 0);
23402
23403 /* When unrolling for chips that reorder memory reads and writes,
23404 we can save registers by using a single temporary.
23405 Also, using 4 temporaries is overkill in 32-bit mode. */
23406 if (!TARGET_64BIT && 0)
23407 {
23408 for (i = 0; i < unroll; i++)
23409 {
23410 if (i)
23411 {
23412 destmem =
23413 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23414 srcmem =
23415 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23416 }
23417 emit_move_insn (destmem, srcmem);
23418 }
23419 }
23420 else
23421 {
23422 rtx tmpreg[4];
23423 gcc_assert (unroll <= 4);
23424 for (i = 0; i < unroll; i++)
23425 {
23426 tmpreg[i] = gen_reg_rtx (mode);
23427 if (i)
23428 {
23429 srcmem =
23430 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23431 }
23432 emit_move_insn (tmpreg[i], srcmem);
23433 }
23434 for (i = 0; i < unroll; i++)
23435 {
23436 if (i)
23437 {
23438 destmem =
23439 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23440 }
23441 emit_move_insn (destmem, tmpreg[i]);
23442 }
23443 }
23444 }
23445 else
23446 for (i = 0; i < unroll; i++)
23447 {
23448 if (i)
23449 destmem =
23450 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23451 emit_move_insn (destmem, value);
23452 }
23453
23454 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23455 true, OPTAB_LIB_WIDEN);
23456 if (tmp != iter)
23457 emit_move_insn (iter, tmp);
23458
23459 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23460 true, top_label);
23461 if (expected_size != -1)
23462 {
23463 expected_size /= GET_MODE_SIZE (mode) * unroll;
23464 if (expected_size == 0)
23465 predict_jump (0);
23466 else if (expected_size > REG_BR_PROB_BASE)
23467 predict_jump (REG_BR_PROB_BASE - 1);
23468 else
23469 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23470 }
23471 else
23472 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23473 iter = ix86_zero_extend_to_Pmode (iter);
23474 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23475 true, OPTAB_LIB_WIDEN);
23476 if (tmp != destptr)
23477 emit_move_insn (destptr, tmp);
23478 if (!issetmem)
23479 {
23480 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23481 true, OPTAB_LIB_WIDEN);
23482 if (tmp != srcptr)
23483 emit_move_insn (srcptr, tmp);
23484 }
23485 emit_label (out_label);
23486 }
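
/* Illustrative sketch only (not the emitted RTL): for the memcpy case with
   MODE == SImode and UNROLL == 2 the expansion above corresponds roughly to

       size = count & ~7;                       // piece_size_mask
       for (iter = 0; iter < size; iter += 8)
         {
           ((int *) (dest + iter))[0] = ((int *) (src + iter))[0];
           ((int *) (dest + iter))[1] = ((int *) (src + iter))[1];
         }
       dest += iter;                            // pointers advanced so the
       src += iter;                             // epilogue can handle the tail

   The setmem case stores VALUE instead of loading from SRC.  */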
23487
23488 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23489 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23490 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23491 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23492 ORIG_VALUE is the original value passed to memset to fill the memory with.
23493 Other arguments have the same meaning as for the previous function. */
23494
23495 static void
23496 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23497 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23498 rtx count,
23499 machine_mode mode, bool issetmem)
23500 {
23501 rtx destexp;
23502 rtx srcexp;
23503 rtx countreg;
23504 HOST_WIDE_INT rounded_count;
23505
23506 /* If possible, it is shorter to use rep movs.
23507 TODO: Maybe it is better to move this logic to decide_alg. */
23508 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23509 && (!issetmem || orig_value == const0_rtx))
23510 mode = SImode;
23511
23512 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23513 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23514
23515 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23516 GET_MODE_SIZE (mode)));
23517 if (mode != QImode)
23518 {
23519 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23520 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23521 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23522 }
23523 else
23524 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23525 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23526 {
23527 rounded_count = (INTVAL (count)
23528 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23529 destmem = shallow_copy_rtx (destmem);
23530 set_mem_size (destmem, rounded_count);
23531 }
23532 else if (MEM_SIZE_KNOWN_P (destmem))
23533 clear_mem_size (destmem);
23534
23535 if (issetmem)
23536 {
23537 value = force_reg (mode, gen_lowpart (mode, value));
23538 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23539 }
23540 else
23541 {
23542 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23543 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23544 if (mode != QImode)
23545 {
23546 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23547 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23548 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23549 }
23550 else
23551 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23552 if (CONST_INT_P (count))
23553 {
23554 rounded_count = (INTVAL (count)
23555 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23556 srcmem = shallow_copy_rtx (srcmem);
23557 set_mem_size (srcmem, rounded_count);
23558 }
23559 else
23560 {
23561 if (MEM_SIZE_KNOWN_P (srcmem))
23562 clear_mem_size (srcmem);
23563 }
23564 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23565 destexp, srcexp));
23566 }
23567 }
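
/* As a rough illustration (assembly shown is illustrative, not literal
   output): with MODE == SImode the memcpy case corresponds to

       mov  ecx, count        ; already scaled by scale_counter (count >> 2)
       rep  movsd             ; esi/edi advance by 4 * ecx

   and the memset case to

       mov  eax, value
       mov  ecx, count        ; scaled as above
       rep  stosd

   DESTEXP/SRCEXP describe the final pointer values so the data flow through
   the string registers stays visible to later passes.  */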
23568
23569 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23570 DESTMEM.
23571 SRCMEM is passed by pointer so it can be updated on return.
23572 The return value is the updated DESTMEM. */
23573 static rtx
23574 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23575 HOST_WIDE_INT size_to_move)
23576 {
23577 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23578 enum insn_code code;
23579 machine_mode move_mode;
23580 int piece_size, i;
23581
23582 /* Find the widest mode in which we could perform moves.
23583 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23584 it until a move of that size is supported. */
23585 piece_size = 1 << floor_log2 (size_to_move);
23586 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23587 code = optab_handler (mov_optab, move_mode);
23588 while (code == CODE_FOR_nothing && piece_size > 1)
23589 {
23590 piece_size >>= 1;
23591 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23592 code = optab_handler (mov_optab, move_mode);
23593 }
23594
23595 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23596 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23597 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23598 {
23599 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23600 move_mode = mode_for_vector (word_mode, nunits);
23601 code = optab_handler (mov_optab, move_mode);
23602 if (code == CODE_FOR_nothing)
23603 {
23604 move_mode = word_mode;
23605 piece_size = GET_MODE_SIZE (move_mode);
23606 code = optab_handler (mov_optab, move_mode);
23607 }
23608 }
23609 gcc_assert (code != CODE_FOR_nothing);
23610
23611 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23612 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23613
23614 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23615 gcc_assert (size_to_move % piece_size == 0);
23616 adjust = GEN_INT (piece_size);
23617 for (i = 0; i < size_to_move; i += piece_size)
23618 {
23619 /* We move from memory to memory, so we'll need to do it via
23620 a temporary register. */
23621 tempreg = gen_reg_rtx (move_mode);
23622 emit_insn (GEN_FCN (code) (tempreg, src));
23623 emit_insn (GEN_FCN (code) (dst, tempreg));
23624
23625 emit_move_insn (destptr,
23626 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23627 emit_move_insn (srcptr,
23628 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23629
23630 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23631 piece_size);
23632 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23633 piece_size);
23634 }
23635
23636 /* Update DST and SRC rtx. */
23637 *srcmem = src;
23638 return dst;
23639 }
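
/* Example of the mode selection above (assuming a 64-bit target with SSE):
   for SIZE_TO_MOVE == 16 we first try TImode; because it is wider than
   word_mode it is mapped to the vector mode V2DImode, so a single 16-byte
   load/store pair is emitted.  Without a usable vector move we fall back to
   word_mode and emit two 8-byte load/store pairs instead.  */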
23640
23641 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23642 static void
23643 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23644 rtx destptr, rtx srcptr, rtx count, int max_size)
23645 {
23646 rtx src, dest;
23647 if (CONST_INT_P (count))
23648 {
23649 HOST_WIDE_INT countval = INTVAL (count);
23650 HOST_WIDE_INT epilogue_size = countval % max_size;
23651 int i;
23652
23653 /* For now MAX_SIZE should be a power of 2. This assert could be
23654 relaxed, but it'll require a bit more complicated epilogue
23655 expanding. */
23656 gcc_assert ((max_size & (max_size - 1)) == 0);
23657 for (i = max_size; i >= 1; i >>= 1)
23658 {
23659 if (epilogue_size & i)
23660 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23661 }
23662 return;
23663 }
23664 if (max_size > 8)
23665 {
23666 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23667 count, 1, OPTAB_DIRECT);
23668 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23669 count, QImode, 1, 4, false);
23670 return;
23671 }
23672
23673 /* When single stringops are cheap, we can simply advance the dest and src
23674 pointers. Otherwise we save code size by maintaining an offset (zero is
23675 readily available from the preceding rep operation) and using x86
23676 addressing modes. */
23677 if (TARGET_SINGLE_STRINGOP)
23678 {
23679 if (max_size > 4)
23680 {
23681 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23682 src = change_address (srcmem, SImode, srcptr);
23683 dest = change_address (destmem, SImode, destptr);
23684 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23685 emit_label (label);
23686 LABEL_NUSES (label) = 1;
23687 }
23688 if (max_size > 2)
23689 {
23690 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23691 src = change_address (srcmem, HImode, srcptr);
23692 dest = change_address (destmem, HImode, destptr);
23693 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23694 emit_label (label);
23695 LABEL_NUSES (label) = 1;
23696 }
23697 if (max_size > 1)
23698 {
23699 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23700 src = change_address (srcmem, QImode, srcptr);
23701 dest = change_address (destmem, QImode, destptr);
23702 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23703 emit_label (label);
23704 LABEL_NUSES (label) = 1;
23705 }
23706 }
23707 else
23708 {
23709 rtx offset = force_reg (Pmode, const0_rtx);
23710 rtx tmp;
23711
23712 if (max_size > 4)
23713 {
23714 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23715 src = change_address (srcmem, SImode, srcptr);
23716 dest = change_address (destmem, SImode, destptr);
23717 emit_move_insn (dest, src);
23718 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23719 true, OPTAB_LIB_WIDEN);
23720 if (tmp != offset)
23721 emit_move_insn (offset, tmp);
23722 emit_label (label);
23723 LABEL_NUSES (label) = 1;
23724 }
23725 if (max_size > 2)
23726 {
23727 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23728 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23729 src = change_address (srcmem, HImode, tmp);
23730 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23731 dest = change_address (destmem, HImode, tmp);
23732 emit_move_insn (dest, src);
23733 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23734 true, OPTAB_LIB_WIDEN);
23735 if (tmp != offset)
23736 emit_move_insn (offset, tmp);
23737 emit_label (label);
23738 LABEL_NUSES (label) = 1;
23739 }
23740 if (max_size > 1)
23741 {
23742 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23743 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23744 src = change_address (srcmem, QImode, tmp);
23745 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23746 dest = change_address (destmem, QImode, tmp);
23747 emit_move_insn (dest, src);
23748 emit_label (label);
23749 LABEL_NUSES (label) = 1;
23750 }
23751 }
23752 }
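
/* Worked example of the constant-count path above: with COUNT == 29 and
   MAX_SIZE == 16 the epilogue size is 29 % 16 == 13 == 8 + 4 + 1, so one
   8-byte, one 4-byte and one 1-byte move are emitted, in that order.  */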
23753
23754 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23755 with value PROMOTED_VAL.
23756 DESTPTR is advanced as the stores are emitted.
23757 The return value is the updated DESTMEM. */
23758 static rtx
23759 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23760 HOST_WIDE_INT size_to_move)
23761 {
23762 rtx dst = destmem, adjust;
23763 enum insn_code code;
23764 machine_mode move_mode;
23765 int piece_size, i;
23766
23767 /* Use the mode of PROMOTED_VAL for the stores; if SIZE_TO_MOVE is
23768 narrower than that mode, switch to a correspondingly narrower integer
23769 mode and truncate PROMOTED_VAL to it. */
23770 move_mode = GET_MODE (promoted_val);
23771 if (move_mode == VOIDmode)
23772 move_mode = QImode;
23773 if (size_to_move < GET_MODE_SIZE (move_mode))
23774 {
23775 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23776 promoted_val = gen_lowpart (move_mode, promoted_val);
23777 }
23778 piece_size = GET_MODE_SIZE (move_mode);
23779 code = optab_handler (mov_optab, move_mode);
23780 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23781
23782 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23783
23784 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23785 gcc_assert (size_to_move % piece_size == 0);
23786 adjust = GEN_INT (piece_size);
23787 for (i = 0; i < size_to_move; i += piece_size)
23788 {
23789 if (piece_size <= GET_MODE_SIZE (word_mode))
23790 {
23791 emit_insn (gen_strset (destptr, dst, promoted_val));
23792 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23793 piece_size);
23794 continue;
23795 }
23796
23797 emit_insn (GEN_FCN (code) (dst, promoted_val));
23798
23799 emit_move_insn (destptr,
23800 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23801
23802 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23803 piece_size);
23804 }
23805
23806 /* Update DST rtx. */
23807 return dst;
23808 }
23809 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23810 static void
23811 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23812 rtx count, int max_size)
23813 {
23814 count =
23815 expand_simple_binop (counter_mode (count), AND, count,
23816 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23817 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23818 gen_lowpart (QImode, value), count, QImode,
23819 1, max_size / 2, true);
23820 }
23821
23822 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23823 static void
23824 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23825 rtx count, int max_size)
23826 {
23827 rtx dest;
23828
23829 if (CONST_INT_P (count))
23830 {
23831 HOST_WIDE_INT countval = INTVAL (count);
23832 HOST_WIDE_INT epilogue_size = countval % max_size;
23833 int i;
23834
23835 /* For now MAX_SIZE should be a power of 2. This assert could be
23836 relaxed, but it'll require a bit more complicated epilogue
23837 expanding. */
23838 gcc_assert ((max_size & (max_size - 1)) == 0);
23839 for (i = max_size; i >= 1; i >>= 1)
23840 {
23841 if (epilogue_size & i)
23842 {
23843 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23844 destmem = emit_memset (destmem, destptr, vec_value, i);
23845 else
23846 destmem = emit_memset (destmem, destptr, value, i);
23847 }
23848 }
23849 return;
23850 }
23851 if (max_size > 32)
23852 {
23853 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23854 return;
23855 }
23856 if (max_size > 16)
23857 {
23858 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23859 if (TARGET_64BIT)
23860 {
23861 dest = change_address (destmem, DImode, destptr);
23862 emit_insn (gen_strset (destptr, dest, value));
23863 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23864 emit_insn (gen_strset (destptr, dest, value));
23865 }
23866 else
23867 {
23868 dest = change_address (destmem, SImode, destptr);
23869 emit_insn (gen_strset (destptr, dest, value));
23870 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23871 emit_insn (gen_strset (destptr, dest, value));
23872 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23873 emit_insn (gen_strset (destptr, dest, value));
23874 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23875 emit_insn (gen_strset (destptr, dest, value));
23876 }
23877 emit_label (label);
23878 LABEL_NUSES (label) = 1;
23879 }
23880 if (max_size > 8)
23881 {
23882 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23883 if (TARGET_64BIT)
23884 {
23885 dest = change_address (destmem, DImode, destptr);
23886 emit_insn (gen_strset (destptr, dest, value));
23887 }
23888 else
23889 {
23890 dest = change_address (destmem, SImode, destptr);
23891 emit_insn (gen_strset (destptr, dest, value));
23892 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23893 emit_insn (gen_strset (destptr, dest, value));
23894 }
23895 emit_label (label);
23896 LABEL_NUSES (label) = 1;
23897 }
23898 if (max_size > 4)
23899 {
23900 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23901 dest = change_address (destmem, SImode, destptr);
23902 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23903 emit_label (label);
23904 LABEL_NUSES (label) = 1;
23905 }
23906 if (max_size > 2)
23907 {
23908 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23909 dest = change_address (destmem, HImode, destptr);
23910 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23911 emit_label (label);
23912 LABEL_NUSES (label) = 1;
23913 }
23914 if (max_size > 1)
23915 {
23916 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23917 dest = change_address (destmem, QImode, destptr);
23918 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23919 emit_label (label);
23920 LABEL_NUSES (label) = 1;
23921 }
23922 }
23923
23924 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23925 enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original
23926 alignment is ALIGN. Depending on ISSETMEM, either arguments SRCMEM/SRCPTR
23927 or VALUE/VEC_VALUE are ignored.
23928 The return value is the updated DESTMEM. */
23929 static rtx
23930 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23931 rtx destptr, rtx srcptr, rtx value,
23932 rtx vec_value, rtx count, int align,
23933 int desired_alignment, bool issetmem)
23934 {
23935 int i;
23936 for (i = 1; i < desired_alignment; i <<= 1)
23937 {
23938 if (align <= i)
23939 {
23940 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23941 if (issetmem)
23942 {
23943 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23944 destmem = emit_memset (destmem, destptr, vec_value, i);
23945 else
23946 destmem = emit_memset (destmem, destptr, value, i);
23947 }
23948 else
23949 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23950 ix86_adjust_counter (count, i);
23951 emit_label (label);
23952 LABEL_NUSES (label) = 1;
23953 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23954 }
23955 }
23956 return destmem;
23957 }
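
/* Sketch of the generated prologue (illustrative; ALIGN == 1 and
   DESIRED_ALIGNMENT == 8 shown):

       if (dest & 1) { copy/set 1 byte;  count -= 1; }
       if (dest & 2) { copy/set 2 bytes; count -= 2; }
       if (dest & 4) { copy/set 4 bytes; count -= 4; }

   each test being an ix86_expand_aligntest jump around the move, after
   which DESTMEM is known to be 8-byte aligned.  */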
23958
23959 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23960 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23961 and jump to DONE_LABEL. */
23962 static void
23963 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23964 rtx destptr, rtx srcptr,
23965 rtx value, rtx vec_value,
23966 rtx count, int size,
23967 rtx done_label, bool issetmem)
23968 {
23969 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23970 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23971 rtx modesize;
23972 int n;
23973
23974 /* If we do not have vector value to copy, we must reduce size. */
23975 if (issetmem)
23976 {
23977 if (!vec_value)
23978 {
23979 if (GET_MODE (value) == VOIDmode && size > 8)
23980 mode = Pmode;
23981 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23982 mode = GET_MODE (value);
23983 }
23984 else
23985 mode = GET_MODE (vec_value), value = vec_value;
23986 }
23987 else
23988 {
23989 /* Choose appropriate vector mode. */
23990 if (size >= 32)
23991 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23992 else if (size >= 16)
23993 mode = TARGET_SSE ? V16QImode : DImode;
23994 srcmem = change_address (srcmem, mode, srcptr);
23995 }
23996 destmem = change_address (destmem, mode, destptr);
23997 modesize = GEN_INT (GET_MODE_SIZE (mode));
23998 gcc_assert (GET_MODE_SIZE (mode) <= size);
23999 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24000 {
24001 if (issetmem)
24002 emit_move_insn (destmem, gen_lowpart (mode, value));
24003 else
24004 {
24005 emit_move_insn (destmem, srcmem);
24006 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24007 }
24008 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24009 }
24010
24011 destmem = offset_address (destmem, count, 1);
24012 destmem = offset_address (destmem, GEN_INT (-2 * size),
24013 GET_MODE_SIZE (mode));
24014 if (!issetmem)
24015 {
24016 srcmem = offset_address (srcmem, count, 1);
24017 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24018 GET_MODE_SIZE (mode));
24019 }
24020 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24021 {
24022 if (issetmem)
24023 emit_move_insn (destmem, gen_lowpart (mode, value));
24024 else
24025 {
24026 emit_move_insn (destmem, srcmem);
24027 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24028 }
24029 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24030 }
24031 emit_jump_insn (gen_jump (done_label));
24032 emit_barrier ();
24033
24034 emit_label (label);
24035 LABEL_NUSES (label) = 1;
24036 }
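
/* In pseudocode, the block above expands to (memcpy case, illustrative):

       if (count & size)          // i.e. size <= count < 2 * size here
         {
           copy SIZE bytes from src to dest;
           copy SIZE bytes from src + count - size to dest + count - size;
           goto done_label;       // the two copies may overlap; together
         }                        // they cover any length in SIZE..2*SIZE-1

   For ISSETMEM the loads are replaced by stores of VALUE/VEC_VALUE.  */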
24037
24038 /* Handle a small memcpy (up to SIZE, which is assumed to be a small power of 2)
24039 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24040 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24041 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24042 DONE_LABEL is a label after the whole copying sequence. The label is created
24043 on demand if *DONE_LABEL is NULL.
24044 MIN_SIZE is the minimal size of the block copied. This value gets adjusted
24045 for the new bounds after the initial copies.
24046
24047 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24048 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24049 we will dispatch to a library call for large blocks.
24050
24051 In pseudocode we do:
24052
24053 if (COUNT < SIZE)
24054 {
24055 Assume that SIZE is 4. Bigger sizes are handled analogously
24056 if (COUNT & 4)
24057 {
24058 copy 4 bytes from SRCPTR to DESTPTR
24059 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24060 goto done_label
24061 }
24062 if (!COUNT)
24063 goto done_label;
24064 copy 1 byte from SRCPTR to DESTPTR
24065 if (COUNT & 2)
24066 {
24067 copy 2 bytes from SRCPTR to DESTPTR
24068 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24069 }
24070 }
24071 else
24072 {
24073 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24074 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24075
24076 OLD_DESTPTR = DESTPTR;
24077 Align DESTPTR up to DESIRED_ALIGN
24078 SRCPTR += DESTPTR - OLD_DESTPTR
24079 COUNT -= DESTPTR - OLD_DESTPTR
24080 if (DYNAMIC_CHECK)
24081 Round COUNT down to multiple of SIZE
24082 << optional caller supplied zero size guard is here >>
24083 << optional caller supplied dynamic check is here >>
24084 << caller supplied main copy loop is here >>
24085 }
24086 done_label:
24087 */
24088 static void
24089 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24090 rtx *destptr, rtx *srcptr,
24091 machine_mode mode,
24092 rtx value, rtx vec_value,
24093 rtx *count,
24094 rtx_code_label **done_label,
24095 int size,
24096 int desired_align,
24097 int align,
24098 unsigned HOST_WIDE_INT *min_size,
24099 bool dynamic_check,
24100 bool issetmem)
24101 {
24102 rtx_code_label *loop_label = NULL, *label;
24103 int n;
24104 rtx modesize;
24105 int prolog_size = 0;
24106 rtx mode_value;
24107
24108 /* Choose the proper value to copy. */
24109 if (issetmem && VECTOR_MODE_P (mode))
24110 mode_value = vec_value;
24111 else
24112 mode_value = value;
24113 gcc_assert (GET_MODE_SIZE (mode) <= size);
24114
24115 /* See if block is big or small, handle small blocks. */
24116 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24117 {
24118 int size2 = size;
24119 loop_label = gen_label_rtx ();
24120
24121 if (!*done_label)
24122 *done_label = gen_label_rtx ();
24123
24124 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24125 1, loop_label);
24126 size2 >>= 1;
24127
24128 /* Handle sizes > 3. */
24129 for (;size2 > 2; size2 >>= 1)
24130 expand_small_movmem_or_setmem (destmem, srcmem,
24131 *destptr, *srcptr,
24132 value, vec_value,
24133 *count,
24134 size2, *done_label, issetmem);
24135 /* Nothing to copy? Jump to DONE_LABEL if so */
24136 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24137 1, *done_label);
24138
24139 /* Do a byte copy. */
24140 destmem = change_address (destmem, QImode, *destptr);
24141 if (issetmem)
24142 emit_move_insn (destmem, gen_lowpart (QImode, value));
24143 else
24144 {
24145 srcmem = change_address (srcmem, QImode, *srcptr);
24146 emit_move_insn (destmem, srcmem);
24147 }
24148
24149 /* Handle sizes 2 and 3. */
24150 label = ix86_expand_aligntest (*count, 2, false);
24151 destmem = change_address (destmem, HImode, *destptr);
24152 destmem = offset_address (destmem, *count, 1);
24153 destmem = offset_address (destmem, GEN_INT (-2), 2);
24154 if (issetmem)
24155 emit_move_insn (destmem, gen_lowpart (HImode, value));
24156 else
24157 {
24158 srcmem = change_address (srcmem, HImode, *srcptr);
24159 srcmem = offset_address (srcmem, *count, 1);
24160 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24161 emit_move_insn (destmem, srcmem);
24162 }
24163
24164 emit_label (label);
24165 LABEL_NUSES (label) = 1;
24166 emit_jump_insn (gen_jump (*done_label));
24167 emit_barrier ();
24168 }
24169 else
24170 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24171 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24172
24173 /* Start memcpy for COUNT >= SIZE. */
24174 if (loop_label)
24175 {
24176 emit_label (loop_label);
24177 LABEL_NUSES (loop_label) = 1;
24178 }
24179
24180 /* Copy first desired_align bytes. */
24181 if (!issetmem)
24182 srcmem = change_address (srcmem, mode, *srcptr);
24183 destmem = change_address (destmem, mode, *destptr);
24184 modesize = GEN_INT (GET_MODE_SIZE (mode));
24185 for (n = 0; prolog_size < desired_align - align; n++)
24186 {
24187 if (issetmem)
24188 emit_move_insn (destmem, mode_value);
24189 else
24190 {
24191 emit_move_insn (destmem, srcmem);
24192 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24193 }
24194 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24195 prolog_size += GET_MODE_SIZE (mode);
24196 }
24197
24198
24199 /* Copy last SIZE bytes. */
24200 destmem = offset_address (destmem, *count, 1);
24201 destmem = offset_address (destmem,
24202 GEN_INT (-size - prolog_size),
24203 1);
24204 if (issetmem)
24205 emit_move_insn (destmem, mode_value);
24206 else
24207 {
24208 srcmem = offset_address (srcmem, *count, 1);
24209 srcmem = offset_address (srcmem,
24210 GEN_INT (-size - prolog_size),
24211 1);
24212 emit_move_insn (destmem, srcmem);
24213 }
24214 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24215 {
24216 destmem = offset_address (destmem, modesize, 1);
24217 if (issetmem)
24218 emit_move_insn (destmem, mode_value);
24219 else
24220 {
24221 srcmem = offset_address (srcmem, modesize, 1);
24222 emit_move_insn (destmem, srcmem);
24223 }
24224 }
24225
24226 /* Align destination. */
24227 if (desired_align > 1 && desired_align > align)
24228 {
24229 rtx saveddest = *destptr;
24230
24231 gcc_assert (desired_align <= size);
24232 /* Align destptr up, place it to new register. */
24233 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24234 GEN_INT (prolog_size),
24235 NULL_RTX, 1, OPTAB_DIRECT);
24236 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24237 GEN_INT (-desired_align),
24238 *destptr, 1, OPTAB_DIRECT);
24239 /* See how many bytes we skipped. */
24240 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24241 *destptr,
24242 saveddest, 1, OPTAB_DIRECT);
24243 /* Adjust srcptr and count. */
24244 if (!issetmem)
24245 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24246 *srcptr, 1, OPTAB_DIRECT);
24247 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24248 saveddest, *count, 1, OPTAB_DIRECT);
24249 /* We copied at most size + prolog_size. */
24250 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24251 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24252 else
24253 *min_size = 0;
24254
24255 /* Our loops always round down the block size, but for dispatch to a library
24256 call we need the precise value. */
24257 if (dynamic_check)
24258 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24259 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24260 }
24261 else
24262 {
24263 gcc_assert (prolog_size == 0);
24264 /* Decrease count, so we won't end up copying last word twice. */
24265 if (!CONST_INT_P (*count))
24266 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24267 constm1_rtx, *count, 1, OPTAB_DIRECT);
24268 else
24269 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24270 if (*min_size)
24271 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24272 }
24273 }
24274
24275
24276 /* This function is like the previous one, except here we know how many bytes
24277 need to be copied. That allows us to update alignment not only of DST, which
24278 is returned, but also of SRC, which is passed as a pointer for that
24279 reason. */
24280 static rtx
24281 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24282 rtx srcreg, rtx value, rtx vec_value,
24283 int desired_align, int align_bytes,
24284 bool issetmem)
24285 {
24286 rtx src = NULL;
24287 rtx orig_dst = dst;
24288 rtx orig_src = NULL;
24289 int piece_size = 1;
24290 int copied_bytes = 0;
24291
24292 if (!issetmem)
24293 {
24294 gcc_assert (srcp != NULL);
24295 src = *srcp;
24296 orig_src = src;
24297 }
24298
24299 for (piece_size = 1;
24300 piece_size <= desired_align && copied_bytes < align_bytes;
24301 piece_size <<= 1)
24302 {
24303 if (align_bytes & piece_size)
24304 {
24305 if (issetmem)
24306 {
24307 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24308 dst = emit_memset (dst, destreg, vec_value, piece_size);
24309 else
24310 dst = emit_memset (dst, destreg, value, piece_size);
24311 }
24312 else
24313 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24314 copied_bytes += piece_size;
24315 }
24316 }
24317 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24318 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24319 if (MEM_SIZE_KNOWN_P (orig_dst))
24320 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24321
24322 if (!issetmem)
24323 {
24324 int src_align_bytes = get_mem_align_offset (src, desired_align
24325 * BITS_PER_UNIT);
24326 if (src_align_bytes >= 0)
24327 src_align_bytes = desired_align - src_align_bytes;
24328 if (src_align_bytes >= 0)
24329 {
24330 unsigned int src_align;
24331 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24332 {
24333 if ((src_align_bytes & (src_align - 1))
24334 == (align_bytes & (src_align - 1)))
24335 break;
24336 }
24337 if (src_align > (unsigned int) desired_align)
24338 src_align = desired_align;
24339 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24340 set_mem_align (src, src_align * BITS_PER_UNIT);
24341 }
24342 if (MEM_SIZE_KNOWN_P (orig_src))
24343 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24344 *srcp = src;
24345 }
24346
24347 return dst;
24348 }
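
/* Worked example: with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7 the loop
   above emits a 1-byte, a 2-byte and a 4-byte copy (or store), i.e. one
   piece per set bit of ALIGN_BYTES, while keeping the MEM alignment and
   size attributes of DST (and SRC for memcpy) accurate.  */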
24349
24350 /* Return true if ALG can be used in current context.
24351 Assume we expand memset if MEMSET is true. */
24352 static bool
24353 alg_usable_p (enum stringop_alg alg, bool memset)
24354 {
24355 if (alg == no_stringop)
24356 return false;
24357 if (alg == vector_loop)
24358 return TARGET_SSE || TARGET_AVX;
24359 /* Algorithms using the rep prefix want at least edi and ecx;
24360 additionally, memset wants eax and memcpy wants esi. Don't
24361 consider such algorithms if the user has appropriated those
24362 registers for their own purposes. */
24363 if (alg == rep_prefix_1_byte
24364 || alg == rep_prefix_4_byte
24365 || alg == rep_prefix_8_byte)
24366 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24367 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24368 return true;
24369 }
24370
24371 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24372 static enum stringop_alg
24373 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24374 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24375 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24376 {
24377 const struct stringop_algs * algs;
24378 bool optimize_for_speed;
24379 int max = 0;
24380 const struct processor_costs *cost;
24381 int i;
24382 bool any_alg_usable_p = false;
24383
24384 *noalign = false;
24385 *dynamic_check = -1;
24386
24387 /* Even if the string operation call is cold, we still might spend a lot
24388 of time processing large blocks. */
24389 if (optimize_function_for_size_p (cfun)
24390 || (optimize_insn_for_size_p ()
24391 && (max_size < 256
24392 || (expected_size != -1 && expected_size < 256))))
24393 optimize_for_speed = false;
24394 else
24395 optimize_for_speed = true;
24396
24397 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24398 if (memset)
24399 algs = &cost->memset[TARGET_64BIT != 0];
24400 else
24401 algs = &cost->memcpy[TARGET_64BIT != 0];
24402
24403 /* Find the maximal size covered by a usable non-libcall algorithm. */
24404 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24405 {
24406 enum stringop_alg candidate = algs->size[i].alg;
24407 bool usable = alg_usable_p (candidate, memset);
24408 any_alg_usable_p |= usable;
24409
24410 if (candidate != libcall && candidate && usable)
24411 max = algs->size[i].max;
24412 }
24413
24414 /* If the expected size is not known but the max size is small enough
24415 that the inline version is a win, set the expected size into
24416 the range. */
24417 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24418 && expected_size == -1)
24419 expected_size = min_size / 2 + max_size / 2;
24420
24421 /* If the user specified the algorithm, honor it if possible. */
24422 if (ix86_stringop_alg != no_stringop
24423 && alg_usable_p (ix86_stringop_alg, memset))
24424 return ix86_stringop_alg;
24425 /* rep; movq or rep; movl is the smallest variant. */
24426 else if (!optimize_for_speed)
24427 {
24428 *noalign = true;
24429 if (!count || (count & 3) || (memset && !zero_memset))
24430 return alg_usable_p (rep_prefix_1_byte, memset)
24431 ? rep_prefix_1_byte : loop_1_byte;
24432 else
24433 return alg_usable_p (rep_prefix_4_byte, memset)
24434 ? rep_prefix_4_byte : loop;
24435 }
24436 /* Very tiny blocks are best handled via the loop; REP is expensive to
24437 set up. */
24438 else if (expected_size != -1 && expected_size < 4)
24439 return loop_1_byte;
24440 else if (expected_size != -1)
24441 {
24442 enum stringop_alg alg = libcall;
24443 bool alg_noalign = false;
24444 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24445 {
24446 /* We get here if the algorithms that were not libcall-based
24447 were rep-prefix based and we are unable to use rep prefixes
24448 based on global register usage. Break out of the loop and
24449 use the heuristic below. */
24450 if (algs->size[i].max == 0)
24451 break;
24452 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24453 {
24454 enum stringop_alg candidate = algs->size[i].alg;
24455
24456 if (candidate != libcall && alg_usable_p (candidate, memset))
24457 {
24458 alg = candidate;
24459 alg_noalign = algs->size[i].noalign;
24460 }
24461 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24462 last non-libcall inline algorithm. */
24463 if (TARGET_INLINE_ALL_STRINGOPS)
24464 {
24465 /* When the current size is best to be copied by a libcall,
24466 but we are still forced to inline, run the heuristic below
24467 that will pick code for medium sized blocks. */
24468 if (alg != libcall)
24469 {
24470 *noalign = alg_noalign;
24471 return alg;
24472 }
24473 break;
24474 }
24475 else if (alg_usable_p (candidate, memset))
24476 {
24477 *noalign = algs->size[i].noalign;
24478 return candidate;
24479 }
24480 }
24481 }
24482 }
24483 /* When asked to inline the call anyway, try to pick a meaningful choice.
24484 We look for the maximal size of block that is faster to copy by hand and
24485 take blocks of at most that size, guessing that the average size will
24486 be roughly half of the block.
24487
24488 If this turns out to be bad, we might simply specify the preferred
24489 choice in ix86_costs. */
24490 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24491 && (algs->unknown_size == libcall
24492 || !alg_usable_p (algs->unknown_size, memset)))
24493 {
24494 enum stringop_alg alg;
24495
24496 /* If there aren't any usable algorithms, then recursing on
24497 smaller sizes isn't going to find anything. Just return the
24498 simple byte-at-a-time copy loop. */
24499 if (!any_alg_usable_p)
24500 {
24501 /* Pick something reasonable. */
24502 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24503 *dynamic_check = 128;
24504 return loop_1_byte;
24505 }
24506 if (max <= 0)
24507 max = 4096;
24508 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24509 zero_memset, dynamic_check, noalign);
24510 gcc_assert (*dynamic_check == -1);
24511 gcc_assert (alg != libcall);
24512 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24513 *dynamic_check = max;
24514 return alg;
24515 }
24516 return (alg_usable_p (algs->unknown_size, memset)
24517 ? algs->unknown_size : libcall);
24518 }
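
/* Illustration of the table walk above with a hypothetical cost entry
   (following the layout of struct stringop_algs; not taken from any real
   processor table):

       {libcall, {{24, loop, false}, {128, rep_prefix_8_byte, false},
                  {-1, libcall, false}}}

   An expected size of 100 would select rep_prefix_8_byte; larger sizes fall
   through to the unknown_size/libcall handling, unless -minline-all-stringops
   or -minline-stringops-dynamically forces an inline strategy.  */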
24519
24520 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24521 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24522 static int
24523 decide_alignment (int align,
24524 enum stringop_alg alg,
24525 int expected_size,
24526 machine_mode move_mode)
24527 {
24528 int desired_align = 0;
24529
24530 gcc_assert (alg != no_stringop);
24531
24532 if (alg == libcall)
24533 return 0;
24534 if (move_mode == VOIDmode)
24535 return 0;
24536
24537 desired_align = GET_MODE_SIZE (move_mode);
24538 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24539 copying a whole cacheline at once. */
24540 if (TARGET_PENTIUMPRO
24541 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24542 desired_align = 8;
24543
24544 if (optimize_size)
24545 desired_align = 1;
24546 if (desired_align < align)
24547 desired_align = align;
24548 if (expected_size != -1 && expected_size < 4)
24549 desired_align = align;
24550
24551 return desired_align;
24552 }
24553
24554
24555 /* Helper function for memset. For the QImode value 0xXY produce
24556 0xXYXYXYXY of the width specified by MODE. This is essentially
24557 VAL * 0x01010101, but we can do slightly better than
24558 synth_mult by unwinding the sequence by hand on CPUs with
24559 slow multiply. */
24560 static rtx
24561 promote_duplicated_reg (machine_mode mode, rtx val)
24562 {
24563 machine_mode valmode = GET_MODE (val);
24564 rtx tmp;
24565 int nops = mode == DImode ? 3 : 2;
24566
24567 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24568 if (val == const0_rtx)
24569 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24570 if (CONST_INT_P (val))
24571 {
24572 HOST_WIDE_INT v = INTVAL (val) & 255;
24573
24574 v |= v << 8;
24575 v |= v << 16;
24576 if (mode == DImode)
24577 v |= (v << 16) << 16;
24578 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24579 }
24580
24581 if (valmode == VOIDmode)
24582 valmode = QImode;
24583 if (valmode != QImode)
24584 val = gen_lowpart (QImode, val);
24585 if (mode == QImode)
24586 return val;
24587 if (!TARGET_PARTIAL_REG_STALL)
24588 nops--;
24589 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24590 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24591 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24592 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24593 {
24594 rtx reg = convert_modes (mode, QImode, val, true);
24595 tmp = promote_duplicated_reg (mode, const1_rtx);
24596 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24597 OPTAB_DIRECT);
24598 }
24599 else
24600 {
24601 rtx reg = convert_modes (mode, QImode, val, true);
24602
24603 if (!TARGET_PARTIAL_REG_STALL)
24604 if (mode == SImode)
24605 emit_insn (gen_movsi_insv_1 (reg, reg));
24606 else
24607 emit_insn (gen_movdi_insv_1 (reg, reg));
24608 else
24609 {
24610 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24611 NULL, 1, OPTAB_DIRECT);
24612 reg =
24613 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24614 }
24615 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24616 NULL, 1, OPTAB_DIRECT);
24617 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24618 if (mode == SImode)
24619 return reg;
24620 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24621 NULL, 1, OPTAB_DIRECT);
24622 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24623 return reg;
24624 }
24625 }
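
/* Worked example of the shift/or expansion above for a non-constant QImode
   value X promoted to DImode (illustrative C, not the emitted RTL):

       v  = (uint64_t) X;        // zero-extended QImode value, 0x00000000000000XY
       v |= v << 8;              // 0x000000000000XYXY
       v |= v << 16;             // 0x00000000XYXYXYXY
       v |= v << 32;             // 0xXYXYXYXYXYXYXYXY

   which gives the same result as multiplying by 0x0101010101010101; the cost
   comparison above picks whichever form is cheaper on the target.  */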
24626
24627 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24628 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24629 getting alignment from ALIGN to DESIRED_ALIGN. */
24630 static rtx
24631 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24632 int align)
24633 {
24634 rtx promoted_val;
24635
24636 if (TARGET_64BIT
24637 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24638 promoted_val = promote_duplicated_reg (DImode, val);
24639 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24640 promoted_val = promote_duplicated_reg (SImode, val);
24641 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24642 promoted_val = promote_duplicated_reg (HImode, val);
24643 else
24644 promoted_val = val;
24645
24646 return promoted_val;
24647 }
24648
24649 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24650 operations when profitable. The code depends upon architecture, block size
24651 and alignment, but always has one of the following overall structures:
24652
24653 Aligned move sequence:
24654
24655 1) Prologue guard: Conditional that jumps up to epilogues for small
24656 blocks that can be handled by the epilogue alone. This is faster
24657 but also needed for correctness, since the prologue assumes the block
24658 is larger than the desired alignment.
24659
24660 Optional dynamic check for size and libcall for large
24661 blocks is emitted here too, with -minline-stringops-dynamically.
24662
24663 2) Prologue: copy first few bytes in order to get destination
24664 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24665 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24666 copied. We emit either a jump tree on power of two sized
24667 blocks, or a byte loop.
24668
24669 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24670 with specified algorithm.
24671
24672 4) Epilogue: code copying tail of the block that is too small to be
24673 handled by main body (or up to size guarded by prologue guard).
24674
24675 Misaligned move sequence
24676
24677 1) Misaligned move prologue/epilogue containing:
24678 a) Prologue handling small memory blocks and jumping to done_label
24679 (skipped if blocks are known to be large enough)
24680 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24681 needed, done by a single possibly misaligned move
24682 (skipped if alignment is not needed)
24683 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24684
24685 2) Zero size guard dispatching to done_label, if needed
24686
24687 3) Dispatch to a library call, if needed
24688
24689 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24690 with specified algorithm. */
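/* A compressed sketch of the aligned structure above for an unknown COUNT
   (illustrative only; the real expansion is built step by step below):

       if (count < epilogue_size_needed) goto epilogue;     // 1) guard
       while (dest & (desired_align - 1))                   // 2) prologue
         { copy/set a few bytes; count -= ...; }
       main loop copying SIZE_NEEDED bytes per iteration;   // 3) body
     epilogue:                                              // 4) tail
       copy/set the remaining tail bytes;
*/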
24691 bool
24692 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24693 rtx align_exp, rtx expected_align_exp,
24694 rtx expected_size_exp, rtx min_size_exp,
24695 rtx max_size_exp, rtx probable_max_size_exp,
24696 bool issetmem)
24697 {
24698 rtx destreg;
24699 rtx srcreg = NULL;
24700 rtx_code_label *label = NULL;
24701 rtx tmp;
24702 rtx_code_label *jump_around_label = NULL;
24703 HOST_WIDE_INT align = 1;
24704 unsigned HOST_WIDE_INT count = 0;
24705 HOST_WIDE_INT expected_size = -1;
24706 int size_needed = 0, epilogue_size_needed;
24707 int desired_align = 0, align_bytes = 0;
24708 enum stringop_alg alg;
24709 rtx promoted_val = NULL;
24710 rtx vec_promoted_val = NULL;
24711 bool force_loopy_epilogue = false;
24712 int dynamic_check;
24713 bool need_zero_guard = false;
24714 bool noalign;
24715 machine_mode move_mode = VOIDmode;
24716 int unroll_factor = 1;
24717 /* TODO: Once value ranges are available, fill in proper data. */
24718 unsigned HOST_WIDE_INT min_size = 0;
24719 unsigned HOST_WIDE_INT max_size = -1;
24720 unsigned HOST_WIDE_INT probable_max_size = -1;
24721 bool misaligned_prologue_used = false;
24722
24723 if (CONST_INT_P (align_exp))
24724 align = INTVAL (align_exp);
24725 /* i386 can do misaligned access at a reasonably increased cost. */
24726 if (CONST_INT_P (expected_align_exp)
24727 && INTVAL (expected_align_exp) > align)
24728 align = INTVAL (expected_align_exp);
24729 /* ALIGN is the minimum of destination and source alignment, but we care here
24730 just about destination alignment. */
24731 else if (!issetmem
24732 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24733 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24734
24735 if (CONST_INT_P (count_exp))
24736 {
24737 min_size = max_size = probable_max_size = count = expected_size
24738 = INTVAL (count_exp);
24739 /* When COUNT is 0, there is nothing to do. */
24740 if (!count)
24741 return true;
24742 }
24743 else
24744 {
24745 if (min_size_exp)
24746 min_size = INTVAL (min_size_exp);
24747 if (max_size_exp)
24748 max_size = INTVAL (max_size_exp);
24749 if (probable_max_size_exp)
24750 probable_max_size = INTVAL (probable_max_size_exp);
24751 if (CONST_INT_P (expected_size_exp))
24752 expected_size = INTVAL (expected_size_exp);
24753 }
24754
24755 /* Make sure we don't need to care about overflow later on. */
24756 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24757 return false;
24758
24759 /* Step 0: Decide on preferred algorithm, desired alignment and
24760 size of chunks to be copied by main loop. */
24761 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24762 issetmem,
24763 issetmem && val_exp == const0_rtx,
24764 &dynamic_check, &noalign);
24765 if (alg == libcall)
24766 return false;
24767 gcc_assert (alg != no_stringop);
24768
24769 /* For now the vector version of memset is generated only for memory zeroing, as
24770 creating the promoted vector value is very cheap in that case. */
24771 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24772 alg = unrolled_loop;
24773
24774 if (!count)
24775 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24776 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24777 if (!issetmem)
24778 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24779
24780 unroll_factor = 1;
24781 move_mode = word_mode;
24782 switch (alg)
24783 {
24784 case libcall:
24785 case no_stringop:
24786 case last_alg:
24787 gcc_unreachable ();
24788 case loop_1_byte:
24789 need_zero_guard = true;
24790 move_mode = QImode;
24791 break;
24792 case loop:
24793 need_zero_guard = true;
24794 break;
24795 case unrolled_loop:
24796 need_zero_guard = true;
24797 unroll_factor = (TARGET_64BIT ? 4 : 2);
24798 break;
24799 case vector_loop:
24800 need_zero_guard = true;
24801 unroll_factor = 4;
24802 /* Find the widest supported mode. */
24803 move_mode = word_mode;
24804 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24805 != CODE_FOR_nothing)
24806 move_mode = GET_MODE_WIDER_MODE (move_mode);
24807
24808 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24809 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24810 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24811 {
24812 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24813 move_mode = mode_for_vector (word_mode, nunits);
24814 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24815 move_mode = word_mode;
24816 }
24817 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24818 break;
24819 case rep_prefix_8_byte:
24820 move_mode = DImode;
24821 break;
24822 case rep_prefix_4_byte:
24823 move_mode = SImode;
24824 break;
24825 case rep_prefix_1_byte:
24826 move_mode = QImode;
24827 break;
24828 }
24829 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24830 epilogue_size_needed = size_needed;
24831
24832 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24833 if (!TARGET_ALIGN_STRINGOPS || noalign)
24834 align = desired_align;
24835
24836 /* Step 1: Prologue guard. */
24837
24838 /* Alignment code needs count to be in register. */
24839 if (CONST_INT_P (count_exp) && desired_align > align)
24840 {
24841 if (INTVAL (count_exp) > desired_align
24842 && INTVAL (count_exp) > size_needed)
24843 {
24844 align_bytes
24845 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24846 if (align_bytes <= 0)
24847 align_bytes = 0;
24848 else
24849 align_bytes = desired_align - align_bytes;
24850 }
24851 if (align_bytes == 0)
24852 count_exp = force_reg (counter_mode (count_exp), count_exp);
24853 }
24854 gcc_assert (desired_align >= 1 && align >= 1);
24855
24856 /* Misaligned move sequences handle both prologue and epilogue at once.
24857 Default code generation results in smaller code for large alignments
24858 and also avoids redundant work when sizes are known precisely. */
24859 misaligned_prologue_used
24860 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24861 && MAX (desired_align, epilogue_size_needed) <= 32
24862 && desired_align <= epilogue_size_needed
24863 && ((desired_align > align && !align_bytes)
24864 || (!count && epilogue_size_needed > 1)));
24865
24866 /* Do the cheap promotion to allow better CSE across the
24867 main loop and epilogue (i.e. one load of the big constant in
24868 front of all the code).
24869 For now the misaligned move sequences do not have a fast path
24870 without broadcasting. */
24871 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24872 {
24873 if (alg == vector_loop)
24874 {
24875 gcc_assert (val_exp == const0_rtx);
24876 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24877 promoted_val = promote_duplicated_reg_to_size (val_exp,
24878 GET_MODE_SIZE (word_mode),
24879 desired_align, align);
24880 }
24881 else
24882 {
24883 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24884 desired_align, align);
24885 }
24886 }
24887 /* Misaligned move sequences handle both prologues and epilogues at once.
24888 Default code generation results in smaller code for large alignments and
24889 also avoids redundant work when sizes are known precisely. */
24890 if (misaligned_prologue_used)
24891 {
24892 /* The misaligned move prologue handles small blocks by itself. */
24893 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24894 (dst, src, &destreg, &srcreg,
24895 move_mode, promoted_val, vec_promoted_val,
24896 &count_exp,
24897 &jump_around_label,
24898 desired_align < align
24899 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24900 desired_align, align, &min_size, dynamic_check, issetmem);
24901 if (!issetmem)
24902 src = change_address (src, BLKmode, srcreg);
24903 dst = change_address (dst, BLKmode, destreg);
24904 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24905 epilogue_size_needed = 0;
24906 if (need_zero_guard && !min_size)
24907 {
24908 /* It is possible that we copied enough so the main loop will not
24909 execute. */
24910 gcc_assert (size_needed > 1);
24911 if (jump_around_label == NULL_RTX)
24912 jump_around_label = gen_label_rtx ();
24913 emit_cmp_and_jump_insns (count_exp,
24914 GEN_INT (size_needed),
24915 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24916 if (expected_size == -1
24917 || expected_size < (desired_align - align) / 2 + size_needed)
24918 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24919 else
24920 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24921 }
24922 }
24923 /* Ensure that alignment prologue won't copy past end of block. */
24924 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24925 {
24926 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24927 /* The epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24928 Make sure it is a power of 2. */
24929 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24930
24931 /* To improve performance of small blocks, we jump around the VAL
24932 promoting code. This means that if the promoted VAL is not constant,
24933 we might not use it in the epilogue and have to use the byte
24934 loop variant. */
24935 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24936 force_loopy_epilogue = true;
24937 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24938 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24939 {
24940 /* If main algorithm works on QImode, no epilogue is needed.
24941 For small sizes just don't align anything. */
24942 if (size_needed == 1)
24943 desired_align = align;
24944 else
24945 goto epilogue;
24946 }
24947 else if (!count
24948 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24949 {
24950 label = gen_label_rtx ();
24951 emit_cmp_and_jump_insns (count_exp,
24952 GEN_INT (epilogue_size_needed),
24953 LTU, 0, counter_mode (count_exp), 1, label);
24954 if (expected_size == -1 || expected_size < epilogue_size_needed)
24955 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24956 else
24957 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24958 }
24959 }
24960
24961 /* Emit code to decide on runtime whether library call or inline should be
24962 used. */
24963 if (dynamic_check != -1)
24964 {
24965 if (!issetmem && CONST_INT_P (count_exp))
24966 {
24967 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24968 {
24969 emit_block_move_via_libcall (dst, src, count_exp, false);
24970 count_exp = const0_rtx;
24971 goto epilogue;
24972 }
24973 }
24974 else
24975 {
24976 rtx_code_label *hot_label = gen_label_rtx ();
24977 if (jump_around_label == NULL_RTX)
24978 jump_around_label = gen_label_rtx ();
24979 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24980 LEU, 0, counter_mode (count_exp),
24981 1, hot_label);
24982 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24983 if (issetmem)
24984 set_storage_via_libcall (dst, count_exp, val_exp, false);
24985 else
24986 emit_block_move_via_libcall (dst, src, count_exp, false);
24987 emit_jump (jump_around_label);
24988 emit_label (hot_label);
24989 }
24990 }
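/* Illustrative sketch only, kept out of the build with #if 0: the shape of
   the code emitted by the dynamic_check block above for a copy, written as
   C.  DYNAMIC_CHECK and inline_copy are hypothetical names used only for
   this example; the real threshold is the dynamic_check value computed
   earlier in this function.  */
#if 0
#include <string.h>

static void
copy_with_runtime_check (char *dst, const char *src, unsigned long n)
{
  if (n <= DYNAMIC_CHECK - 1)
    inline_copy (dst, src, n);  /* the "hot" inline expansion below  */
  else
    memcpy (dst, src, n);       /* library call for large blocks  */
}
#endif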
24991
24992 /* Step 2: Alignment prologue. */
24993 /* Do the expensive promotion once we branched off the small blocks. */
24994 if (issetmem && !promoted_val)
24995 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24996 desired_align, align);
24997
24998 if (desired_align > align && !misaligned_prologue_used)
24999 {
25000 if (align_bytes == 0)
25001 {
25002 /* Except for the first move in the prologue, we no longer know
25003 the constant offset in the aliasing info. It does not seem worth
25004 the pain to maintain it for the first move, so throw away
25005 the info early. */
25006 dst = change_address (dst, BLKmode, destreg);
25007 if (!issetmem)
25008 src = change_address (src, BLKmode, srcreg);
25009 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25010 promoted_val, vec_promoted_val,
25011 count_exp, align, desired_align,
25012 issetmem);
25013 /* At most desired_align - align bytes are copied. */
25014 if (min_size < (unsigned)(desired_align - align))
25015 min_size = 0;
25016 else
25017 min_size -= desired_align - align;
25018 }
25019 else
25020 {
25021 /* If we know how many bytes need to be stored before dst is
25022 sufficiently aligned, maintain aliasing info accurately. */
25023 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25024 srcreg,
25025 promoted_val,
25026 vec_promoted_val,
25027 desired_align,
25028 align_bytes,
25029 issetmem);
25030
25031 count_exp = plus_constant (counter_mode (count_exp),
25032 count_exp, -align_bytes);
25033 count -= align_bytes;
25034 min_size -= align_bytes;
25035 max_size -= align_bytes;
25036 }
25037 if (need_zero_guard
25038 && !min_size
25039 && (count < (unsigned HOST_WIDE_INT) size_needed
25040 || (align_bytes == 0
25041 && count < ((unsigned HOST_WIDE_INT) size_needed
25042 + desired_align - align))))
25043 {
25044 /* It is possible that we copied enough so the main loop will not
25045 execute. */
25046 gcc_assert (size_needed > 1);
25047 if (label == NULL_RTX)
25048 label = gen_label_rtx ();
25049 emit_cmp_and_jump_insns (count_exp,
25050 GEN_INT (size_needed),
25051 LTU, 0, counter_mode (count_exp), 1, label);
25052 if (expected_size == -1
25053 || expected_size < (desired_align - align) / 2 + size_needed)
25054 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25055 else
25056 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25057 }
25058 }
25059 if (label && size_needed == 1)
25060 {
25061 emit_label (label);
25062 LABEL_NUSES (label) = 1;
25063 label = NULL;
25064 epilogue_size_needed = 1;
25065 if (issetmem)
25066 promoted_val = val_exp;
25067 }
25068 else if (label == NULL_RTX && !misaligned_prologue_used)
25069 epilogue_size_needed = size_needed;
25070
25071 /* Step 3: Main loop. */
25072
25073 switch (alg)
25074 {
25075 case libcall:
25076 case no_stringop:
25077 case last_alg:
25078 gcc_unreachable ();
25079 case loop_1_byte:
25080 case loop:
25081 case unrolled_loop:
25082 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25083 count_exp, move_mode, unroll_factor,
25084 expected_size, issetmem);
25085 break;
25086 case vector_loop:
25087 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25088 vec_promoted_val, count_exp, move_mode,
25089 unroll_factor, expected_size, issetmem);
25090 break;
25091 case rep_prefix_8_byte:
25092 case rep_prefix_4_byte:
25093 case rep_prefix_1_byte:
25094 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25095 val_exp, count_exp, move_mode, issetmem);
25096 break;
25097 }
25098 /* Properly adjust the offsets of src and dest memory for aliasing. */
25099 if (CONST_INT_P (count_exp))
25100 {
25101 if (!issetmem)
25102 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25103 (count / size_needed) * size_needed);
25104 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25105 (count / size_needed) * size_needed);
25106 }
25107 else
25108 {
25109 if (!issetmem)
25110 src = change_address (src, BLKmode, srcreg);
25111 dst = change_address (dst, BLKmode, destreg);
25112 }
25113
25114 /* Step 4: Epilogue to copy the remaining bytes. */
25115 epilogue:
25116 if (label)
25117 {
25118 /* When the main loop is done, COUNT_EXP might hold the original count,
25119 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25120 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25121 bytes. Compensate if needed. */
25122
25123 if (size_needed < epilogue_size_needed)
25124 {
25125 tmp =
25126 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25127 GEN_INT (size_needed - 1), count_exp, 1,
25128 OPTAB_DIRECT);
25129 if (tmp != count_exp)
25130 emit_move_insn (count_exp, tmp);
25131 }
25132 emit_label (label);
25133 LABEL_NUSES (label) = 1;
25134 }
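/* Illustrative sketch only, kept out of the build with #if 0: how a byte
   count is split between a main loop working in CHUNK-byte steps and an
   epilogue.  Because the chunk size was rounded up to a power of two, the
   remainder can be obtained with the single AND that expand_simple_binop
   emits above.  */
#if 0
static void
split_count (unsigned long count, unsigned long chunk /* power of two */,
             unsigned long *main_bytes, unsigned long *tail_bytes)
{
  *tail_bytes = count & (chunk - 1);  /* left for the epilogue  */
  *main_bytes = count - *tail_bytes;  /* handled by the main loop  */
}
#endif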
25135
25136 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25137 {
25138 if (force_loopy_epilogue)
25139 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25140 epilogue_size_needed);
25141 else
25142 {
25143 if (issetmem)
25144 expand_setmem_epilogue (dst, destreg, promoted_val,
25145 vec_promoted_val, count_exp,
25146 epilogue_size_needed);
25147 else
25148 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25149 epilogue_size_needed);
25150 }
25151 }
25152 if (jump_around_label)
25153 emit_label (jump_around_label);
25154 return true;
25155 }
25156
25157
25158 /* Expand the appropriate insns for doing strlen if not just doing
25159 repnz; scasb
25160
25161 out = result, initialized with the start address
25162 align_rtx = alignment of the address.
25163 scratch = scratch register, initialized with the start address when
25164 not aligned, otherwise undefined
25165
25166 This is just the body. It needs the initializations mentioned above and
25167 some address computation at the end. These things are done in i386.md. */
25168
25169 static void
25170 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25171 {
25172 int align;
25173 rtx tmp;
25174 rtx_code_label *align_2_label = NULL;
25175 rtx_code_label *align_3_label = NULL;
25176 rtx_code_label *align_4_label = gen_label_rtx ();
25177 rtx_code_label *end_0_label = gen_label_rtx ();
25178 rtx mem;
25179 rtx tmpreg = gen_reg_rtx (SImode);
25180 rtx scratch = gen_reg_rtx (SImode);
25181 rtx cmp;
25182
25183 align = 0;
25184 if (CONST_INT_P (align_rtx))
25185 align = INTVAL (align_rtx);
25186
25187 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25188
25189 /* Is there a known alignment and is it less than 4? */
25190 if (align < 4)
25191 {
25192 rtx scratch1 = gen_reg_rtx (Pmode);
25193 emit_move_insn (scratch1, out);
25194 /* Is there a known alignment and is it not 2? */
25195 if (align != 2)
25196 {
25197 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25198 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25199
25200 /* Leave just the 3 lower bits. */
25201 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25202 NULL_RTX, 0, OPTAB_WIDEN);
25203
25204 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25205 Pmode, 1, align_4_label);
25206 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25207 Pmode, 1, align_2_label);
25208 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25209 Pmode, 1, align_3_label);
25210 }
25211 else
25212 {
25213 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25214 check whether it is aligned to a 4-byte boundary. */
25215
25216 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25217 NULL_RTX, 0, OPTAB_WIDEN);
25218
25219 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25220 Pmode, 1, align_4_label);
25221 }
25222
25223 mem = change_address (src, QImode, out);
25224
25225 /* Now compare the bytes. */
25226
25227 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25228 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25229 QImode, 1, end_0_label);
25230
25231 /* Increment the address. */
25232 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25233
25234 /* Not needed with an alignment of 2 */
25235 if (align != 2)
25236 {
25237 emit_label (align_2_label);
25238
25239 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25240 end_0_label);
25241
25242 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25243
25244 emit_label (align_3_label);
25245 }
25246
25247 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25248 end_0_label);
25249
25250 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25251 }
25252
25253 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25254 align this loop; that only enlarges the program without making it
25255 any faster. */
25256 emit_label (align_4_label);
25257
25258 mem = change_address (src, SImode, out);
25259 emit_move_insn (scratch, mem);
25260 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25261
25262 /* This formula yields a nonzero result iff one of the bytes is zero.
25263 This saves three branches inside the loop and many cycles. */
25264
25265 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25266 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25267 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25268 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25269 gen_int_mode (0x80808080, SImode)));
25270 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25271 align_4_label);
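/* Illustrative sketch only, kept out of the build with #if 0: the classic
   "word has a zero byte" test that the add/not/and sequence above applies
   to each 4-byte chunk.  The code that follows then narrows down which of
   the four bytes was zero via the 0x8080 mask tests.  */
#if 0
#include <stdint.h>

static int
has_zero_byte (uint32_t word)
{
  /* (word - 0x01010101) borrows into bit 7 of a byte that was zero;
     masking with ~word filters out bytes whose own bit 7 was already set.
     The result is nonzero iff some byte of WORD is 0x00.  */
  return ((word - 0x01010101u) & ~word & 0x80808080u) != 0;
}
#endif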
25272
25273 if (TARGET_CMOVE)
25274 {
25275 rtx reg = gen_reg_rtx (SImode);
25276 rtx reg2 = gen_reg_rtx (Pmode);
25277 emit_move_insn (reg, tmpreg);
25278 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25279
25280 /* If zero is not in the first two bytes, move two bytes forward. */
25281 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25282 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25283 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25284 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25285 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25286 reg,
25287 tmpreg)));
25288 /* Emit lea manually to avoid clobbering of flags. */
25289 emit_insn (gen_rtx_SET (SImode, reg2,
25290 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25291
25292 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25293 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25294 emit_insn (gen_rtx_SET (VOIDmode, out,
25295 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25296 reg2,
25297 out)));
25298 }
25299 else
25300 {
25301 rtx_code_label *end_2_label = gen_label_rtx ();
25302 /* Is zero in the first two bytes? */
25303
25304 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25305 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25306 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25307 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25308 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25309 pc_rtx);
25310 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25311 JUMP_LABEL (tmp) = end_2_label;
25312
25313 /* Not in the first two. Move two bytes forward. */
25314 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25315 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25316
25317 emit_label (end_2_label);
25318
25319 }
25320
25321 /* Avoid branch in fixing the byte. */
25322 tmpreg = gen_lowpart (QImode, tmpreg);
25323 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25324 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25325 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25326 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25327
25328 emit_label (end_0_label);
25329 }
25330
25331 /* Expand strlen. */
25332
25333 bool
25334 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25335 {
25336 rtx addr, scratch1, scratch2, scratch3, scratch4;
25337
25338 /* The generic case of the strlen expander is long. Avoid
25339 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25340
25341 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25342 && !TARGET_INLINE_ALL_STRINGOPS
25343 && !optimize_insn_for_size_p ()
25344 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25345 return false;
25346
25347 addr = force_reg (Pmode, XEXP (src, 0));
25348 scratch1 = gen_reg_rtx (Pmode);
25349
25350 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25351 && !optimize_insn_for_size_p ())
25352 {
25353 /* Well, it seems that some optimizer does not combine a call like
25354 foo(strlen(bar), strlen(bar));
25355 when the move and the subtraction are done here. It does calculate
25356 the length just once when these instructions are done inside
25357 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25358 often used and I use one fewer register for the lifetime of
25359 output_strlen_unroll() this is better. */
25360
25361 emit_move_insn (out, addr);
25362
25363 ix86_expand_strlensi_unroll_1 (out, src, align);
25364
25365 /* strlensi_unroll_1 returns the address of the zero at the end of
25366 the string, like memchr(), so compute the length by subtracting
25367 the start address. */
25368 emit_insn (ix86_gen_sub3 (out, out, addr));
25369 }
25370 else
25371 {
25372 rtx unspec;
25373
25374 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25375 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25376 return false;
25377
25378 scratch2 = gen_reg_rtx (Pmode);
25379 scratch3 = gen_reg_rtx (Pmode);
25380 scratch4 = force_reg (Pmode, constm1_rtx);
25381
25382 emit_move_insn (scratch3, addr);
25383 eoschar = force_reg (QImode, eoschar);
25384
25385 src = replace_equiv_address_nv (src, scratch3);
25386
25387 /* If .md starts supporting :P, this can be done in .md. */
25388 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25389 scratch4), UNSPEC_SCAS);
25390 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25391 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25392 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25393 }
25394 return true;
25395 }
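/* Illustrative sketch only, kept out of the build with #if 0: the
   arithmetic behind the repnz/scasb path above.  ECX starts at -1
   (scratch4 = constm1_rtx) and is decremented once per byte scanned,
   including the terminating zero, so after the scan ECX == -(len + 2).
   The one_cmpl + add(-1) pair emitted above therefore recovers the
   length: ~ECX - 1 == (len + 1) - 1 == len.  */
#if 0
static unsigned long
strlen_from_scasb_counter (long ecx_after_scan /* == -(len + 2) */)
{
  return (unsigned long) (~ecx_after_scan - 1);
}
#endif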
25396
25397 /* For a given symbol (function), construct code to compute the address of its
25398 PLT entry in the large x86-64 PIC model. */
25399 static rtx
25400 construct_plt_address (rtx symbol)
25401 {
25402 rtx tmp, unspec;
25403
25404 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25405 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25406 gcc_assert (Pmode == DImode);
25407
25408 tmp = gen_reg_rtx (Pmode);
25409 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25410
25411 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25412 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25413 return tmp;
25414 }
25415
25416 rtx
25417 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25418 rtx callarg2,
25419 rtx pop, bool sibcall)
25420 {
25421 rtx vec[3];
25422 rtx use = NULL, call;
25423 unsigned int vec_len = 0;
25424
25425 if (pop == const0_rtx)
25426 pop = NULL;
25427 gcc_assert (!TARGET_64BIT || !pop);
25428
25429 if (TARGET_MACHO && !TARGET_64BIT)
25430 {
25431 #if TARGET_MACHO
25432 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25433 fnaddr = machopic_indirect_call_target (fnaddr);
25434 #endif
25435 }
25436 else
25437 {
25438 /* Static functions and indirect calls don't need the pic register. */
25439 if (flag_pic
25440 && (!TARGET_64BIT
25441 || (ix86_cmodel == CM_LARGE_PIC
25442 && DEFAULT_ABI != MS_ABI))
25443 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25444 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25445 {
25446 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25447 if (ix86_use_pseudo_pic_reg ())
25448 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25449 pic_offset_table_rtx);
25450 }
25451 }
25452
25453 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25454 {
25455 rtx al = gen_rtx_REG (QImode, AX_REG);
25456 emit_move_insn (al, callarg2);
25457 use_reg (&use, al);
25458 }
25459
25460 if (ix86_cmodel == CM_LARGE_PIC
25461 && !TARGET_PECOFF
25462 && MEM_P (fnaddr)
25463 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25464 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25465 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25466 else if (sibcall
25467 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25468 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25469 {
25470 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25471 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25472 }
25473
25474 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25475
25476 if (retval)
25477 {
25478 /* We should add bounds as a destination register in case
25479 a pointer with bounds may be returned. */
25480 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25481 {
25482 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25483 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25484 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25485 chkp_put_regs_to_expr_list (retval);
25486 }
25487
25488 call = gen_rtx_SET (VOIDmode, retval, call);
25489 }
25490 vec[vec_len++] = call;
25491
25492 if (pop)
25493 {
25494 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25495 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25496 vec[vec_len++] = pop;
25497 }
25498
25499 if (TARGET_64BIT_MS_ABI
25500 && (!callarg2 || INTVAL (callarg2) != -2))
25501 {
25502 int const cregs_size
25503 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25504 int i;
25505
25506 for (i = 0; i < cregs_size; i++)
25507 {
25508 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25509 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25510
25511 clobber_reg (&use, gen_rtx_REG (mode, regno));
25512 }
25513 }
25514
25515 if (vec_len > 1)
25516 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25517 call = emit_call_insn (call);
25518 if (use)
25519 CALL_INSN_FUNCTION_USAGE (call) = use;
25520
25521 return call;
25522 }
25523
25524 /* Output the assembly for a call instruction. */
25525
25526 const char *
25527 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25528 {
25529 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25530 bool seh_nop_p = false;
25531 const char *xasm;
25532
25533 if (SIBLING_CALL_P (insn))
25534 {
25535 if (direct_p)
25536 xasm = "%!jmp\t%P0";
25537 /* SEH epilogue detection requires the indirect branch case
25538 to include REX.W. */
25539 else if (TARGET_SEH)
25540 xasm = "%!rex.W jmp %A0";
25541 else
25542 xasm = "%!jmp\t%A0";
25543
25544 output_asm_insn (xasm, &call_op);
25545 return "";
25546 }
25547
25548 /* SEH unwinding can require an extra nop to be emitted in several
25549 circumstances. Determine if we have one of those. */
25550 if (TARGET_SEH)
25551 {
25552 rtx_insn *i;
25553
25554 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25555 {
25556 /* If we get to another real insn, we don't need the nop. */
25557 if (INSN_P (i))
25558 break;
25559
25560 /* If we get to the epilogue note, prevent a catch region from
25561 being adjacent to the standard epilogue sequence. If non-call
25562 exceptions are enabled, we'll have done this during epilogue emission. */
25563 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25564 && !flag_non_call_exceptions
25565 && !can_throw_internal (insn))
25566 {
25567 seh_nop_p = true;
25568 break;
25569 }
25570 }
25571
25572 /* If we didn't find a real insn following the call, prevent the
25573 unwinder from looking into the next function. */
25574 if (i == NULL)
25575 seh_nop_p = true;
25576 }
25577
25578 if (direct_p)
25579 xasm = "%!call\t%P0";
25580 else
25581 xasm = "%!call\t%A0";
25582
25583 output_asm_insn (xasm, &call_op);
25584
25585 if (seh_nop_p)
25586 return "nop";
25587
25588 return "";
25589 }
25590 \f
25591 /* Clear stack slot assignments remembered from previous functions.
25592 This is called from INIT_EXPANDERS once before RTL is emitted for each
25593 function. */
25594
25595 static struct machine_function *
25596 ix86_init_machine_status (void)
25597 {
25598 struct machine_function *f;
25599
25600 f = ggc_cleared_alloc<machine_function> ();
25601 f->use_fast_prologue_epilogue_nregs = -1;
25602 f->call_abi = ix86_abi;
25603
25604 return f;
25605 }
25606
25607 /* Return a MEM corresponding to a stack slot with mode MODE.
25608 Allocate a new slot if necessary.
25609
25610 The RTL for a function can have several slots available: N is
25611 which slot to use. */
25612
25613 rtx
25614 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25615 {
25616 struct stack_local_entry *s;
25617
25618 gcc_assert (n < MAX_386_STACK_LOCALS);
25619
25620 for (s = ix86_stack_locals; s; s = s->next)
25621 if (s->mode == mode && s->n == n)
25622 return validize_mem (copy_rtx (s->rtl));
25623
25624 s = ggc_alloc<stack_local_entry> ();
25625 s->n = n;
25626 s->mode = mode;
25627 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25628
25629 s->next = ix86_stack_locals;
25630 ix86_stack_locals = s;
25631 return validize_mem (copy_rtx (s->rtl));
25632 }
25633
25634 static void
25635 ix86_instantiate_decls (void)
25636 {
25637 struct stack_local_entry *s;
25638
25639 for (s = ix86_stack_locals; s; s = s->next)
25640 if (s->rtl != NULL_RTX)
25641 instantiate_decl_rtl (s->rtl);
25642 }
25643 \f
25644 /* Check whether x86 address PARTS is a pc-relative address. */
25645
25646 static bool
25647 rip_relative_addr_p (struct ix86_address *parts)
25648 {
25649 rtx base, index, disp;
25650
25651 base = parts->base;
25652 index = parts->index;
25653 disp = parts->disp;
25654
25655 if (disp && !base && !index)
25656 {
25657 if (TARGET_64BIT)
25658 {
25659 rtx symbol = disp;
25660
25661 if (GET_CODE (disp) == CONST)
25662 symbol = XEXP (disp, 0);
25663 if (GET_CODE (symbol) == PLUS
25664 && CONST_INT_P (XEXP (symbol, 1)))
25665 symbol = XEXP (symbol, 0);
25666
25667 if (GET_CODE (symbol) == LABEL_REF
25668 || (GET_CODE (symbol) == SYMBOL_REF
25669 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25670 || (GET_CODE (symbol) == UNSPEC
25671 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25672 || XINT (symbol, 1) == UNSPEC_PCREL
25673 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25674 return true;
25675 }
25676 }
25677 return false;
25678 }
25679
25680 /* Calculate the length of the memory address in the instruction encoding.
25681 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25682 or other prefixes. We never generate addr32 prefix for LEA insn. */
25683
25684 int
25685 memory_address_length (rtx addr, bool lea)
25686 {
25687 struct ix86_address parts;
25688 rtx base, index, disp;
25689 int len;
25690 int ok;
25691
25692 if (GET_CODE (addr) == PRE_DEC
25693 || GET_CODE (addr) == POST_INC
25694 || GET_CODE (addr) == PRE_MODIFY
25695 || GET_CODE (addr) == POST_MODIFY)
25696 return 0;
25697
25698 ok = ix86_decompose_address (addr, &parts);
25699 gcc_assert (ok);
25700
25701 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25702
25703 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25704 if (TARGET_64BIT && !lea
25705 && (SImode_address_operand (addr, VOIDmode)
25706 || (parts.base && GET_MODE (parts.base) == SImode)
25707 || (parts.index && GET_MODE (parts.index) == SImode)))
25708 len++;
25709
25710 base = parts.base;
25711 index = parts.index;
25712 disp = parts.disp;
25713
25714 if (base && GET_CODE (base) == SUBREG)
25715 base = SUBREG_REG (base);
25716 if (index && GET_CODE (index) == SUBREG)
25717 index = SUBREG_REG (index);
25718
25719 gcc_assert (base == NULL_RTX || REG_P (base));
25720 gcc_assert (index == NULL_RTX || REG_P (index));
25721
25722 /* Rule of thumb:
25723 - esp as the base always wants an index,
25724 - ebp as the base always wants a displacement,
25725 - r12 as the base always wants an index,
25726 - r13 as the base always wants a displacement. */
25727
25728 /* Register Indirect. */
25729 if (base && !index && !disp)
25730 {
25731 /* esp (for its index) and ebp (for its displacement) need
25732 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25733 code. */
25734 if (base == arg_pointer_rtx
25735 || base == frame_pointer_rtx
25736 || REGNO (base) == SP_REG
25737 || REGNO (base) == BP_REG
25738 || REGNO (base) == R12_REG
25739 || REGNO (base) == R13_REG)
25740 len++;
25741 }
25742
25743 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25744 is not disp32, but disp32(%rip), so for disp32
25745 SIB byte is needed, unless print_operand_address
25746 optimizes it into disp32(%rip) or (%rip) is implied
25747 by UNSPEC. */
25748 else if (disp && !base && !index)
25749 {
25750 len += 4;
25751 if (rip_relative_addr_p (&parts))
25752 len++;
25753 }
25754 else
25755 {
25756 /* Find the length of the displacement constant. */
25757 if (disp)
25758 {
25759 if (base && satisfies_constraint_K (disp))
25760 len += 1;
25761 else
25762 len += 4;
25763 }
25764 /* ebp always wants a displacement. Similarly r13. */
25765 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25766 len++;
25767
25768 /* An index requires the two-byte modrm form.... */
25769 if (index
25770 /* ...like esp (or r12), which always wants an index. */
25771 || base == arg_pointer_rtx
25772 || base == frame_pointer_rtx
25773 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25774 len++;
25775 }
25776
25777 return len;
25778 }
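/* Illustrative sketch only, kept out of the build with #if 0: a simplified,
   stand-alone restatement of the rule of thumb above for the
   base-register-only case (no index, no displacement), ignoring segment
   overrides and the addr32 prefix.  */
#if 0
static int
extra_bytes_for_base_only (int base_is_sp_or_r12, int base_is_bp_or_r13)
{
  /* %esp/%r12 as a base force a SIB byte, and %ebp/%r13 as a base force a
     zero displacement byte, because their short encodings are taken by
     "SIB follows" and "disp32 only" respectively.  */
  return (base_is_sp_or_r12 || base_is_bp_or_r13) ? 1 : 0;
}
#endif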
25779
25780 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25781 is set, expect that the insn has an 8-bit immediate alternative. */
25782 int
25783 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25784 {
25785 int len = 0;
25786 int i;
25787 extract_insn_cached (insn);
25788 for (i = recog_data.n_operands - 1; i >= 0; --i)
25789 if (CONSTANT_P (recog_data.operand[i]))
25790 {
25791 enum attr_mode mode = get_attr_mode (insn);
25792
25793 gcc_assert (!len);
25794 if (shortform && CONST_INT_P (recog_data.operand[i]))
25795 {
25796 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25797 switch (mode)
25798 {
25799 case MODE_QI:
25800 len = 1;
25801 continue;
25802 case MODE_HI:
25803 ival = trunc_int_for_mode (ival, HImode);
25804 break;
25805 case MODE_SI:
25806 ival = trunc_int_for_mode (ival, SImode);
25807 break;
25808 default:
25809 break;
25810 }
25811 if (IN_RANGE (ival, -128, 127))
25812 {
25813 len = 1;
25814 continue;
25815 }
25816 }
25817 switch (mode)
25818 {
25819 case MODE_QI:
25820 len = 1;
25821 break;
25822 case MODE_HI:
25823 len = 2;
25824 break;
25825 case MODE_SI:
25826 len = 4;
25827 break;
25828 /* Immediates for DImode instructions are encoded
25829 as 32bit sign extended values. */
25830 case MODE_DI:
25831 len = 4;
25832 break;
25833 default:
25834 fatal_insn ("unknown insn mode", insn);
25835 }
25836 }
25837 return len;
25838 }
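/* Illustrative sketch only, kept out of the build with #if 0: the
   short-form rule applied above.  Many ALU insns have an alternative
   encoding with a sign-extended 8-bit immediate, so a constant in
   [-128, 127] costs one byte; otherwise the immediate takes 1, 2 or 4
   bytes by mode, with DImode still using a 32-bit sign-extended field.  */
#if 0
static int
immediate_length (long ival, int mode_size, int has_imm8_alternative)
{
  if (has_imm8_alternative && ival >= -128 && ival <= 127)
    return 1;
  return mode_size > 4 ? 4 : mode_size;
}
#endif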
25839
25840 /* Compute default value for "length_address" attribute. */
25841 int
25842 ix86_attr_length_address_default (rtx_insn *insn)
25843 {
25844 int i;
25845
25846 if (get_attr_type (insn) == TYPE_LEA)
25847 {
25848 rtx set = PATTERN (insn), addr;
25849
25850 if (GET_CODE (set) == PARALLEL)
25851 set = XVECEXP (set, 0, 0);
25852
25853 gcc_assert (GET_CODE (set) == SET);
25854
25855 addr = SET_SRC (set);
25856
25857 return memory_address_length (addr, true);
25858 }
25859
25860 extract_insn_cached (insn);
25861 for (i = recog_data.n_operands - 1; i >= 0; --i)
25862 if (MEM_P (recog_data.operand[i]))
25863 {
25864 constrain_operands_cached (insn, reload_completed);
25865 if (which_alternative != -1)
25866 {
25867 const char *constraints = recog_data.constraints[i];
25868 int alt = which_alternative;
25869
25870 while (*constraints == '=' || *constraints == '+')
25871 constraints++;
25872 while (alt-- > 0)
25873 while (*constraints++ != ',')
25874 ;
25875 /* Skip ignored operands. */
25876 if (*constraints == 'X')
25877 continue;
25878 }
25879 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25880 }
25881 return 0;
25882 }
25883
25884 /* Compute default value for "length_vex" attribute. It includes
25885 2 or 3 byte VEX prefix and 1 opcode byte. */
25886
25887 int
25888 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25889 bool has_vex_w)
25890 {
25891 int i;
25892
25893 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
25894 bit requires the 3-byte VEX prefix. */
25895 if (!has_0f_opcode || has_vex_w)
25896 return 3 + 1;
25897
25898 /* We can always use 2 byte VEX prefix in 32bit. */
25899 if (!TARGET_64BIT)
25900 return 2 + 1;
25901
25902 extract_insn_cached (insn);
25903
25904 for (i = recog_data.n_operands - 1; i >= 0; --i)
25905 if (REG_P (recog_data.operand[i]))
25906 {
25907 /* REX.W bit uses 3 byte VEX prefix. */
25908 if (GET_MODE (recog_data.operand[i]) == DImode
25909 && GENERAL_REG_P (recog_data.operand[i]))
25910 return 3 + 1;
25911 }
25912 else
25913 {
25914 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25915 if (MEM_P (recog_data.operand[i])
25916 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25917 return 3 + 1;
25918 }
25919
25920 return 2 + 1;
25921 }
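/* Illustrative sketch only, kept out of the build with #if 0: the prefix
   length rule applied above.  The 2-byte VEX form can express only the 0f
   opcode map with VEX.W clear and without the X/B register-extension bits,
   so anything needing W, X or B falls back to the 3-byte form.  */
#if 0
static int
vex_prefix_length (int is_0f_map, int needs_w, int needs_x_or_b)
{
  return (is_0f_map && !needs_w && !needs_x_or_b) ? 2 : 3;
}
#endif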
25922 \f
25923 /* Return the maximum number of instructions a cpu can issue. */
25924
25925 static int
25926 ix86_issue_rate (void)
25927 {
25928 switch (ix86_tune)
25929 {
25930 case PROCESSOR_PENTIUM:
25931 case PROCESSOR_BONNELL:
25932 case PROCESSOR_SILVERMONT:
25933 case PROCESSOR_INTEL:
25934 case PROCESSOR_K6:
25935 case PROCESSOR_BTVER2:
25936 case PROCESSOR_PENTIUM4:
25937 case PROCESSOR_NOCONA:
25938 return 2;
25939
25940 case PROCESSOR_PENTIUMPRO:
25941 case PROCESSOR_ATHLON:
25942 case PROCESSOR_K8:
25943 case PROCESSOR_AMDFAM10:
25944 case PROCESSOR_GENERIC:
25945 case PROCESSOR_BTVER1:
25946 return 3;
25947
25948 case PROCESSOR_BDVER1:
25949 case PROCESSOR_BDVER2:
25950 case PROCESSOR_BDVER3:
25951 case PROCESSOR_BDVER4:
25952 case PROCESSOR_CORE2:
25953 case PROCESSOR_NEHALEM:
25954 case PROCESSOR_SANDYBRIDGE:
25955 case PROCESSOR_HASWELL:
25956 return 4;
25957
25958 default:
25959 return 1;
25960 }
25961 }
25962
25963 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25964 by DEP_INSN and nothing else set by DEP_INSN. */
25965
25966 static bool
25967 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25968 {
25969 rtx set, set2;
25970
25971 /* Simplify the test for uninteresting insns. */
25972 if (insn_type != TYPE_SETCC
25973 && insn_type != TYPE_ICMOV
25974 && insn_type != TYPE_FCMOV
25975 && insn_type != TYPE_IBR)
25976 return false;
25977
25978 if ((set = single_set (dep_insn)) != 0)
25979 {
25980 set = SET_DEST (set);
25981 set2 = NULL_RTX;
25982 }
25983 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25984 && XVECLEN (PATTERN (dep_insn), 0) == 2
25985 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25986 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25987 {
25988 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25989 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25990 }
25991 else
25992 return false;
25993
25994 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25995 return false;
25996
25997 /* This test is true if the dependent insn reads the flags but
25998 not any other potentially set register. */
25999 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26000 return false;
26001
26002 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26003 return false;
26004
26005 return true;
26006 }
26007
26008 /* Return true iff USE_INSN has a memory address with operands set by
26009 SET_INSN. */
26010
26011 bool
26012 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26013 {
26014 int i;
26015 extract_insn_cached (use_insn);
26016 for (i = recog_data.n_operands - 1; i >= 0; --i)
26017 if (MEM_P (recog_data.operand[i]))
26018 {
26019 rtx addr = XEXP (recog_data.operand[i], 0);
26020 return modified_in_p (addr, set_insn) != 0;
26021 }
26022 return false;
26023 }
26024
26025 /* Helper function for exact_store_load_dependency.
26026 Return true if addr is found in insn. */
26027 static bool
26028 exact_dependency_1 (rtx addr, rtx insn)
26029 {
26030 enum rtx_code code;
26031 const char *format_ptr;
26032 int i, j;
26033
26034 code = GET_CODE (insn);
26035 switch (code)
26036 {
26037 case MEM:
26038 if (rtx_equal_p (addr, insn))
26039 return true;
26040 break;
26041 case REG:
26042 CASE_CONST_ANY:
26043 case SYMBOL_REF:
26044 case CODE_LABEL:
26045 case PC:
26046 case CC0:
26047 case EXPR_LIST:
26048 return false;
26049 default:
26050 break;
26051 }
26052
26053 format_ptr = GET_RTX_FORMAT (code);
26054 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26055 {
26056 switch (*format_ptr++)
26057 {
26058 case 'e':
26059 if (exact_dependency_1 (addr, XEXP (insn, i)))
26060 return true;
26061 break;
26062 case 'E':
26063 for (j = 0; j < XVECLEN (insn, i); j++)
26064 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26065 return true;
26066 break;
26067 }
26068 }
26069 return false;
26070 }
26071
26072 /* Return true if there exists an exact dependency between a store & a load,
26073 i.e. the same memory address is used in both. */
26074 static bool
26075 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26076 {
26077 rtx set1, set2;
26078
26079 set1 = single_set (store);
26080 if (!set1)
26081 return false;
26082 if (!MEM_P (SET_DEST (set1)))
26083 return false;
26084 set2 = single_set (load);
26085 if (!set2)
26086 return false;
26087 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26088 return true;
26089 return false;
26090 }
26091
26092 static int
26093 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26094 {
26095 enum attr_type insn_type, dep_insn_type;
26096 enum attr_memory memory;
26097 rtx set, set2;
26098 int dep_insn_code_number;
26099
26100 /* Anti and output dependencies have zero cost on all CPUs. */
26101 if (REG_NOTE_KIND (link) != 0)
26102 return 0;
26103
26104 dep_insn_code_number = recog_memoized (dep_insn);
26105
26106 /* If we can't recognize the insns, we can't really do anything. */
26107 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26108 return cost;
26109
26110 insn_type = get_attr_type (insn);
26111 dep_insn_type = get_attr_type (dep_insn);
26112
26113 switch (ix86_tune)
26114 {
26115 case PROCESSOR_PENTIUM:
26116 /* Address Generation Interlock adds a cycle of latency. */
26117 if (insn_type == TYPE_LEA)
26118 {
26119 rtx addr = PATTERN (insn);
26120
26121 if (GET_CODE (addr) == PARALLEL)
26122 addr = XVECEXP (addr, 0, 0);
26123
26124 gcc_assert (GET_CODE (addr) == SET);
26125
26126 addr = SET_SRC (addr);
26127 if (modified_in_p (addr, dep_insn))
26128 cost += 1;
26129 }
26130 else if (ix86_agi_dependent (dep_insn, insn))
26131 cost += 1;
26132
26133 /* ??? Compares pair with jump/setcc. */
26134 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26135 cost = 0;
26136
26137 /* Floating point stores require value to be ready one cycle earlier. */
26138 if (insn_type == TYPE_FMOV
26139 && get_attr_memory (insn) == MEMORY_STORE
26140 && !ix86_agi_dependent (dep_insn, insn))
26141 cost += 1;
26142 break;
26143
26144 case PROCESSOR_PENTIUMPRO:
26145 /* INT->FP conversion is expensive. */
26146 if (get_attr_fp_int_src (dep_insn))
26147 cost += 5;
26148
26149 /* There is one cycle extra latency between an FP op and a store. */
26150 if (insn_type == TYPE_FMOV
26151 && (set = single_set (dep_insn)) != NULL_RTX
26152 && (set2 = single_set (insn)) != NULL_RTX
26153 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26154 && MEM_P (SET_DEST (set2)))
26155 cost += 1;
26156
26157 memory = get_attr_memory (insn);
26158
26159 /* Show the ability of the reorder buffer to hide the latency of a load
26160 by executing it in parallel with the previous instruction when the
26161 previous instruction is not needed to compute the address. */
26162 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26163 && !ix86_agi_dependent (dep_insn, insn))
26164 {
26165 /* Claim moves to take one cycle, as the core can issue one load
26166 at a time and the next load can start a cycle later. */
26167 if (dep_insn_type == TYPE_IMOV
26168 || dep_insn_type == TYPE_FMOV)
26169 cost = 1;
26170 else if (cost > 1)
26171 cost--;
26172 }
26173 break;
26174
26175 case PROCESSOR_K6:
26176 /* The esp dependency is resolved before
26177 the instruction is really finished. */
26178 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26179 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26180 return 1;
26181
26182 /* INT->FP conversion is expensive. */
26183 if (get_attr_fp_int_src (dep_insn))
26184 cost += 5;
26185
26186 memory = get_attr_memory (insn);
26187
26188 /* Show the ability of the reorder buffer to hide the latency of a load
26189 by executing it in parallel with the previous instruction when the
26190 previous instruction is not needed to compute the address. */
26191 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26192 && !ix86_agi_dependent (dep_insn, insn))
26193 {
26194 /* Claim moves to take one cycle, as the core can issue one load
26195 at a time and the next load can start a cycle later. */
26196 if (dep_insn_type == TYPE_IMOV
26197 || dep_insn_type == TYPE_FMOV)
26198 cost = 1;
26199 else if (cost > 2)
26200 cost -= 2;
26201 else
26202 cost = 1;
26203 }
26204 break;
26205
26206 case PROCESSOR_AMDFAM10:
26207 case PROCESSOR_BDVER1:
26208 case PROCESSOR_BDVER2:
26209 case PROCESSOR_BDVER3:
26210 case PROCESSOR_BDVER4:
26211 case PROCESSOR_BTVER1:
26212 case PROCESSOR_BTVER2:
26213 case PROCESSOR_GENERIC:
26214 /* The stack engine allows push&pop instructions to execute in parallel. */
26215 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26216 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26217 return 0;
26218 /* FALLTHRU */
26219
26220 case PROCESSOR_ATHLON:
26221 case PROCESSOR_K8:
26222 memory = get_attr_memory (insn);
26223
26224 /* Show the ability of the reorder buffer to hide the latency of a load
26225 by executing it in parallel with the previous instruction when the
26226 previous instruction is not needed to compute the address. */
26227 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26228 && !ix86_agi_dependent (dep_insn, insn))
26229 {
26230 enum attr_unit unit = get_attr_unit (insn);
26231 int loadcost = 3;
26232
26233 /* Because of the difference between the length of integer and
26234 floating unit pipeline preparation stages, the memory operands
26235 for floating point are cheaper.
26236
26237 ??? For Athlon the difference is most probably 2. */
26238 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26239 loadcost = 3;
26240 else
26241 loadcost = TARGET_ATHLON ? 2 : 0;
26242
26243 if (cost >= loadcost)
26244 cost -= loadcost;
26245 else
26246 cost = 0;
26247 }
26248 break;
26249
26250 case PROCESSOR_CORE2:
26251 case PROCESSOR_NEHALEM:
26252 case PROCESSOR_SANDYBRIDGE:
26253 case PROCESSOR_HASWELL:
26254 /* The stack engine allows push&pop instructions to execute in parallel. */
26255 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26256 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26257 return 0;
26258
26259 memory = get_attr_memory (insn);
26260
26261 /* Show the ability of the reorder buffer to hide the latency of a load
26262 by executing it in parallel with the previous instruction when the
26263 previous instruction is not needed to compute the address. */
26264 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26265 && !ix86_agi_dependent (dep_insn, insn))
26266 {
26267 if (cost >= 4)
26268 cost -= 4;
26269 else
26270 cost = 0;
26271 }
26272 break;
26273
26274 case PROCESSOR_SILVERMONT:
26275 case PROCESSOR_INTEL:
26276 if (!reload_completed)
26277 return cost;
26278
26279 /* Increase cost of integer loads. */
26280 memory = get_attr_memory (dep_insn);
26281 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26282 {
26283 enum attr_unit unit = get_attr_unit (dep_insn);
26284 if (unit == UNIT_INTEGER && cost == 1)
26285 {
26286 if (memory == MEMORY_LOAD)
26287 cost = 3;
26288 else
26289 {
26290 /* Increase the cost of ld/st for short int types only
26291 because of the store forwarding issue. */
26292 rtx set = single_set (dep_insn);
26293 if (set && (GET_MODE (SET_DEST (set)) == QImode
26294 || GET_MODE (SET_DEST (set)) == HImode))
26295 {
26296 /* Increase the cost of the store/load insn if an exact
26297 dependence exists and it is a load insn. */
26298 enum attr_memory insn_memory = get_attr_memory (insn);
26299 if (insn_memory == MEMORY_LOAD
26300 && exact_store_load_dependency (dep_insn, insn))
26301 cost = 3;
26302 }
26303 }
26304 }
26305 }
26306
26307 default:
26308 break;
26309 }
26310
26311 return cost;
26312 }
26313
26314 /* How many alternative schedules to try. This should be as wide as the
26315 scheduling freedom in the DFA, but no wider. Making this value too
26316 large results in extra work for the scheduler. */
26317
26318 static int
26319 ia32_multipass_dfa_lookahead (void)
26320 {
26321 switch (ix86_tune)
26322 {
26323 case PROCESSOR_PENTIUM:
26324 return 2;
26325
26326 case PROCESSOR_PENTIUMPRO:
26327 case PROCESSOR_K6:
26328 return 1;
26329
26330 case PROCESSOR_BDVER1:
26331 case PROCESSOR_BDVER2:
26332 case PROCESSOR_BDVER3:
26333 case PROCESSOR_BDVER4:
26334 /* We use lookahead value 4 for BD both before and after reload
26335 schedules. The plan is to have value 8 included for -O3. */
26336 return 4;
26337
26338 case PROCESSOR_CORE2:
26339 case PROCESSOR_NEHALEM:
26340 case PROCESSOR_SANDYBRIDGE:
26341 case PROCESSOR_HASWELL:
26342 case PROCESSOR_BONNELL:
26343 case PROCESSOR_SILVERMONT:
26344 case PROCESSOR_INTEL:
26345 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26346 as the number of instructions that can be executed in a cycle, i.e.,
26347 issue_rate. I wonder why tuning for many CPUs does not do this. */
26348 if (reload_completed)
26349 return ix86_issue_rate ();
26350 /* Don't use lookahead for pre-reload schedule to save compile time. */
26351 return 0;
26352
26353 default:
26354 return 0;
26355 }
26356 }
26357
26358 /* Return true if target platform supports macro-fusion. */
26359
26360 static bool
26361 ix86_macro_fusion_p ()
26362 {
26363 return TARGET_FUSE_CMP_AND_BRANCH;
26364 }
26365
26366 /* Check whether the current microarchitecture supports macro fusion
26367 for the insn pair "CONDGEN + CONDJMP". Refer to the
26368 "Intel Architectures Optimization Reference Manual". */
26369
26370 static bool
26371 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26372 {
26373 rtx src, dest;
26374 enum rtx_code ccode;
26375 rtx compare_set = NULL_RTX, test_if, cond;
26376 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26377
26378 if (!any_condjump_p (condjmp))
26379 return false;
26380
26381 if (get_attr_type (condgen) != TYPE_TEST
26382 && get_attr_type (condgen) != TYPE_ICMP
26383 && get_attr_type (condgen) != TYPE_INCDEC
26384 && get_attr_type (condgen) != TYPE_ALU)
26385 return false;
26386
26387 compare_set = single_set (condgen);
26388 if (compare_set == NULL_RTX
26389 && !TARGET_FUSE_ALU_AND_BRANCH)
26390 return false;
26391
26392 if (compare_set == NULL_RTX)
26393 {
26394 int i;
26395 rtx pat = PATTERN (condgen);
26396 for (i = 0; i < XVECLEN (pat, 0); i++)
26397 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26398 {
26399 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26400 if (GET_CODE (set_src) == COMPARE)
26401 compare_set = XVECEXP (pat, 0, i);
26402 else
26403 alu_set = XVECEXP (pat, 0, i);
26404 }
26405 }
26406 if (compare_set == NULL_RTX)
26407 return false;
26408 src = SET_SRC (compare_set);
26409 if (GET_CODE (src) != COMPARE)
26410 return false;
26411
26412 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26413 supported. */
26414 if ((MEM_P (XEXP (src, 0))
26415 && CONST_INT_P (XEXP (src, 1)))
26416 || (MEM_P (XEXP (src, 1))
26417 && CONST_INT_P (XEXP (src, 0))))
26418 return false;
26419
26420 /* No fusion for RIP-relative address. */
26421 if (MEM_P (XEXP (src, 0)))
26422 addr = XEXP (XEXP (src, 0), 0);
26423 else if (MEM_P (XEXP (src, 1)))
26424 addr = XEXP (XEXP (src, 1), 0);
26425
26426 if (addr) {
26427 ix86_address parts;
26428 int ok = ix86_decompose_address (addr, &parts);
26429 gcc_assert (ok);
26430
26431 if (rip_relative_addr_p (&parts))
26432 return false;
26433 }
26434
26435 test_if = SET_SRC (pc_set (condjmp));
26436 cond = XEXP (test_if, 0);
26437 ccode = GET_CODE (cond);
26438 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26439 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26440 && (ccode == GE
26441 || ccode == GT
26442 || ccode == LE
26443 || ccode == LT))
26444 return false;
26445
26446 /* Return true for TYPE_TEST and TYPE_ICMP. */
26447 if (get_attr_type (condgen) == TYPE_TEST
26448 || get_attr_type (condgen) == TYPE_ICMP)
26449 return true;
26450
26451 /* The following handles the case of macro-fusion for alu + jmp. */
26452 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26453 return false;
26454
26455 /* No fusion for alu op with memory destination operand. */
26456 dest = SET_DEST (alu_set);
26457 if (MEM_P (dest))
26458 return false;
26459
26460 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26461 supported. */
26462 if (get_attr_type (condgen) == TYPE_INCDEC
26463 && (ccode == GEU
26464 || ccode == GTU
26465 || ccode == LEU
26466 || ccode == LTU))
26467 return false;
26468
26469 return true;
26470 }
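/* Illustrative examples of the checks above (not an exhaustive list):
   "cmp %rax, %rbx" + "jne .L1"      fusible (TYPE_ICMP, allowed flags).
   "cmpl $1, (%rdi)" + "je .L1"      rejected: MEM-IMM compare.
   "cmpl $1, sym(%rip)" + "je .L1"   rejected: RIP-relative address.
   "dec %eax" + "jb .L1"             rejected: inc/dec with an unsigned
                                     condition.
   "addl %esi, (%rdi)" + "js .L1"    rejected: ALU op with a memory
                                     destination.  */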
26471
26472 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26473 execution. It is applied if
26474 (1) an IMUL instruction is on the top of the list;
26475 (2) there exists exactly one producer of an independent IMUL instruction
26476 in the ready list.
26477 Return the index of the IMUL producer if it was found and -1 otherwise. */
26478 static int
26479 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26480 {
26481 rtx_insn *insn;
26482 rtx set, insn1, insn2;
26483 sd_iterator_def sd_it;
26484 dep_t dep;
26485 int index = -1;
26486 int i;
26487
26488 if (!TARGET_BONNELL)
26489 return index;
26490
26491 /* Check that IMUL instruction is on the top of ready list. */
26492 insn = ready[n_ready - 1];
26493 set = single_set (insn);
26494 if (!set)
26495 return index;
26496 if (!(GET_CODE (SET_SRC (set)) == MULT
26497 && GET_MODE (SET_SRC (set)) == SImode))
26498 return index;
26499
26500 /* Search for producer of independent IMUL instruction. */
26501 for (i = n_ready - 2; i >= 0; i--)
26502 {
26503 insn = ready[i];
26504 if (!NONDEBUG_INSN_P (insn))
26505 continue;
26506 /* Skip IMUL instruction. */
26507 insn2 = PATTERN (insn);
26508 if (GET_CODE (insn2) == PARALLEL)
26509 insn2 = XVECEXP (insn2, 0, 0);
26510 if (GET_CODE (insn2) == SET
26511 && GET_CODE (SET_SRC (insn2)) == MULT
26512 && GET_MODE (SET_SRC (insn2)) == SImode)
26513 continue;
26514
26515 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26516 {
26517 rtx con;
26518 con = DEP_CON (dep);
26519 if (!NONDEBUG_INSN_P (con))
26520 continue;
26521 insn1 = PATTERN (con);
26522 if (GET_CODE (insn1) == PARALLEL)
26523 insn1 = XVECEXP (insn1, 0, 0);
26524
26525 if (GET_CODE (insn1) == SET
26526 && GET_CODE (SET_SRC (insn1)) == MULT
26527 && GET_MODE (SET_SRC (insn1)) == SImode)
26528 {
26529 sd_iterator_def sd_it1;
26530 dep_t dep1;
26531 /* Check if there is no other dependee for IMUL. */
26532 index = i;
26533 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26534 {
26535 rtx pro;
26536 pro = DEP_PRO (dep1);
26537 if (!NONDEBUG_INSN_P (pro))
26538 continue;
26539 if (pro != insn)
26540 index = -1;
26541 }
26542 if (index >= 0)
26543 break;
26544 }
26545 }
26546 if (index >= 0)
26547 break;
26548 }
26549 return index;
26550 }
26551
26552 /* Try to find the best candidate for the top of the ready list if two insns
26553 have the same priority - a candidate is best if its dependees were
26554 scheduled earlier. Applied for Silvermont only.
26555 Return true if the top 2 insns must be interchanged. */
26556 static bool
26557 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26558 {
26559 rtx_insn *top = ready[n_ready - 1];
26560 rtx_insn *next = ready[n_ready - 2];
26561 rtx set;
26562 sd_iterator_def sd_it;
26563 dep_t dep;
26564 int clock1 = -1;
26565 int clock2 = -1;
26566 #define INSN_TICK(INSN) (HID (INSN)->tick)
26567
26568 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26569 return false;
26570
26571 if (!NONDEBUG_INSN_P (top))
26572 return false;
26573 if (!NONJUMP_INSN_P (top))
26574 return false;
26575 if (!NONDEBUG_INSN_P (next))
26576 return false;
26577 if (!NONJUMP_INSN_P (next))
26578 return false;
26579 set = single_set (top);
26580 if (!set)
26581 return false;
26582 set = single_set (next);
26583 if (!set)
26584 return false;
26585
26586 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26587 {
26588 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26589 return false;
26590 /* Determine the winner more precisely. */
26591 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26592 {
26593 rtx pro;
26594 pro = DEP_PRO (dep);
26595 if (!NONDEBUG_INSN_P (pro))
26596 continue;
26597 if (INSN_TICK (pro) > clock1)
26598 clock1 = INSN_TICK (pro);
26599 }
26600 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26601 {
26602 rtx pro;
26603 pro = DEP_PRO (dep);
26604 if (!NONDEBUG_INSN_P (pro))
26605 continue;
26606 if (INSN_TICK (pro) > clock2)
26607 clock2 = INSN_TICK (pro);
26608 }
26609
26610 if (clock1 == clock2)
26611 {
26612 /* Determine winner - load must win. */
26613 enum attr_memory memory1, memory2;
26614 memory1 = get_attr_memory (top);
26615 memory2 = get_attr_memory (next);
26616 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26617 return true;
26618 }
26619 return (bool) (clock2 < clock1);
26620 }
26621 return false;
26622 #undef INSN_TICK
26623 }
26624
26625 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26626 Return the issue rate. */
26627 static int
26628 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26629 int *pn_ready, int clock_var)
26630 {
26631 int issue_rate = -1;
26632 int n_ready = *pn_ready;
26633 int i;
26634 rtx_insn *insn;
26635 int index = -1;
26636
26637 /* Set up issue rate. */
26638 issue_rate = ix86_issue_rate ();
26639
26640 /* Do reordering for BONNELL/SILVERMONT only. */
26641 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26642 return issue_rate;
26643
26644 /* Nothing to do if ready list contains only 1 instruction. */
26645 if (n_ready <= 1)
26646 return issue_rate;
26647
26648 /* Do reordering for the post-reload scheduler only. */
26649 if (!reload_completed)
26650 return issue_rate;
26651
26652 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26653 {
26654 if (sched_verbose > 1)
26655 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26656 INSN_UID (ready[index]));
26657
26658 /* Put IMUL producer (ready[index]) at the top of ready list. */
26659 insn = ready[index];
26660 for (i = index; i < n_ready - 1; i++)
26661 ready[i] = ready[i + 1];
26662 ready[n_ready - 1] = insn;
26663 return issue_rate;
26664 }
26665 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26666 {
26667 if (sched_verbose > 1)
26668 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26669 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26670 /* Swap 2 top elements of ready list. */
26671 insn = ready[n_ready - 1];
26672 ready[n_ready - 1] = ready[n_ready - 2];
26673 ready[n_ready - 2] = insn;
26674 }
26675 return issue_rate;
26676 }
26677
26678 static bool
26679 ix86_class_likely_spilled_p (reg_class_t);
26680
26681 /* Return true if the lhs of INSN is a HW function argument register; set
26682 *IS_SPILLED to true if it is a likely spilled HW register. */
26683 static bool
26684 insn_is_function_arg (rtx insn, bool* is_spilled)
26685 {
26686 rtx dst;
26687
26688 if (!NONDEBUG_INSN_P (insn))
26689 return false;
26690 /* Call instructions are not movable; ignore them. */
26691 if (CALL_P (insn))
26692 return false;
26693 insn = PATTERN (insn);
26694 if (GET_CODE (insn) == PARALLEL)
26695 insn = XVECEXP (insn, 0, 0);
26696 if (GET_CODE (insn) != SET)
26697 return false;
26698 dst = SET_DEST (insn);
26699 if (REG_P (dst) && HARD_REGISTER_P (dst)
26700 && ix86_function_arg_regno_p (REGNO (dst)))
26701 {
26702 /* Is it likely spilled HW register? */
26703 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26704 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26705 *is_spilled = true;
26706 return true;
26707 }
26708 return false;
26709 }
26710
26711 /* Add output dependencies for a chain of adjacent function arguments, but only
26712 if there is a move to a likely spilled HW register. Return the first argument
26713 if at least one dependence was added, or NULL otherwise. */
26714 static rtx_insn *
26715 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26716 {
26717 rtx_insn *insn;
26718 rtx_insn *last = call;
26719 rtx_insn *first_arg = NULL;
26720 bool is_spilled = false;
26721
26722 head = PREV_INSN (head);
26723
26724 /* Find the argument passing instruction nearest to the call. */
26725 while (true)
26726 {
26727 last = PREV_INSN (last);
26728 if (last == head)
26729 return NULL;
26730 if (!NONDEBUG_INSN_P (last))
26731 continue;
26732 if (insn_is_function_arg (last, &is_spilled))
26733 break;
26734 return NULL;
26735 }
26736
26737 first_arg = last;
26738 while (true)
26739 {
26740 insn = PREV_INSN (last);
26741 if (!INSN_P (insn))
26742 break;
26743 if (insn == head)
26744 break;
26745 if (!NONDEBUG_INSN_P (insn))
26746 {
26747 last = insn;
26748 continue;
26749 }
26750 if (insn_is_function_arg (insn, &is_spilled))
26751 {
26752 /* Add an output dependence between two function arguments if the chain
26753 of output arguments contains likely spilled HW registers. */
26754 if (is_spilled)
26755 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26756 first_arg = last = insn;
26757 }
26758 else
26759 break;
26760 }
26761 if (!is_spilled)
26762 return NULL;
26763 return first_arg;
26764 }
26765
26766 /* Add output or anti dependency from insn to first_arg to restrict its code
26767 motion. */
26768 static void
26769 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26770 {
26771 rtx set;
26772 rtx tmp;
26773
26774 set = single_set (insn);
26775 if (!set)
26776 return;
26777 tmp = SET_DEST (set);
26778 if (REG_P (tmp))
26779 {
26780 /* Add output dependency to the first function argument. */
26781 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26782 return;
26783 }
26784 /* Add anti dependency. */
26785 add_dependence (first_arg, insn, REG_DEP_ANTI);
26786 }
26787
26788 /* Avoid cross-block motion of a function argument by adding a dependency
26789 from the first non-jump instruction in bb. */
26790 static void
26791 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26792 {
26793 rtx_insn *insn = BB_END (bb);
26794
26795 while (insn)
26796 {
26797 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26798 {
26799 rtx set = single_set (insn);
26800 if (set)
26801 {
26802 avoid_func_arg_motion (arg, insn);
26803 return;
26804 }
26805 }
26806 if (insn == BB_HEAD (bb))
26807 return;
26808 insn = PREV_INSN (insn);
26809 }
26810 }
26811
26812 /* Hook for pre-reload schedule - avoid motion of function arguments
26813 passed in likely spilled HW registers. */
26814 static void
26815 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26816 {
26817 rtx_insn *insn;
26818 rtx_insn *first_arg = NULL;
26819 if (reload_completed)
26820 return;
26821 while (head != tail && DEBUG_INSN_P (head))
26822 head = NEXT_INSN (head);
26823 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26824 if (INSN_P (insn) && CALL_P (insn))
26825 {
26826 first_arg = add_parameter_dependencies (insn, head);
26827 if (first_arg)
26828 {
26829 /* Add a dependee for the first argument to predecessors, but only
26830 if the region contains more than one block. */
26831 basic_block bb = BLOCK_FOR_INSN (insn);
26832 int rgn = CONTAINING_RGN (bb->index);
26833 int nr_blks = RGN_NR_BLOCKS (rgn);
26834 /* Skip trivial regions and region head blocks that can have
26835 predecessors outside of the region. */
26836 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26837 {
26838 edge e;
26839 edge_iterator ei;
26840
26841 /* Regions are SCCs with the exception of selective
26842 scheduling with pipelining of outer blocks enabled.
26843 So also check that immediate predecessors of a non-head
26844 block are in the same region. */
26845 FOR_EACH_EDGE (e, ei, bb->preds)
26846 {
26847 /* Avoid creating loop-carried dependencies by using the
26848 topological ordering in the region. */
26849 if (rgn == CONTAINING_RGN (e->src->index)
26850 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26851 add_dependee_for_func_arg (first_arg, e->src);
26852 }
26853 }
26854 insn = first_arg;
26855 if (insn == head)
26856 break;
26857 }
26858 }
26859 else if (first_arg)
26860 avoid_func_arg_motion (first_arg, insn);
26861 }
26862
26863 /* Hook for the pre-reload scheduler - set the priority of moves from
26864 likely-spilled HW registers to the maximum, to schedule them as soon as
26865 possible. These are moves from function argument registers at the top of
26866 the function entry and moves from function return value registers after a call. */
26867 static int
26868 ix86_adjust_priority (rtx_insn *insn, int priority)
26869 {
26870 rtx set;
26871
26872 if (reload_completed)
26873 return priority;
26874
26875 if (!NONDEBUG_INSN_P (insn))
26876 return priority;
26877
26878 set = single_set (insn);
26879 if (set)
26880 {
26881 rtx tmp = SET_SRC (set);
26882 if (REG_P (tmp)
26883 && HARD_REGISTER_P (tmp)
26884 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26885 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26886 return current_sched_info->sched_max_insns_priority;
26887 }
26888
26889 return priority;
26890 }
26891
26892 /* Model the decoder of Core 2/i7.
26893 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26894 track instruction fetch block boundaries and make sure that long
26895 (9+ byte) instructions are assigned to D0. */
26896
26897 /* Maximum length of an insn that can be handled by
26898 a secondary decoder unit. '8' for Core 2/i7. */
26899 static int core2i7_secondary_decoder_max_insn_size;
26900
26901 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
26902 '16' for Core 2/i7. */
26903 static int core2i7_ifetch_block_size;
26904
26905 /* Maximum number of instructions the decoder can handle per cycle.
26906 '6' for Core 2/i7. */
26907 static int core2i7_ifetch_block_max_insns;
26908
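26908 /* Worked example (editorial note): with the Core 2/i7 parameters set in
26908 ix86_sched_init_global below (16-byte ifetch block, at most 6 insns per
26908 block, secondary decoders limited to 8-byte insns), one cycle could issue
26908 a 9-byte insn on D0 followed by 3-, 2- and 2-byte insns on the secondary
26908 decoders (16 bytes, 4 insns in total); a further 5-byte insn would then be
26908 filtered out of ready_try because it would overflow the 16-byte ifetch
26908 block. */
26908 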
26909 typedef struct ix86_first_cycle_multipass_data_ *
26910 ix86_first_cycle_multipass_data_t;
26911 typedef const struct ix86_first_cycle_multipass_data_ *
26912 const_ix86_first_cycle_multipass_data_t;
26913
26914 /* A variable to store target state across calls to max_issue within
26915 one cycle. */
26916 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26917 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26918
26919 /* Initialize DATA. */
26920 static void
26921 core2i7_first_cycle_multipass_init (void *_data)
26922 {
26923 ix86_first_cycle_multipass_data_t data
26924 = (ix86_first_cycle_multipass_data_t) _data;
26925
26926 data->ifetch_block_len = 0;
26927 data->ifetch_block_n_insns = 0;
26928 data->ready_try_change = NULL;
26929 data->ready_try_change_size = 0;
26930 }
26931
26932 /* Advancing the cycle; reset ifetch block counts. */
26933 static void
26934 core2i7_dfa_post_advance_cycle (void)
26935 {
26936 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26937
26938 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26939
26940 data->ifetch_block_len = 0;
26941 data->ifetch_block_n_insns = 0;
26942 }
26943
26944 static int min_insn_size (rtx_insn *);
26945
26946 /* Filter out insns from ready_try that the core will not be able to issue
26947 on the current cycle due to decoder restrictions. */
26948 static void
26949 core2i7_first_cycle_multipass_filter_ready_try
26950 (const_ix86_first_cycle_multipass_data_t data,
26951 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26952 {
26953 while (n_ready--)
26954 {
26955 rtx_insn *insn;
26956 int insn_size;
26957
26958 if (ready_try[n_ready])
26959 continue;
26960
26961 insn = get_ready_element (n_ready);
26962 insn_size = min_insn_size (insn);
26963
26964 if (/* If this is too long an insn for a secondary decoder ... */
26965 (!first_cycle_insn_p
26966 && insn_size > core2i7_secondary_decoder_max_insn_size)
26967 /* ... or it would not fit into the ifetch block ... */
26968 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26969 /* ... or the decoder is full already ... */
26970 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26971 /* ... mask the insn out. */
26972 {
26973 ready_try[n_ready] = 1;
26974
26975 if (data->ready_try_change)
26976 bitmap_set_bit (data->ready_try_change, n_ready);
26977 }
26978 }
26979 }
26980
26981 /* Prepare for a new round of multipass lookahead scheduling. */
26982 static void
26983 core2i7_first_cycle_multipass_begin (void *_data,
26984 signed char *ready_try, int n_ready,
26985 bool first_cycle_insn_p)
26986 {
26987 ix86_first_cycle_multipass_data_t data
26988 = (ix86_first_cycle_multipass_data_t) _data;
26989 const_ix86_first_cycle_multipass_data_t prev_data
26990 = ix86_first_cycle_multipass_data;
26991
26992 /* Restore the state from the end of the previous round. */
26993 data->ifetch_block_len = prev_data->ifetch_block_len;
26994 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26995
26996 /* Filter instructions that cannot be issued on current cycle due to
26997 decoder restrictions. */
26998 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26999 first_cycle_insn_p);
27000 }
27001
27002 /* INSN is being issued in the current solution. Account for its impact on
27003 the decoder model. */
27004 static void
27005 core2i7_first_cycle_multipass_issue (void *_data,
27006 signed char *ready_try, int n_ready,
27007 rtx_insn *insn, const void *_prev_data)
27008 {
27009 ix86_first_cycle_multipass_data_t data
27010 = (ix86_first_cycle_multipass_data_t) _data;
27011 const_ix86_first_cycle_multipass_data_t prev_data
27012 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27013
27014 int insn_size = min_insn_size (insn);
27015
27016 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27017 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27018 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27019 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27020
27021 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27022 if (!data->ready_try_change)
27023 {
27024 data->ready_try_change = sbitmap_alloc (n_ready);
27025 data->ready_try_change_size = n_ready;
27026 }
27027 else if (data->ready_try_change_size < n_ready)
27028 {
27029 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27030 n_ready, 0);
27031 data->ready_try_change_size = n_ready;
27032 }
27033 bitmap_clear (data->ready_try_change);
27034
27035 /* Filter out insns from ready_try that the core will not be able to issue
27036 on the current cycle due to decoder restrictions. */
27037 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27038 false);
27039 }
27040
27041 /* Revert the effect on ready_try. */
27042 static void
27043 core2i7_first_cycle_multipass_backtrack (const void *_data,
27044 signed char *ready_try,
27045 int n_ready ATTRIBUTE_UNUSED)
27046 {
27047 const_ix86_first_cycle_multipass_data_t data
27048 = (const_ix86_first_cycle_multipass_data_t) _data;
27049 unsigned int i = 0;
27050 sbitmap_iterator sbi;
27051
27052 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27053 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27054 {
27055 ready_try[i] = 0;
27056 }
27057 }
27058
27059 /* Save the result of multipass lookahead scheduling for the next round. */
27060 static void
27061 core2i7_first_cycle_multipass_end (const void *_data)
27062 {
27063 const_ix86_first_cycle_multipass_data_t data
27064 = (const_ix86_first_cycle_multipass_data_t) _data;
27065 ix86_first_cycle_multipass_data_t next_data
27066 = ix86_first_cycle_multipass_data;
27067
27068 if (data != NULL)
27069 {
27070 next_data->ifetch_block_len = data->ifetch_block_len;
27071 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27072 }
27073 }
27074
27075 /* Deallocate target data. */
27076 static void
27077 core2i7_first_cycle_multipass_fini (void *_data)
27078 {
27079 ix86_first_cycle_multipass_data_t data
27080 = (ix86_first_cycle_multipass_data_t) _data;
27081
27082 if (data->ready_try_change)
27083 {
27084 sbitmap_free (data->ready_try_change);
27085 data->ready_try_change = NULL;
27086 data->ready_try_change_size = 0;
27087 }
27088 }
27089
27090 /* Prepare for scheduling pass. */
27091 static void
27092 ix86_sched_init_global (FILE *, int, int)
27093 {
27094 /* Install scheduling hooks for the current CPU. Some of these hooks are
27095 used in time-critical parts of the scheduler, so we only set them up when
27096 they are actually used. */
27097 switch (ix86_tune)
27098 {
27099 case PROCESSOR_CORE2:
27100 case PROCESSOR_NEHALEM:
27101 case PROCESSOR_SANDYBRIDGE:
27102 case PROCESSOR_HASWELL:
27103 /* Do not perform multipass scheduling for the pre-reload schedule,
27104 to save compile time. */
27105 if (reload_completed)
27106 {
27107 targetm.sched.dfa_post_advance_cycle
27108 = core2i7_dfa_post_advance_cycle;
27109 targetm.sched.first_cycle_multipass_init
27110 = core2i7_first_cycle_multipass_init;
27111 targetm.sched.first_cycle_multipass_begin
27112 = core2i7_first_cycle_multipass_begin;
27113 targetm.sched.first_cycle_multipass_issue
27114 = core2i7_first_cycle_multipass_issue;
27115 targetm.sched.first_cycle_multipass_backtrack
27116 = core2i7_first_cycle_multipass_backtrack;
27117 targetm.sched.first_cycle_multipass_end
27118 = core2i7_first_cycle_multipass_end;
27119 targetm.sched.first_cycle_multipass_fini
27120 = core2i7_first_cycle_multipass_fini;
27121
27122 /* Set decoder parameters. */
27123 core2i7_secondary_decoder_max_insn_size = 8;
27124 core2i7_ifetch_block_size = 16;
27125 core2i7_ifetch_block_max_insns = 6;
27126 break;
27127 }
27128 /* ... Fall through ... */
27129 default:
27130 targetm.sched.dfa_post_advance_cycle = NULL;
27131 targetm.sched.first_cycle_multipass_init = NULL;
27132 targetm.sched.first_cycle_multipass_begin = NULL;
27133 targetm.sched.first_cycle_multipass_issue = NULL;
27134 targetm.sched.first_cycle_multipass_backtrack = NULL;
27135 targetm.sched.first_cycle_multipass_end = NULL;
27136 targetm.sched.first_cycle_multipass_fini = NULL;
27137 break;
27138 }
27139 }
27140
27141 \f
27142 /* Compute the alignment given to a constant that is being placed in memory.
27143 EXP is the constant and ALIGN is the alignment that the object would
27144 ordinarily have.
27145 The value of this function is used instead of that alignment to align
27146 the object. */
27147
27148 int
27149 ix86_constant_alignment (tree exp, int align)
27150 {
27151 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27152 || TREE_CODE (exp) == INTEGER_CST)
27153 {
27154 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27155 return 64;
27156 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27157 return 128;
27158 }
27159 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27160 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27161 return BITS_PER_WORD;
27162
27163 return align;
27164 }
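27164 
27164 /* Example (editorial note): a DFmode constant requested with 32-bit
27164 alignment is bumped to 64 bits, and a 128-bit vector constant to 128 bits,
27164 so aligned loads can be used on it; string constants of 31 or more
27164 characters are word-aligned unless optimizing for size. */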
27165
27166 /* Compute the alignment for a static variable.
27167 TYPE is the data type, and ALIGN is the alignment that
27168 the object would ordinarily have. The value of this function is used
27169 instead of that alignment to align the object. */
27170
27171 int
27172 ix86_data_alignment (tree type, int align, bool opt)
27173 {
27174 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27175 for symbols from other compilation units or symbols that don't need
27176 to bind locally. In order to preserve some ABI compatibility with
27177 those compilers, ensure we don't decrease alignment from what we
27178 used to assume. */
27179
27180 int max_align_compat
27181 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
27182
27183 /* A data structure equal to or greater than the size of a cache line
27184 (64 bytes in the Pentium 4 and other recent Intel processors, including
27185 processors based on the Intel Core microarchitecture) should be aligned
27186 so that its base address is a multiple of the cache line size. */
27187
27188 int max_align
27189 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27190
27191 if (max_align < BITS_PER_WORD)
27192 max_align = BITS_PER_WORD;
27193
27194 if (opt
27195 && AGGREGATE_TYPE_P (type)
27196 && TYPE_SIZE (type)
27197 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27198 {
27199 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27200 && align < max_align_compat)
27201 align = max_align_compat;
27202 if (wi::geu_p (TYPE_SIZE (type), max_align)
27203 && align < max_align)
27204 align = max_align;
27205 }
27206
27207 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27208 to a 16-byte boundary. */
27209 if (TARGET_64BIT)
27210 {
27211 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27212 && TYPE_SIZE (type)
27213 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27214 && wi::geu_p (TYPE_SIZE (type), 128)
27215 && align < 128)
27216 return 128;
27217 }
27218
27219 if (!opt)
27220 return align;
27221
27222 if (TREE_CODE (type) == ARRAY_TYPE)
27223 {
27224 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27225 return 64;
27226 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27227 return 128;
27228 }
27229 else if (TREE_CODE (type) == COMPLEX_TYPE)
27230 {
27231
27232 if (TYPE_MODE (type) == DCmode && align < 64)
27233 return 64;
27234 if ((TYPE_MODE (type) == XCmode
27235 || TYPE_MODE (type) == TCmode) && align < 128)
27236 return 128;
27237 }
27238 else if ((TREE_CODE (type) == RECORD_TYPE
27239 || TREE_CODE (type) == UNION_TYPE
27240 || TREE_CODE (type) == QUAL_UNION_TYPE)
27241 && TYPE_FIELDS (type))
27242 {
27243 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27244 return 64;
27245 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27246 return 128;
27247 }
27248 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27249 || TREE_CODE (type) == INTEGER_TYPE)
27250 {
27251 if (TYPE_MODE (type) == DFmode && align < 64)
27252 return 64;
27253 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27254 return 128;
27255 }
27256
27257 return align;
27258 }
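27258 
27258 /* Example (editorial note), assuming a 64-byte prefetch block in the active
27258 tuning and optimization for speed: an aggregate of 64 bytes or more gets
27258 cache-line (512-bit) alignment, an aggregate of 32 bytes or more keeps at
27258 least the 256-bit alignment that GCC 4.8 and earlier assumed, and on x86-64
27258 an array of 16 bytes or more is aligned to 16 bytes as the psABI requires. */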
27259
27260 /* Compute the alignment for a local variable or a stack slot. EXP is
27261 the data type or decl itself, MODE is the widest mode available and
27262 ALIGN is the alignment that the object would ordinarily have. The
27263 value of this macro is used instead of that alignment to align the
27264 object. */
27265
27266 unsigned int
27267 ix86_local_alignment (tree exp, machine_mode mode,
27268 unsigned int align)
27269 {
27270 tree type, decl;
27271
27272 if (exp && DECL_P (exp))
27273 {
27274 type = TREE_TYPE (exp);
27275 decl = exp;
27276 }
27277 else
27278 {
27279 type = exp;
27280 decl = NULL;
27281 }
27282
27283 /* Don't do dynamic stack realignment for long long objects with
27284 -mpreferred-stack-boundary=2. */
27285 if (!TARGET_64BIT
27286 && align == 64
27287 && ix86_preferred_stack_boundary < 64
27288 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27289 && (!type || !TYPE_USER_ALIGN (type))
27290 && (!decl || !DECL_USER_ALIGN (decl)))
27291 align = 32;
27292
27293 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27294 register in MODE. We return the larger of the XFmode and DFmode
27295 alignments. */
27296 if (!type)
27297 {
27298 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27299 align = GET_MODE_ALIGNMENT (DFmode);
27300 return align;
27301 }
27302
27303 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27304 to a 16-byte boundary. The exact wording is:
27305
27306 An array uses the same alignment as its elements, except that a local or
27307 global array variable of length at least 16 bytes or
27308 a C99 variable-length array variable always has alignment of at least 16 bytes.
27309
27310 This was added to allow the use of aligned SSE instructions on arrays.
27311 The rule is meant for static storage (where the compiler cannot do the
27312 analysis itself). We follow it for automatic variables only when convenient:
27313 we fully control everything in the function being compiled, and functions
27314 from other units cannot rely on the alignment.
27315
27316 Exclude the va_list type. It is the common case of a local array where
27317 we cannot benefit from the alignment.
27318
27319 TODO: Probably one should optimize for size only when the variable does not escape. */
27320 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27321 && TARGET_SSE)
27322 {
27323 if (AGGREGATE_TYPE_P (type)
27324 && (va_list_type_node == NULL_TREE
27325 || (TYPE_MAIN_VARIANT (type)
27326 != TYPE_MAIN_VARIANT (va_list_type_node)))
27327 && TYPE_SIZE (type)
27328 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27329 && wi::geu_p (TYPE_SIZE (type), 16)
27330 && align < 128)
27331 return 128;
27332 }
27333 if (TREE_CODE (type) == ARRAY_TYPE)
27334 {
27335 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27336 return 64;
27337 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27338 return 128;
27339 }
27340 else if (TREE_CODE (type) == COMPLEX_TYPE)
27341 {
27342 if (TYPE_MODE (type) == DCmode && align < 64)
27343 return 64;
27344 if ((TYPE_MODE (type) == XCmode
27345 || TYPE_MODE (type) == TCmode) && align < 128)
27346 return 128;
27347 }
27348 else if ((TREE_CODE (type) == RECORD_TYPE
27349 || TREE_CODE (type) == UNION_TYPE
27350 || TREE_CODE (type) == QUAL_UNION_TYPE)
27351 && TYPE_FIELDS (type))
27352 {
27353 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27354 return 64;
27355 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27356 return 128;
27357 }
27358 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27359 || TREE_CODE (type) == INTEGER_TYPE)
27360 {
27361
27362 if (TYPE_MODE (type) == DFmode && align < 64)
27363 return 64;
27364 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27365 return 128;
27366 }
27367 return align;
27368 }
27369
27370 /* Compute the minimum required alignment for dynamic stack realignment
27371 purposes for a local variable, parameter or a stack slot. EXP is
27372 the data type or decl itself, MODE is its mode and ALIGN is the
27373 alignment that the object would ordinarily have. */
27374
27375 unsigned int
27376 ix86_minimum_alignment (tree exp, machine_mode mode,
27377 unsigned int align)
27378 {
27379 tree type, decl;
27380
27381 if (exp && DECL_P (exp))
27382 {
27383 type = TREE_TYPE (exp);
27384 decl = exp;
27385 }
27386 else
27387 {
27388 type = exp;
27389 decl = NULL;
27390 }
27391
27392 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27393 return align;
27394
27395 /* Don't do dynamic stack realignment for long long objects with
27396 -mpreferred-stack-boundary=2. */
27397 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27398 && (!type || !TYPE_USER_ALIGN (type))
27399 && (!decl || !DECL_USER_ALIGN (decl)))
27400 return 32;
27401
27402 return align;
27403 }
27404 \f
27405 /* Find a location for the static chain incoming to a nested function.
27406 This is a register, unless all free registers are used by arguments. */
27407
27408 static rtx
27409 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27410 {
27411 unsigned regno;
27412
27413 /* While this function won't be called by the middle-end when a static
27414 chain isn't needed, it's also used throughout the backend so it's
27415 easiest to keep this check centralized. */
27416 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27417 return NULL;
27418
27419 if (TARGET_64BIT)
27420 {
27421 /* We always use R10 in 64-bit mode. */
27422 regno = R10_REG;
27423 }
27424 else
27425 {
27426 const_tree fntype, fndecl;
27427 unsigned int ccvt;
27428
27429 /* By default in 32-bit mode we use ECX to pass the static chain. */
27430 regno = CX_REG;
27431
27432 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27433 {
27434 fntype = TREE_TYPE (fndecl_or_type);
27435 fndecl = fndecl_or_type;
27436 }
27437 else
27438 {
27439 fntype = fndecl_or_type;
27440 fndecl = NULL;
27441 }
27442
27443 ccvt = ix86_get_callcvt (fntype);
27444 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27445 {
27446 /* Fastcall functions use ecx/edx for arguments, which leaves
27447 us with EAX for the static chain.
27448 Thiscall functions use ecx for arguments, which also
27449 leaves us with EAX for the static chain. */
27450 regno = AX_REG;
27451 }
27452 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27453 {
27454 /* Thiscall functions use ecx for arguments, which leaves
27455 us with EAX and EDX for the static chain.
27456 We use EAX for ABI compatibility. */
27457 regno = AX_REG;
27458 }
27459 else if (ix86_function_regparm (fntype, fndecl) == 3)
27460 {
27461 /* For regparm 3, we have no free call-clobbered registers in
27462 which to store the static chain. In order to implement this,
27463 we have the trampoline push the static chain to the stack.
27464 However, we can't push a value below the return address when
27465 we call the nested function directly, so we have to use an
27466 alternate entry point. For this we use ESI, and have the
27467 alternate entry point push ESI, so that things appear the
27468 same once we're executing the nested function. */
27469 if (incoming_p)
27470 {
27471 if (fndecl == current_function_decl)
27472 ix86_static_chain_on_stack = true;
27473 return gen_frame_mem (SImode,
27474 plus_constant (Pmode,
27475 arg_pointer_rtx, -8));
27476 }
27477 regno = SI_REG;
27478 }
27479 }
27480
27481 return gen_rtx_REG (Pmode, regno);
27482 }
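27482 
27482 /* Summary (editorial note) of the static chain location chosen above:
27482 64-bit: R10
27482 32-bit, default conventions: ECX
27482 32-bit, fastcall or thiscall: EAX
27482 32-bit, regparm(3): passed on the stack by the trampoline; direct calls
27482 use ESI via an alternate entry point that pushes it. */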
27483
27484 /* Emit RTL insns to initialize the variable parts of a trampoline.
27485 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27486 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27487 to be passed to the target function. */
27488
27489 static void
27490 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27491 {
27492 rtx mem, fnaddr;
27493 int opcode;
27494 int offset = 0;
27495
27496 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27497
27498 if (TARGET_64BIT)
27499 {
27500 int size;
27501
27502 /* Load the function address into r11. Try to load the address using
27503 the shorter movl instead of movabs. We may want to support
27504 movq for kernel mode, but the kernel does not use trampolines at
27505 the moment. FNADDR is a 32-bit address and may not be in
27506 DImode when ptr_mode == SImode. Always use movl in this
27507 case. */
27508 if (ptr_mode == SImode
27509 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27510 {
27511 fnaddr = copy_addr_to_reg (fnaddr);
27512
27513 mem = adjust_address (m_tramp, HImode, offset);
27514 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27515
27516 mem = adjust_address (m_tramp, SImode, offset + 2);
27517 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27518 offset += 6;
27519 }
27520 else
27521 {
27522 mem = adjust_address (m_tramp, HImode, offset);
27523 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27524
27525 mem = adjust_address (m_tramp, DImode, offset + 2);
27526 emit_move_insn (mem, fnaddr);
27527 offset += 10;
27528 }
27529
27530 /* Load static chain using movabs to r10. Use the shorter movl
27531 instead of movabs when ptr_mode == SImode. */
27532 if (ptr_mode == SImode)
27533 {
27534 opcode = 0xba41;
27535 size = 6;
27536 }
27537 else
27538 {
27539 opcode = 0xba49;
27540 size = 10;
27541 }
27542
27543 mem = adjust_address (m_tramp, HImode, offset);
27544 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27545
27546 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27547 emit_move_insn (mem, chain_value);
27548 offset += size;
27549
27550 /* Jump to r11; the last (unused) byte is a nop, only there to
27551 pad the write out to a single 32-bit store. */
27552 mem = adjust_address (m_tramp, SImode, offset);
27553 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27554 offset += 4;
27555 }
27556 else
27557 {
27558 rtx disp, chain;
27559
27560 /* Depending on the static chain location, either load a register
27561 with a constant, or push the constant to the stack. All of the
27562 instructions are the same size. */
27563 chain = ix86_static_chain (fndecl, true);
27564 if (REG_P (chain))
27565 {
27566 switch (REGNO (chain))
27567 {
27568 case AX_REG:
27569 opcode = 0xb8; break;
27570 case CX_REG:
27571 opcode = 0xb9; break;
27572 default:
27573 gcc_unreachable ();
27574 }
27575 }
27576 else
27577 opcode = 0x68;
27578
27579 mem = adjust_address (m_tramp, QImode, offset);
27580 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27581
27582 mem = adjust_address (m_tramp, SImode, offset + 1);
27583 emit_move_insn (mem, chain_value);
27584 offset += 5;
27585
27586 mem = adjust_address (m_tramp, QImode, offset);
27587 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27588
27589 mem = adjust_address (m_tramp, SImode, offset + 1);
27590
27591 /* Compute offset from the end of the jmp to the target function.
27592 In the case in which the trampoline stores the static chain on
27593 the stack, we need to skip the first insn which pushes the
27594 (call-saved) register static chain; this push is 1 byte. */
27595 offset += 5;
27596 disp = expand_binop (SImode, sub_optab, fnaddr,
27597 plus_constant (Pmode, XEXP (m_tramp, 0),
27598 offset - (MEM_P (chain) ? 1 : 0)),
27599 NULL_RTX, 1, OPTAB_DIRECT);
27600 emit_move_insn (mem, disp);
27601 }
27602
27603 gcc_assert (offset <= TRAMPOLINE_SIZE);
27604
27605 #ifdef HAVE_ENABLE_EXECUTE_STACK
27606 #ifdef CHECK_EXECUTE_STACK_ENABLED
27607 if (CHECK_EXECUTE_STACK_ENABLED)
27608 #endif
27609 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27610 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27611 #endif
27612 }
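27612 
27612 /* Illustrative layout (editorial note) of the 64-bit trampoline emitted
27612 above when the movabs forms are used:
27612 
27612 49 bb <8-byte fnaddr>   movabs $fnaddr, %r11   (10 bytes)
27612 49 ba <8-byte chain>    movabs $chain,  %r10   (10 bytes)
27612 49 ff e3                jmp    *%r11           ( 3 bytes)
27612 90                      nop, pads the final 32-bit store
27612 
27612 24 bytes in total, which must not exceed TRAMPOLINE_SIZE. */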
27613 \f
27614 /* The following file contains several enumerations and data structures
27615 built from the definitions in i386-builtin-types.def. */
27616
27617 #include "i386-builtin-types.inc"
27618
27619 /* Table for the ix86 builtin non-function types. */
27620 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27621
27622 /* Retrieve an element from the above table, building some of
27623 the types lazily. */
27624
27625 static tree
27626 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27627 {
27628 unsigned int index;
27629 tree type, itype;
27630
27631 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27632
27633 type = ix86_builtin_type_tab[(int) tcode];
27634 if (type != NULL)
27635 return type;
27636
27637 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27638 if (tcode <= IX86_BT_LAST_VECT)
27639 {
27640 machine_mode mode;
27641
27642 index = tcode - IX86_BT_LAST_PRIM - 1;
27643 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27644 mode = ix86_builtin_type_vect_mode[index];
27645
27646 type = build_vector_type_for_mode (itype, mode);
27647 }
27648 else
27649 {
27650 int quals;
27651
27652 index = tcode - IX86_BT_LAST_VECT - 1;
27653 if (tcode <= IX86_BT_LAST_PTR)
27654 quals = TYPE_UNQUALIFIED;
27655 else
27656 quals = TYPE_QUAL_CONST;
27657
27658 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27659 if (quals != TYPE_UNQUALIFIED)
27660 itype = build_qualified_type (itype, quals);
27661
27662 type = build_pointer_type (itype);
27663 }
27664
27665 ix86_builtin_type_tab[(int) tcode] = type;
27666 return type;
27667 }
27668
27669 /* Table for the ix86 builtin function types. */
27670 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27671
27672 /* Retrieve an element from the above table, building some of
27673 the types lazily. */
27674
27675 static tree
27676 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27677 {
27678 tree type;
27679
27680 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27681
27682 type = ix86_builtin_func_type_tab[(int) tcode];
27683 if (type != NULL)
27684 return type;
27685
27686 if (tcode <= IX86_BT_LAST_FUNC)
27687 {
27688 unsigned start = ix86_builtin_func_start[(int) tcode];
27689 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27690 tree rtype, atype, args = void_list_node;
27691 unsigned i;
27692
27693 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27694 for (i = after - 1; i > start; --i)
27695 {
27696 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27697 args = tree_cons (NULL, atype, args);
27698 }
27699
27700 type = build_function_type (rtype, args);
27701 }
27702 else
27703 {
27704 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27705 enum ix86_builtin_func_type icode;
27706
27707 icode = ix86_builtin_func_alias_base[index];
27708 type = ix86_get_builtin_func_type (icode);
27709 }
27710
27711 ix86_builtin_func_type_tab[(int) tcode] = type;
27712 return type;
27713 }
27714
27715
27716 /* Codes for all the SSE/MMX builtins. */
27717 enum ix86_builtins
27718 {
27719 IX86_BUILTIN_ADDPS,
27720 IX86_BUILTIN_ADDSS,
27721 IX86_BUILTIN_DIVPS,
27722 IX86_BUILTIN_DIVSS,
27723 IX86_BUILTIN_MULPS,
27724 IX86_BUILTIN_MULSS,
27725 IX86_BUILTIN_SUBPS,
27726 IX86_BUILTIN_SUBSS,
27727
27728 IX86_BUILTIN_CMPEQPS,
27729 IX86_BUILTIN_CMPLTPS,
27730 IX86_BUILTIN_CMPLEPS,
27731 IX86_BUILTIN_CMPGTPS,
27732 IX86_BUILTIN_CMPGEPS,
27733 IX86_BUILTIN_CMPNEQPS,
27734 IX86_BUILTIN_CMPNLTPS,
27735 IX86_BUILTIN_CMPNLEPS,
27736 IX86_BUILTIN_CMPNGTPS,
27737 IX86_BUILTIN_CMPNGEPS,
27738 IX86_BUILTIN_CMPORDPS,
27739 IX86_BUILTIN_CMPUNORDPS,
27740 IX86_BUILTIN_CMPEQSS,
27741 IX86_BUILTIN_CMPLTSS,
27742 IX86_BUILTIN_CMPLESS,
27743 IX86_BUILTIN_CMPNEQSS,
27744 IX86_BUILTIN_CMPNLTSS,
27745 IX86_BUILTIN_CMPNLESS,
27746 IX86_BUILTIN_CMPORDSS,
27747 IX86_BUILTIN_CMPUNORDSS,
27748
27749 IX86_BUILTIN_COMIEQSS,
27750 IX86_BUILTIN_COMILTSS,
27751 IX86_BUILTIN_COMILESS,
27752 IX86_BUILTIN_COMIGTSS,
27753 IX86_BUILTIN_COMIGESS,
27754 IX86_BUILTIN_COMINEQSS,
27755 IX86_BUILTIN_UCOMIEQSS,
27756 IX86_BUILTIN_UCOMILTSS,
27757 IX86_BUILTIN_UCOMILESS,
27758 IX86_BUILTIN_UCOMIGTSS,
27759 IX86_BUILTIN_UCOMIGESS,
27760 IX86_BUILTIN_UCOMINEQSS,
27761
27762 IX86_BUILTIN_CVTPI2PS,
27763 IX86_BUILTIN_CVTPS2PI,
27764 IX86_BUILTIN_CVTSI2SS,
27765 IX86_BUILTIN_CVTSI642SS,
27766 IX86_BUILTIN_CVTSS2SI,
27767 IX86_BUILTIN_CVTSS2SI64,
27768 IX86_BUILTIN_CVTTPS2PI,
27769 IX86_BUILTIN_CVTTSS2SI,
27770 IX86_BUILTIN_CVTTSS2SI64,
27771
27772 IX86_BUILTIN_MAXPS,
27773 IX86_BUILTIN_MAXSS,
27774 IX86_BUILTIN_MINPS,
27775 IX86_BUILTIN_MINSS,
27776
27777 IX86_BUILTIN_LOADUPS,
27778 IX86_BUILTIN_STOREUPS,
27779 IX86_BUILTIN_MOVSS,
27780
27781 IX86_BUILTIN_MOVHLPS,
27782 IX86_BUILTIN_MOVLHPS,
27783 IX86_BUILTIN_LOADHPS,
27784 IX86_BUILTIN_LOADLPS,
27785 IX86_BUILTIN_STOREHPS,
27786 IX86_BUILTIN_STORELPS,
27787
27788 IX86_BUILTIN_MASKMOVQ,
27789 IX86_BUILTIN_MOVMSKPS,
27790 IX86_BUILTIN_PMOVMSKB,
27791
27792 IX86_BUILTIN_MOVNTPS,
27793 IX86_BUILTIN_MOVNTQ,
27794
27795 IX86_BUILTIN_LOADDQU,
27796 IX86_BUILTIN_STOREDQU,
27797
27798 IX86_BUILTIN_PACKSSWB,
27799 IX86_BUILTIN_PACKSSDW,
27800 IX86_BUILTIN_PACKUSWB,
27801
27802 IX86_BUILTIN_PADDB,
27803 IX86_BUILTIN_PADDW,
27804 IX86_BUILTIN_PADDD,
27805 IX86_BUILTIN_PADDQ,
27806 IX86_BUILTIN_PADDSB,
27807 IX86_BUILTIN_PADDSW,
27808 IX86_BUILTIN_PADDUSB,
27809 IX86_BUILTIN_PADDUSW,
27810 IX86_BUILTIN_PSUBB,
27811 IX86_BUILTIN_PSUBW,
27812 IX86_BUILTIN_PSUBD,
27813 IX86_BUILTIN_PSUBQ,
27814 IX86_BUILTIN_PSUBSB,
27815 IX86_BUILTIN_PSUBSW,
27816 IX86_BUILTIN_PSUBUSB,
27817 IX86_BUILTIN_PSUBUSW,
27818
27819 IX86_BUILTIN_PAND,
27820 IX86_BUILTIN_PANDN,
27821 IX86_BUILTIN_POR,
27822 IX86_BUILTIN_PXOR,
27823
27824 IX86_BUILTIN_PAVGB,
27825 IX86_BUILTIN_PAVGW,
27826
27827 IX86_BUILTIN_PCMPEQB,
27828 IX86_BUILTIN_PCMPEQW,
27829 IX86_BUILTIN_PCMPEQD,
27830 IX86_BUILTIN_PCMPGTB,
27831 IX86_BUILTIN_PCMPGTW,
27832 IX86_BUILTIN_PCMPGTD,
27833
27834 IX86_BUILTIN_PMADDWD,
27835
27836 IX86_BUILTIN_PMAXSW,
27837 IX86_BUILTIN_PMAXUB,
27838 IX86_BUILTIN_PMINSW,
27839 IX86_BUILTIN_PMINUB,
27840
27841 IX86_BUILTIN_PMULHUW,
27842 IX86_BUILTIN_PMULHW,
27843 IX86_BUILTIN_PMULLW,
27844
27845 IX86_BUILTIN_PSADBW,
27846 IX86_BUILTIN_PSHUFW,
27847
27848 IX86_BUILTIN_PSLLW,
27849 IX86_BUILTIN_PSLLD,
27850 IX86_BUILTIN_PSLLQ,
27851 IX86_BUILTIN_PSRAW,
27852 IX86_BUILTIN_PSRAD,
27853 IX86_BUILTIN_PSRLW,
27854 IX86_BUILTIN_PSRLD,
27855 IX86_BUILTIN_PSRLQ,
27856 IX86_BUILTIN_PSLLWI,
27857 IX86_BUILTIN_PSLLDI,
27858 IX86_BUILTIN_PSLLQI,
27859 IX86_BUILTIN_PSRAWI,
27860 IX86_BUILTIN_PSRADI,
27861 IX86_BUILTIN_PSRLWI,
27862 IX86_BUILTIN_PSRLDI,
27863 IX86_BUILTIN_PSRLQI,
27864
27865 IX86_BUILTIN_PUNPCKHBW,
27866 IX86_BUILTIN_PUNPCKHWD,
27867 IX86_BUILTIN_PUNPCKHDQ,
27868 IX86_BUILTIN_PUNPCKLBW,
27869 IX86_BUILTIN_PUNPCKLWD,
27870 IX86_BUILTIN_PUNPCKLDQ,
27871
27872 IX86_BUILTIN_SHUFPS,
27873
27874 IX86_BUILTIN_RCPPS,
27875 IX86_BUILTIN_RCPSS,
27876 IX86_BUILTIN_RSQRTPS,
27877 IX86_BUILTIN_RSQRTPS_NR,
27878 IX86_BUILTIN_RSQRTSS,
27879 IX86_BUILTIN_RSQRTF,
27880 IX86_BUILTIN_SQRTPS,
27881 IX86_BUILTIN_SQRTPS_NR,
27882 IX86_BUILTIN_SQRTSS,
27883
27884 IX86_BUILTIN_UNPCKHPS,
27885 IX86_BUILTIN_UNPCKLPS,
27886
27887 IX86_BUILTIN_ANDPS,
27888 IX86_BUILTIN_ANDNPS,
27889 IX86_BUILTIN_ORPS,
27890 IX86_BUILTIN_XORPS,
27891
27892 IX86_BUILTIN_EMMS,
27893 IX86_BUILTIN_LDMXCSR,
27894 IX86_BUILTIN_STMXCSR,
27895 IX86_BUILTIN_SFENCE,
27896
27897 IX86_BUILTIN_FXSAVE,
27898 IX86_BUILTIN_FXRSTOR,
27899 IX86_BUILTIN_FXSAVE64,
27900 IX86_BUILTIN_FXRSTOR64,
27901
27902 IX86_BUILTIN_XSAVE,
27903 IX86_BUILTIN_XRSTOR,
27904 IX86_BUILTIN_XSAVE64,
27905 IX86_BUILTIN_XRSTOR64,
27906
27907 IX86_BUILTIN_XSAVEOPT,
27908 IX86_BUILTIN_XSAVEOPT64,
27909
27910 IX86_BUILTIN_XSAVEC,
27911 IX86_BUILTIN_XSAVEC64,
27912
27913 IX86_BUILTIN_XSAVES,
27914 IX86_BUILTIN_XRSTORS,
27915 IX86_BUILTIN_XSAVES64,
27916 IX86_BUILTIN_XRSTORS64,
27917
27918 /* 3DNow! Original */
27919 IX86_BUILTIN_FEMMS,
27920 IX86_BUILTIN_PAVGUSB,
27921 IX86_BUILTIN_PF2ID,
27922 IX86_BUILTIN_PFACC,
27923 IX86_BUILTIN_PFADD,
27924 IX86_BUILTIN_PFCMPEQ,
27925 IX86_BUILTIN_PFCMPGE,
27926 IX86_BUILTIN_PFCMPGT,
27927 IX86_BUILTIN_PFMAX,
27928 IX86_BUILTIN_PFMIN,
27929 IX86_BUILTIN_PFMUL,
27930 IX86_BUILTIN_PFRCP,
27931 IX86_BUILTIN_PFRCPIT1,
27932 IX86_BUILTIN_PFRCPIT2,
27933 IX86_BUILTIN_PFRSQIT1,
27934 IX86_BUILTIN_PFRSQRT,
27935 IX86_BUILTIN_PFSUB,
27936 IX86_BUILTIN_PFSUBR,
27937 IX86_BUILTIN_PI2FD,
27938 IX86_BUILTIN_PMULHRW,
27939
27940 /* 3DNow! Athlon Extensions */
27941 IX86_BUILTIN_PF2IW,
27942 IX86_BUILTIN_PFNACC,
27943 IX86_BUILTIN_PFPNACC,
27944 IX86_BUILTIN_PI2FW,
27945 IX86_BUILTIN_PSWAPDSI,
27946 IX86_BUILTIN_PSWAPDSF,
27947
27948 /* SSE2 */
27949 IX86_BUILTIN_ADDPD,
27950 IX86_BUILTIN_ADDSD,
27951 IX86_BUILTIN_DIVPD,
27952 IX86_BUILTIN_DIVSD,
27953 IX86_BUILTIN_MULPD,
27954 IX86_BUILTIN_MULSD,
27955 IX86_BUILTIN_SUBPD,
27956 IX86_BUILTIN_SUBSD,
27957
27958 IX86_BUILTIN_CMPEQPD,
27959 IX86_BUILTIN_CMPLTPD,
27960 IX86_BUILTIN_CMPLEPD,
27961 IX86_BUILTIN_CMPGTPD,
27962 IX86_BUILTIN_CMPGEPD,
27963 IX86_BUILTIN_CMPNEQPD,
27964 IX86_BUILTIN_CMPNLTPD,
27965 IX86_BUILTIN_CMPNLEPD,
27966 IX86_BUILTIN_CMPNGTPD,
27967 IX86_BUILTIN_CMPNGEPD,
27968 IX86_BUILTIN_CMPORDPD,
27969 IX86_BUILTIN_CMPUNORDPD,
27970 IX86_BUILTIN_CMPEQSD,
27971 IX86_BUILTIN_CMPLTSD,
27972 IX86_BUILTIN_CMPLESD,
27973 IX86_BUILTIN_CMPNEQSD,
27974 IX86_BUILTIN_CMPNLTSD,
27975 IX86_BUILTIN_CMPNLESD,
27976 IX86_BUILTIN_CMPORDSD,
27977 IX86_BUILTIN_CMPUNORDSD,
27978
27979 IX86_BUILTIN_COMIEQSD,
27980 IX86_BUILTIN_COMILTSD,
27981 IX86_BUILTIN_COMILESD,
27982 IX86_BUILTIN_COMIGTSD,
27983 IX86_BUILTIN_COMIGESD,
27984 IX86_BUILTIN_COMINEQSD,
27985 IX86_BUILTIN_UCOMIEQSD,
27986 IX86_BUILTIN_UCOMILTSD,
27987 IX86_BUILTIN_UCOMILESD,
27988 IX86_BUILTIN_UCOMIGTSD,
27989 IX86_BUILTIN_UCOMIGESD,
27990 IX86_BUILTIN_UCOMINEQSD,
27991
27992 IX86_BUILTIN_MAXPD,
27993 IX86_BUILTIN_MAXSD,
27994 IX86_BUILTIN_MINPD,
27995 IX86_BUILTIN_MINSD,
27996
27997 IX86_BUILTIN_ANDPD,
27998 IX86_BUILTIN_ANDNPD,
27999 IX86_BUILTIN_ORPD,
28000 IX86_BUILTIN_XORPD,
28001
28002 IX86_BUILTIN_SQRTPD,
28003 IX86_BUILTIN_SQRTSD,
28004
28005 IX86_BUILTIN_UNPCKHPD,
28006 IX86_BUILTIN_UNPCKLPD,
28007
28008 IX86_BUILTIN_SHUFPD,
28009
28010 IX86_BUILTIN_LOADUPD,
28011 IX86_BUILTIN_STOREUPD,
28012 IX86_BUILTIN_MOVSD,
28013
28014 IX86_BUILTIN_LOADHPD,
28015 IX86_BUILTIN_LOADLPD,
28016
28017 IX86_BUILTIN_CVTDQ2PD,
28018 IX86_BUILTIN_CVTDQ2PS,
28019
28020 IX86_BUILTIN_CVTPD2DQ,
28021 IX86_BUILTIN_CVTPD2PI,
28022 IX86_BUILTIN_CVTPD2PS,
28023 IX86_BUILTIN_CVTTPD2DQ,
28024 IX86_BUILTIN_CVTTPD2PI,
28025
28026 IX86_BUILTIN_CVTPI2PD,
28027 IX86_BUILTIN_CVTSI2SD,
28028 IX86_BUILTIN_CVTSI642SD,
28029
28030 IX86_BUILTIN_CVTSD2SI,
28031 IX86_BUILTIN_CVTSD2SI64,
28032 IX86_BUILTIN_CVTSD2SS,
28033 IX86_BUILTIN_CVTSS2SD,
28034 IX86_BUILTIN_CVTTSD2SI,
28035 IX86_BUILTIN_CVTTSD2SI64,
28036
28037 IX86_BUILTIN_CVTPS2DQ,
28038 IX86_BUILTIN_CVTPS2PD,
28039 IX86_BUILTIN_CVTTPS2DQ,
28040
28041 IX86_BUILTIN_MOVNTI,
28042 IX86_BUILTIN_MOVNTI64,
28043 IX86_BUILTIN_MOVNTPD,
28044 IX86_BUILTIN_MOVNTDQ,
28045
28046 IX86_BUILTIN_MOVQ128,
28047
28048 /* SSE2 MMX */
28049 IX86_BUILTIN_MASKMOVDQU,
28050 IX86_BUILTIN_MOVMSKPD,
28051 IX86_BUILTIN_PMOVMSKB128,
28052
28053 IX86_BUILTIN_PACKSSWB128,
28054 IX86_BUILTIN_PACKSSDW128,
28055 IX86_BUILTIN_PACKUSWB128,
28056
28057 IX86_BUILTIN_PADDB128,
28058 IX86_BUILTIN_PADDW128,
28059 IX86_BUILTIN_PADDD128,
28060 IX86_BUILTIN_PADDQ128,
28061 IX86_BUILTIN_PADDSB128,
28062 IX86_BUILTIN_PADDSW128,
28063 IX86_BUILTIN_PADDUSB128,
28064 IX86_BUILTIN_PADDUSW128,
28065 IX86_BUILTIN_PSUBB128,
28066 IX86_BUILTIN_PSUBW128,
28067 IX86_BUILTIN_PSUBD128,
28068 IX86_BUILTIN_PSUBQ128,
28069 IX86_BUILTIN_PSUBSB128,
28070 IX86_BUILTIN_PSUBSW128,
28071 IX86_BUILTIN_PSUBUSB128,
28072 IX86_BUILTIN_PSUBUSW128,
28073
28074 IX86_BUILTIN_PAND128,
28075 IX86_BUILTIN_PANDN128,
28076 IX86_BUILTIN_POR128,
28077 IX86_BUILTIN_PXOR128,
28078
28079 IX86_BUILTIN_PAVGB128,
28080 IX86_BUILTIN_PAVGW128,
28081
28082 IX86_BUILTIN_PCMPEQB128,
28083 IX86_BUILTIN_PCMPEQW128,
28084 IX86_BUILTIN_PCMPEQD128,
28085 IX86_BUILTIN_PCMPGTB128,
28086 IX86_BUILTIN_PCMPGTW128,
28087 IX86_BUILTIN_PCMPGTD128,
28088
28089 IX86_BUILTIN_PMADDWD128,
28090
28091 IX86_BUILTIN_PMAXSW128,
28092 IX86_BUILTIN_PMAXUB128,
28093 IX86_BUILTIN_PMINSW128,
28094 IX86_BUILTIN_PMINUB128,
28095
28096 IX86_BUILTIN_PMULUDQ,
28097 IX86_BUILTIN_PMULUDQ128,
28098 IX86_BUILTIN_PMULHUW128,
28099 IX86_BUILTIN_PMULHW128,
28100 IX86_BUILTIN_PMULLW128,
28101
28102 IX86_BUILTIN_PSADBW128,
28103 IX86_BUILTIN_PSHUFHW,
28104 IX86_BUILTIN_PSHUFLW,
28105 IX86_BUILTIN_PSHUFD,
28106
28107 IX86_BUILTIN_PSLLDQI128,
28108 IX86_BUILTIN_PSLLWI128,
28109 IX86_BUILTIN_PSLLDI128,
28110 IX86_BUILTIN_PSLLQI128,
28111 IX86_BUILTIN_PSRAWI128,
28112 IX86_BUILTIN_PSRADI128,
28113 IX86_BUILTIN_PSRLDQI128,
28114 IX86_BUILTIN_PSRLWI128,
28115 IX86_BUILTIN_PSRLDI128,
28116 IX86_BUILTIN_PSRLQI128,
28117
28118 IX86_BUILTIN_PSLLDQ128,
28119 IX86_BUILTIN_PSLLW128,
28120 IX86_BUILTIN_PSLLD128,
28121 IX86_BUILTIN_PSLLQ128,
28122 IX86_BUILTIN_PSRAW128,
28123 IX86_BUILTIN_PSRAD128,
28124 IX86_BUILTIN_PSRLW128,
28125 IX86_BUILTIN_PSRLD128,
28126 IX86_BUILTIN_PSRLQ128,
28127
28128 IX86_BUILTIN_PUNPCKHBW128,
28129 IX86_BUILTIN_PUNPCKHWD128,
28130 IX86_BUILTIN_PUNPCKHDQ128,
28131 IX86_BUILTIN_PUNPCKHQDQ128,
28132 IX86_BUILTIN_PUNPCKLBW128,
28133 IX86_BUILTIN_PUNPCKLWD128,
28134 IX86_BUILTIN_PUNPCKLDQ128,
28135 IX86_BUILTIN_PUNPCKLQDQ128,
28136
28137 IX86_BUILTIN_CLFLUSH,
28138 IX86_BUILTIN_MFENCE,
28139 IX86_BUILTIN_LFENCE,
28140 IX86_BUILTIN_PAUSE,
28141
28142 IX86_BUILTIN_FNSTENV,
28143 IX86_BUILTIN_FLDENV,
28144 IX86_BUILTIN_FNSTSW,
28145 IX86_BUILTIN_FNCLEX,
28146
28147 IX86_BUILTIN_BSRSI,
28148 IX86_BUILTIN_BSRDI,
28149 IX86_BUILTIN_RDPMC,
28150 IX86_BUILTIN_RDTSC,
28151 IX86_BUILTIN_RDTSCP,
28152 IX86_BUILTIN_ROLQI,
28153 IX86_BUILTIN_ROLHI,
28154 IX86_BUILTIN_RORQI,
28155 IX86_BUILTIN_RORHI,
28156
28157 /* SSE3. */
28158 IX86_BUILTIN_ADDSUBPS,
28159 IX86_BUILTIN_HADDPS,
28160 IX86_BUILTIN_HSUBPS,
28161 IX86_BUILTIN_MOVSHDUP,
28162 IX86_BUILTIN_MOVSLDUP,
28163 IX86_BUILTIN_ADDSUBPD,
28164 IX86_BUILTIN_HADDPD,
28165 IX86_BUILTIN_HSUBPD,
28166 IX86_BUILTIN_LDDQU,
28167
28168 IX86_BUILTIN_MONITOR,
28169 IX86_BUILTIN_MWAIT,
28170
28171 /* SSSE3. */
28172 IX86_BUILTIN_PHADDW,
28173 IX86_BUILTIN_PHADDD,
28174 IX86_BUILTIN_PHADDSW,
28175 IX86_BUILTIN_PHSUBW,
28176 IX86_BUILTIN_PHSUBD,
28177 IX86_BUILTIN_PHSUBSW,
28178 IX86_BUILTIN_PMADDUBSW,
28179 IX86_BUILTIN_PMULHRSW,
28180 IX86_BUILTIN_PSHUFB,
28181 IX86_BUILTIN_PSIGNB,
28182 IX86_BUILTIN_PSIGNW,
28183 IX86_BUILTIN_PSIGND,
28184 IX86_BUILTIN_PALIGNR,
28185 IX86_BUILTIN_PABSB,
28186 IX86_BUILTIN_PABSW,
28187 IX86_BUILTIN_PABSD,
28188
28189 IX86_BUILTIN_PHADDW128,
28190 IX86_BUILTIN_PHADDD128,
28191 IX86_BUILTIN_PHADDSW128,
28192 IX86_BUILTIN_PHSUBW128,
28193 IX86_BUILTIN_PHSUBD128,
28194 IX86_BUILTIN_PHSUBSW128,
28195 IX86_BUILTIN_PMADDUBSW128,
28196 IX86_BUILTIN_PMULHRSW128,
28197 IX86_BUILTIN_PSHUFB128,
28198 IX86_BUILTIN_PSIGNB128,
28199 IX86_BUILTIN_PSIGNW128,
28200 IX86_BUILTIN_PSIGND128,
28201 IX86_BUILTIN_PALIGNR128,
28202 IX86_BUILTIN_PABSB128,
28203 IX86_BUILTIN_PABSW128,
28204 IX86_BUILTIN_PABSD128,
28205
28206 /* AMDFAM10 - SSE4A New Instructions. */
28207 IX86_BUILTIN_MOVNTSD,
28208 IX86_BUILTIN_MOVNTSS,
28209 IX86_BUILTIN_EXTRQI,
28210 IX86_BUILTIN_EXTRQ,
28211 IX86_BUILTIN_INSERTQI,
28212 IX86_BUILTIN_INSERTQ,
28213
28214 /* SSE4.1. */
28215 IX86_BUILTIN_BLENDPD,
28216 IX86_BUILTIN_BLENDPS,
28217 IX86_BUILTIN_BLENDVPD,
28218 IX86_BUILTIN_BLENDVPS,
28219 IX86_BUILTIN_PBLENDVB128,
28220 IX86_BUILTIN_PBLENDW128,
28221
28222 IX86_BUILTIN_DPPD,
28223 IX86_BUILTIN_DPPS,
28224
28225 IX86_BUILTIN_INSERTPS128,
28226
28227 IX86_BUILTIN_MOVNTDQA,
28228 IX86_BUILTIN_MPSADBW128,
28229 IX86_BUILTIN_PACKUSDW128,
28230 IX86_BUILTIN_PCMPEQQ,
28231 IX86_BUILTIN_PHMINPOSUW128,
28232
28233 IX86_BUILTIN_PMAXSB128,
28234 IX86_BUILTIN_PMAXSD128,
28235 IX86_BUILTIN_PMAXUD128,
28236 IX86_BUILTIN_PMAXUW128,
28237
28238 IX86_BUILTIN_PMINSB128,
28239 IX86_BUILTIN_PMINSD128,
28240 IX86_BUILTIN_PMINUD128,
28241 IX86_BUILTIN_PMINUW128,
28242
28243 IX86_BUILTIN_PMOVSXBW128,
28244 IX86_BUILTIN_PMOVSXBD128,
28245 IX86_BUILTIN_PMOVSXBQ128,
28246 IX86_BUILTIN_PMOVSXWD128,
28247 IX86_BUILTIN_PMOVSXWQ128,
28248 IX86_BUILTIN_PMOVSXDQ128,
28249
28250 IX86_BUILTIN_PMOVZXBW128,
28251 IX86_BUILTIN_PMOVZXBD128,
28252 IX86_BUILTIN_PMOVZXBQ128,
28253 IX86_BUILTIN_PMOVZXWD128,
28254 IX86_BUILTIN_PMOVZXWQ128,
28255 IX86_BUILTIN_PMOVZXDQ128,
28256
28257 IX86_BUILTIN_PMULDQ128,
28258 IX86_BUILTIN_PMULLD128,
28259
28260 IX86_BUILTIN_ROUNDSD,
28261 IX86_BUILTIN_ROUNDSS,
28262
28263 IX86_BUILTIN_ROUNDPD,
28264 IX86_BUILTIN_ROUNDPS,
28265
28266 IX86_BUILTIN_FLOORPD,
28267 IX86_BUILTIN_CEILPD,
28268 IX86_BUILTIN_TRUNCPD,
28269 IX86_BUILTIN_RINTPD,
28270 IX86_BUILTIN_ROUNDPD_AZ,
28271
28272 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28273 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28274 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28275
28276 IX86_BUILTIN_FLOORPS,
28277 IX86_BUILTIN_CEILPS,
28278 IX86_BUILTIN_TRUNCPS,
28279 IX86_BUILTIN_RINTPS,
28280 IX86_BUILTIN_ROUNDPS_AZ,
28281
28282 IX86_BUILTIN_FLOORPS_SFIX,
28283 IX86_BUILTIN_CEILPS_SFIX,
28284 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28285
28286 IX86_BUILTIN_PTESTZ,
28287 IX86_BUILTIN_PTESTC,
28288 IX86_BUILTIN_PTESTNZC,
28289
28290 IX86_BUILTIN_VEC_INIT_V2SI,
28291 IX86_BUILTIN_VEC_INIT_V4HI,
28292 IX86_BUILTIN_VEC_INIT_V8QI,
28293 IX86_BUILTIN_VEC_EXT_V2DF,
28294 IX86_BUILTIN_VEC_EXT_V2DI,
28295 IX86_BUILTIN_VEC_EXT_V4SF,
28296 IX86_BUILTIN_VEC_EXT_V4SI,
28297 IX86_BUILTIN_VEC_EXT_V8HI,
28298 IX86_BUILTIN_VEC_EXT_V2SI,
28299 IX86_BUILTIN_VEC_EXT_V4HI,
28300 IX86_BUILTIN_VEC_EXT_V16QI,
28301 IX86_BUILTIN_VEC_SET_V2DI,
28302 IX86_BUILTIN_VEC_SET_V4SF,
28303 IX86_BUILTIN_VEC_SET_V4SI,
28304 IX86_BUILTIN_VEC_SET_V8HI,
28305 IX86_BUILTIN_VEC_SET_V4HI,
28306 IX86_BUILTIN_VEC_SET_V16QI,
28307
28308 IX86_BUILTIN_VEC_PACK_SFIX,
28309 IX86_BUILTIN_VEC_PACK_SFIX256,
28310
28311 /* SSE4.2. */
28312 IX86_BUILTIN_CRC32QI,
28313 IX86_BUILTIN_CRC32HI,
28314 IX86_BUILTIN_CRC32SI,
28315 IX86_BUILTIN_CRC32DI,
28316
28317 IX86_BUILTIN_PCMPESTRI128,
28318 IX86_BUILTIN_PCMPESTRM128,
28319 IX86_BUILTIN_PCMPESTRA128,
28320 IX86_BUILTIN_PCMPESTRC128,
28321 IX86_BUILTIN_PCMPESTRO128,
28322 IX86_BUILTIN_PCMPESTRS128,
28323 IX86_BUILTIN_PCMPESTRZ128,
28324 IX86_BUILTIN_PCMPISTRI128,
28325 IX86_BUILTIN_PCMPISTRM128,
28326 IX86_BUILTIN_PCMPISTRA128,
28327 IX86_BUILTIN_PCMPISTRC128,
28328 IX86_BUILTIN_PCMPISTRO128,
28329 IX86_BUILTIN_PCMPISTRS128,
28330 IX86_BUILTIN_PCMPISTRZ128,
28331
28332 IX86_BUILTIN_PCMPGTQ,
28333
28334 /* AES instructions */
28335 IX86_BUILTIN_AESENC128,
28336 IX86_BUILTIN_AESENCLAST128,
28337 IX86_BUILTIN_AESDEC128,
28338 IX86_BUILTIN_AESDECLAST128,
28339 IX86_BUILTIN_AESIMC128,
28340 IX86_BUILTIN_AESKEYGENASSIST128,
28341
28342 /* PCLMUL instruction */
28343 IX86_BUILTIN_PCLMULQDQ128,
28344
28345 /* AVX */
28346 IX86_BUILTIN_ADDPD256,
28347 IX86_BUILTIN_ADDPS256,
28348 IX86_BUILTIN_ADDSUBPD256,
28349 IX86_BUILTIN_ADDSUBPS256,
28350 IX86_BUILTIN_ANDPD256,
28351 IX86_BUILTIN_ANDPS256,
28352 IX86_BUILTIN_ANDNPD256,
28353 IX86_BUILTIN_ANDNPS256,
28354 IX86_BUILTIN_BLENDPD256,
28355 IX86_BUILTIN_BLENDPS256,
28356 IX86_BUILTIN_BLENDVPD256,
28357 IX86_BUILTIN_BLENDVPS256,
28358 IX86_BUILTIN_DIVPD256,
28359 IX86_BUILTIN_DIVPS256,
28360 IX86_BUILTIN_DPPS256,
28361 IX86_BUILTIN_HADDPD256,
28362 IX86_BUILTIN_HADDPS256,
28363 IX86_BUILTIN_HSUBPD256,
28364 IX86_BUILTIN_HSUBPS256,
28365 IX86_BUILTIN_MAXPD256,
28366 IX86_BUILTIN_MAXPS256,
28367 IX86_BUILTIN_MINPD256,
28368 IX86_BUILTIN_MINPS256,
28369 IX86_BUILTIN_MULPD256,
28370 IX86_BUILTIN_MULPS256,
28371 IX86_BUILTIN_ORPD256,
28372 IX86_BUILTIN_ORPS256,
28373 IX86_BUILTIN_SHUFPD256,
28374 IX86_BUILTIN_SHUFPS256,
28375 IX86_BUILTIN_SUBPD256,
28376 IX86_BUILTIN_SUBPS256,
28377 IX86_BUILTIN_XORPD256,
28378 IX86_BUILTIN_XORPS256,
28379 IX86_BUILTIN_CMPSD,
28380 IX86_BUILTIN_CMPSS,
28381 IX86_BUILTIN_CMPPD,
28382 IX86_BUILTIN_CMPPS,
28383 IX86_BUILTIN_CMPPD256,
28384 IX86_BUILTIN_CMPPS256,
28385 IX86_BUILTIN_CVTDQ2PD256,
28386 IX86_BUILTIN_CVTDQ2PS256,
28387 IX86_BUILTIN_CVTPD2PS256,
28388 IX86_BUILTIN_CVTPS2DQ256,
28389 IX86_BUILTIN_CVTPS2PD256,
28390 IX86_BUILTIN_CVTTPD2DQ256,
28391 IX86_BUILTIN_CVTPD2DQ256,
28392 IX86_BUILTIN_CVTTPS2DQ256,
28393 IX86_BUILTIN_EXTRACTF128PD256,
28394 IX86_BUILTIN_EXTRACTF128PS256,
28395 IX86_BUILTIN_EXTRACTF128SI256,
28396 IX86_BUILTIN_VZEROALL,
28397 IX86_BUILTIN_VZEROUPPER,
28398 IX86_BUILTIN_VPERMILVARPD,
28399 IX86_BUILTIN_VPERMILVARPS,
28400 IX86_BUILTIN_VPERMILVARPD256,
28401 IX86_BUILTIN_VPERMILVARPS256,
28402 IX86_BUILTIN_VPERMILPD,
28403 IX86_BUILTIN_VPERMILPS,
28404 IX86_BUILTIN_VPERMILPD256,
28405 IX86_BUILTIN_VPERMILPS256,
28406 IX86_BUILTIN_VPERMIL2PD,
28407 IX86_BUILTIN_VPERMIL2PS,
28408 IX86_BUILTIN_VPERMIL2PD256,
28409 IX86_BUILTIN_VPERMIL2PS256,
28410 IX86_BUILTIN_VPERM2F128PD256,
28411 IX86_BUILTIN_VPERM2F128PS256,
28412 IX86_BUILTIN_VPERM2F128SI256,
28413 IX86_BUILTIN_VBROADCASTSS,
28414 IX86_BUILTIN_VBROADCASTSD256,
28415 IX86_BUILTIN_VBROADCASTSS256,
28416 IX86_BUILTIN_VBROADCASTPD256,
28417 IX86_BUILTIN_VBROADCASTPS256,
28418 IX86_BUILTIN_VINSERTF128PD256,
28419 IX86_BUILTIN_VINSERTF128PS256,
28420 IX86_BUILTIN_VINSERTF128SI256,
28421 IX86_BUILTIN_LOADUPD256,
28422 IX86_BUILTIN_LOADUPS256,
28423 IX86_BUILTIN_STOREUPD256,
28424 IX86_BUILTIN_STOREUPS256,
28425 IX86_BUILTIN_LDDQU256,
28426 IX86_BUILTIN_MOVNTDQ256,
28427 IX86_BUILTIN_MOVNTPD256,
28428 IX86_BUILTIN_MOVNTPS256,
28429 IX86_BUILTIN_LOADDQU256,
28430 IX86_BUILTIN_STOREDQU256,
28431 IX86_BUILTIN_MASKLOADPD,
28432 IX86_BUILTIN_MASKLOADPS,
28433 IX86_BUILTIN_MASKSTOREPD,
28434 IX86_BUILTIN_MASKSTOREPS,
28435 IX86_BUILTIN_MASKLOADPD256,
28436 IX86_BUILTIN_MASKLOADPS256,
28437 IX86_BUILTIN_MASKSTOREPD256,
28438 IX86_BUILTIN_MASKSTOREPS256,
28439 IX86_BUILTIN_MOVSHDUP256,
28440 IX86_BUILTIN_MOVSLDUP256,
28441 IX86_BUILTIN_MOVDDUP256,
28442
28443 IX86_BUILTIN_SQRTPD256,
28444 IX86_BUILTIN_SQRTPS256,
28445 IX86_BUILTIN_SQRTPS_NR256,
28446 IX86_BUILTIN_RSQRTPS256,
28447 IX86_BUILTIN_RSQRTPS_NR256,
28448
28449 IX86_BUILTIN_RCPPS256,
28450
28451 IX86_BUILTIN_ROUNDPD256,
28452 IX86_BUILTIN_ROUNDPS256,
28453
28454 IX86_BUILTIN_FLOORPD256,
28455 IX86_BUILTIN_CEILPD256,
28456 IX86_BUILTIN_TRUNCPD256,
28457 IX86_BUILTIN_RINTPD256,
28458 IX86_BUILTIN_ROUNDPD_AZ256,
28459
28460 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28461 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28462 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28463
28464 IX86_BUILTIN_FLOORPS256,
28465 IX86_BUILTIN_CEILPS256,
28466 IX86_BUILTIN_TRUNCPS256,
28467 IX86_BUILTIN_RINTPS256,
28468 IX86_BUILTIN_ROUNDPS_AZ256,
28469
28470 IX86_BUILTIN_FLOORPS_SFIX256,
28471 IX86_BUILTIN_CEILPS_SFIX256,
28472 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28473
28474 IX86_BUILTIN_UNPCKHPD256,
28475 IX86_BUILTIN_UNPCKLPD256,
28476 IX86_BUILTIN_UNPCKHPS256,
28477 IX86_BUILTIN_UNPCKLPS256,
28478
28479 IX86_BUILTIN_SI256_SI,
28480 IX86_BUILTIN_PS256_PS,
28481 IX86_BUILTIN_PD256_PD,
28482 IX86_BUILTIN_SI_SI256,
28483 IX86_BUILTIN_PS_PS256,
28484 IX86_BUILTIN_PD_PD256,
28485
28486 IX86_BUILTIN_VTESTZPD,
28487 IX86_BUILTIN_VTESTCPD,
28488 IX86_BUILTIN_VTESTNZCPD,
28489 IX86_BUILTIN_VTESTZPS,
28490 IX86_BUILTIN_VTESTCPS,
28491 IX86_BUILTIN_VTESTNZCPS,
28492 IX86_BUILTIN_VTESTZPD256,
28493 IX86_BUILTIN_VTESTCPD256,
28494 IX86_BUILTIN_VTESTNZCPD256,
28495 IX86_BUILTIN_VTESTZPS256,
28496 IX86_BUILTIN_VTESTCPS256,
28497 IX86_BUILTIN_VTESTNZCPS256,
28498 IX86_BUILTIN_PTESTZ256,
28499 IX86_BUILTIN_PTESTC256,
28500 IX86_BUILTIN_PTESTNZC256,
28501
28502 IX86_BUILTIN_MOVMSKPD256,
28503 IX86_BUILTIN_MOVMSKPS256,
28504
28505 /* AVX2 */
28506 IX86_BUILTIN_MPSADBW256,
28507 IX86_BUILTIN_PABSB256,
28508 IX86_BUILTIN_PABSW256,
28509 IX86_BUILTIN_PABSD256,
28510 IX86_BUILTIN_PACKSSDW256,
28511 IX86_BUILTIN_PACKSSWB256,
28512 IX86_BUILTIN_PACKUSDW256,
28513 IX86_BUILTIN_PACKUSWB256,
28514 IX86_BUILTIN_PADDB256,
28515 IX86_BUILTIN_PADDW256,
28516 IX86_BUILTIN_PADDD256,
28517 IX86_BUILTIN_PADDQ256,
28518 IX86_BUILTIN_PADDSB256,
28519 IX86_BUILTIN_PADDSW256,
28520 IX86_BUILTIN_PADDUSB256,
28521 IX86_BUILTIN_PADDUSW256,
28522 IX86_BUILTIN_PALIGNR256,
28523 IX86_BUILTIN_AND256I,
28524 IX86_BUILTIN_ANDNOT256I,
28525 IX86_BUILTIN_PAVGB256,
28526 IX86_BUILTIN_PAVGW256,
28527 IX86_BUILTIN_PBLENDVB256,
28528 IX86_BUILTIN_PBLENDVW256,
28529 IX86_BUILTIN_PCMPEQB256,
28530 IX86_BUILTIN_PCMPEQW256,
28531 IX86_BUILTIN_PCMPEQD256,
28532 IX86_BUILTIN_PCMPEQQ256,
28533 IX86_BUILTIN_PCMPGTB256,
28534 IX86_BUILTIN_PCMPGTW256,
28535 IX86_BUILTIN_PCMPGTD256,
28536 IX86_BUILTIN_PCMPGTQ256,
28537 IX86_BUILTIN_PHADDW256,
28538 IX86_BUILTIN_PHADDD256,
28539 IX86_BUILTIN_PHADDSW256,
28540 IX86_BUILTIN_PHSUBW256,
28541 IX86_BUILTIN_PHSUBD256,
28542 IX86_BUILTIN_PHSUBSW256,
28543 IX86_BUILTIN_PMADDUBSW256,
28544 IX86_BUILTIN_PMADDWD256,
28545 IX86_BUILTIN_PMAXSB256,
28546 IX86_BUILTIN_PMAXSW256,
28547 IX86_BUILTIN_PMAXSD256,
28548 IX86_BUILTIN_PMAXUB256,
28549 IX86_BUILTIN_PMAXUW256,
28550 IX86_BUILTIN_PMAXUD256,
28551 IX86_BUILTIN_PMINSB256,
28552 IX86_BUILTIN_PMINSW256,
28553 IX86_BUILTIN_PMINSD256,
28554 IX86_BUILTIN_PMINUB256,
28555 IX86_BUILTIN_PMINUW256,
28556 IX86_BUILTIN_PMINUD256,
28557 IX86_BUILTIN_PMOVMSKB256,
28558 IX86_BUILTIN_PMOVSXBW256,
28559 IX86_BUILTIN_PMOVSXBD256,
28560 IX86_BUILTIN_PMOVSXBQ256,
28561 IX86_BUILTIN_PMOVSXWD256,
28562 IX86_BUILTIN_PMOVSXWQ256,
28563 IX86_BUILTIN_PMOVSXDQ256,
28564 IX86_BUILTIN_PMOVZXBW256,
28565 IX86_BUILTIN_PMOVZXBD256,
28566 IX86_BUILTIN_PMOVZXBQ256,
28567 IX86_BUILTIN_PMOVZXWD256,
28568 IX86_BUILTIN_PMOVZXWQ256,
28569 IX86_BUILTIN_PMOVZXDQ256,
28570 IX86_BUILTIN_PMULDQ256,
28571 IX86_BUILTIN_PMULHRSW256,
28572 IX86_BUILTIN_PMULHUW256,
28573 IX86_BUILTIN_PMULHW256,
28574 IX86_BUILTIN_PMULLW256,
28575 IX86_BUILTIN_PMULLD256,
28576 IX86_BUILTIN_PMULUDQ256,
28577 IX86_BUILTIN_POR256,
28578 IX86_BUILTIN_PSADBW256,
28579 IX86_BUILTIN_PSHUFB256,
28580 IX86_BUILTIN_PSHUFD256,
28581 IX86_BUILTIN_PSHUFHW256,
28582 IX86_BUILTIN_PSHUFLW256,
28583 IX86_BUILTIN_PSIGNB256,
28584 IX86_BUILTIN_PSIGNW256,
28585 IX86_BUILTIN_PSIGND256,
28586 IX86_BUILTIN_PSLLDQI256,
28587 IX86_BUILTIN_PSLLWI256,
28588 IX86_BUILTIN_PSLLW256,
28589 IX86_BUILTIN_PSLLDI256,
28590 IX86_BUILTIN_PSLLD256,
28591 IX86_BUILTIN_PSLLQI256,
28592 IX86_BUILTIN_PSLLQ256,
28593 IX86_BUILTIN_PSRAWI256,
28594 IX86_BUILTIN_PSRAW256,
28595 IX86_BUILTIN_PSRADI256,
28596 IX86_BUILTIN_PSRAD256,
28597 IX86_BUILTIN_PSRLDQI256,
28598 IX86_BUILTIN_PSRLWI256,
28599 IX86_BUILTIN_PSRLW256,
28600 IX86_BUILTIN_PSRLDI256,
28601 IX86_BUILTIN_PSRLD256,
28602 IX86_BUILTIN_PSRLQI256,
28603 IX86_BUILTIN_PSRLQ256,
28604 IX86_BUILTIN_PSUBB256,
28605 IX86_BUILTIN_PSUBW256,
28606 IX86_BUILTIN_PSUBD256,
28607 IX86_BUILTIN_PSUBQ256,
28608 IX86_BUILTIN_PSUBSB256,
28609 IX86_BUILTIN_PSUBSW256,
28610 IX86_BUILTIN_PSUBUSB256,
28611 IX86_BUILTIN_PSUBUSW256,
28612 IX86_BUILTIN_PUNPCKHBW256,
28613 IX86_BUILTIN_PUNPCKHWD256,
28614 IX86_BUILTIN_PUNPCKHDQ256,
28615 IX86_BUILTIN_PUNPCKHQDQ256,
28616 IX86_BUILTIN_PUNPCKLBW256,
28617 IX86_BUILTIN_PUNPCKLWD256,
28618 IX86_BUILTIN_PUNPCKLDQ256,
28619 IX86_BUILTIN_PUNPCKLQDQ256,
28620 IX86_BUILTIN_PXOR256,
28621 IX86_BUILTIN_MOVNTDQA256,
28622 IX86_BUILTIN_VBROADCASTSS_PS,
28623 IX86_BUILTIN_VBROADCASTSS_PS256,
28624 IX86_BUILTIN_VBROADCASTSD_PD256,
28625 IX86_BUILTIN_VBROADCASTSI256,
28626 IX86_BUILTIN_PBLENDD256,
28627 IX86_BUILTIN_PBLENDD128,
28628 IX86_BUILTIN_PBROADCASTB256,
28629 IX86_BUILTIN_PBROADCASTW256,
28630 IX86_BUILTIN_PBROADCASTD256,
28631 IX86_BUILTIN_PBROADCASTQ256,
28632 IX86_BUILTIN_PBROADCASTB128,
28633 IX86_BUILTIN_PBROADCASTW128,
28634 IX86_BUILTIN_PBROADCASTD128,
28635 IX86_BUILTIN_PBROADCASTQ128,
28636 IX86_BUILTIN_VPERMVARSI256,
28637 IX86_BUILTIN_VPERMDF256,
28638 IX86_BUILTIN_VPERMVARSF256,
28639 IX86_BUILTIN_VPERMDI256,
28640 IX86_BUILTIN_VPERMTI256,
28641 IX86_BUILTIN_VEXTRACT128I256,
28642 IX86_BUILTIN_VINSERT128I256,
28643 IX86_BUILTIN_MASKLOADD,
28644 IX86_BUILTIN_MASKLOADQ,
28645 IX86_BUILTIN_MASKLOADD256,
28646 IX86_BUILTIN_MASKLOADQ256,
28647 IX86_BUILTIN_MASKSTORED,
28648 IX86_BUILTIN_MASKSTOREQ,
28649 IX86_BUILTIN_MASKSTORED256,
28650 IX86_BUILTIN_MASKSTOREQ256,
28651 IX86_BUILTIN_PSLLVV4DI,
28652 IX86_BUILTIN_PSLLVV2DI,
28653 IX86_BUILTIN_PSLLVV8SI,
28654 IX86_BUILTIN_PSLLVV4SI,
28655 IX86_BUILTIN_PSRAVV8SI,
28656 IX86_BUILTIN_PSRAVV4SI,
28657 IX86_BUILTIN_PSRLVV4DI,
28658 IX86_BUILTIN_PSRLVV2DI,
28659 IX86_BUILTIN_PSRLVV8SI,
28660 IX86_BUILTIN_PSRLVV4SI,
28661
28662 IX86_BUILTIN_GATHERSIV2DF,
28663 IX86_BUILTIN_GATHERSIV4DF,
28664 IX86_BUILTIN_GATHERDIV2DF,
28665 IX86_BUILTIN_GATHERDIV4DF,
28666 IX86_BUILTIN_GATHERSIV4SF,
28667 IX86_BUILTIN_GATHERSIV8SF,
28668 IX86_BUILTIN_GATHERDIV4SF,
28669 IX86_BUILTIN_GATHERDIV8SF,
28670 IX86_BUILTIN_GATHERSIV2DI,
28671 IX86_BUILTIN_GATHERSIV4DI,
28672 IX86_BUILTIN_GATHERDIV2DI,
28673 IX86_BUILTIN_GATHERDIV4DI,
28674 IX86_BUILTIN_GATHERSIV4SI,
28675 IX86_BUILTIN_GATHERSIV8SI,
28676 IX86_BUILTIN_GATHERDIV4SI,
28677 IX86_BUILTIN_GATHERDIV8SI,
28678
28679 /* AVX512F */
28680 IX86_BUILTIN_SI512_SI256,
28681 IX86_BUILTIN_PD512_PD256,
28682 IX86_BUILTIN_PS512_PS256,
28683 IX86_BUILTIN_SI512_SI,
28684 IX86_BUILTIN_PD512_PD,
28685 IX86_BUILTIN_PS512_PS,
28686 IX86_BUILTIN_ADDPD512,
28687 IX86_BUILTIN_ADDPS512,
28688 IX86_BUILTIN_ADDSD_ROUND,
28689 IX86_BUILTIN_ADDSS_ROUND,
28690 IX86_BUILTIN_ALIGND512,
28691 IX86_BUILTIN_ALIGNQ512,
28692 IX86_BUILTIN_BLENDMD512,
28693 IX86_BUILTIN_BLENDMPD512,
28694 IX86_BUILTIN_BLENDMPS512,
28695 IX86_BUILTIN_BLENDMQ512,
28696 IX86_BUILTIN_BROADCASTF32X4_512,
28697 IX86_BUILTIN_BROADCASTF64X4_512,
28698 IX86_BUILTIN_BROADCASTI32X4_512,
28699 IX86_BUILTIN_BROADCASTI64X4_512,
28700 IX86_BUILTIN_BROADCASTSD512,
28701 IX86_BUILTIN_BROADCASTSS512,
28702 IX86_BUILTIN_CMPD512,
28703 IX86_BUILTIN_CMPPD512,
28704 IX86_BUILTIN_CMPPS512,
28705 IX86_BUILTIN_CMPQ512,
28706 IX86_BUILTIN_CMPSD_MASK,
28707 IX86_BUILTIN_CMPSS_MASK,
28708 IX86_BUILTIN_COMIDF,
28709 IX86_BUILTIN_COMISF,
28710 IX86_BUILTIN_COMPRESSPD512,
28711 IX86_BUILTIN_COMPRESSPDSTORE512,
28712 IX86_BUILTIN_COMPRESSPS512,
28713 IX86_BUILTIN_COMPRESSPSSTORE512,
28714 IX86_BUILTIN_CVTDQ2PD512,
28715 IX86_BUILTIN_CVTDQ2PS512,
28716 IX86_BUILTIN_CVTPD2DQ512,
28717 IX86_BUILTIN_CVTPD2PS512,
28718 IX86_BUILTIN_CVTPD2UDQ512,
28719 IX86_BUILTIN_CVTPH2PS512,
28720 IX86_BUILTIN_CVTPS2DQ512,
28721 IX86_BUILTIN_CVTPS2PD512,
28722 IX86_BUILTIN_CVTPS2PH512,
28723 IX86_BUILTIN_CVTPS2UDQ512,
28724 IX86_BUILTIN_CVTSD2SS_ROUND,
28725 IX86_BUILTIN_CVTSI2SD64,
28726 IX86_BUILTIN_CVTSI2SS32,
28727 IX86_BUILTIN_CVTSI2SS64,
28728 IX86_BUILTIN_CVTSS2SD_ROUND,
28729 IX86_BUILTIN_CVTTPD2DQ512,
28730 IX86_BUILTIN_CVTTPD2UDQ512,
28731 IX86_BUILTIN_CVTTPS2DQ512,
28732 IX86_BUILTIN_CVTTPS2UDQ512,
28733 IX86_BUILTIN_CVTUDQ2PD512,
28734 IX86_BUILTIN_CVTUDQ2PS512,
28735 IX86_BUILTIN_CVTUSI2SD32,
28736 IX86_BUILTIN_CVTUSI2SD64,
28737 IX86_BUILTIN_CVTUSI2SS32,
28738 IX86_BUILTIN_CVTUSI2SS64,
28739 IX86_BUILTIN_DIVPD512,
28740 IX86_BUILTIN_DIVPS512,
28741 IX86_BUILTIN_DIVSD_ROUND,
28742 IX86_BUILTIN_DIVSS_ROUND,
28743 IX86_BUILTIN_EXPANDPD512,
28744 IX86_BUILTIN_EXPANDPD512Z,
28745 IX86_BUILTIN_EXPANDPDLOAD512,
28746 IX86_BUILTIN_EXPANDPDLOAD512Z,
28747 IX86_BUILTIN_EXPANDPS512,
28748 IX86_BUILTIN_EXPANDPS512Z,
28749 IX86_BUILTIN_EXPANDPSLOAD512,
28750 IX86_BUILTIN_EXPANDPSLOAD512Z,
28751 IX86_BUILTIN_EXTRACTF32X4,
28752 IX86_BUILTIN_EXTRACTF64X4,
28753 IX86_BUILTIN_EXTRACTI32X4,
28754 IX86_BUILTIN_EXTRACTI64X4,
28755 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28756 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28757 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28758 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28759 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28760 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28761 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28762 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28763 IX86_BUILTIN_GETEXPPD512,
28764 IX86_BUILTIN_GETEXPPS512,
28765 IX86_BUILTIN_GETEXPSD128,
28766 IX86_BUILTIN_GETEXPSS128,
28767 IX86_BUILTIN_GETMANTPD512,
28768 IX86_BUILTIN_GETMANTPS512,
28769 IX86_BUILTIN_GETMANTSD128,
28770 IX86_BUILTIN_GETMANTSS128,
28771 IX86_BUILTIN_INSERTF32X4,
28772 IX86_BUILTIN_INSERTF64X4,
28773 IX86_BUILTIN_INSERTI32X4,
28774 IX86_BUILTIN_INSERTI64X4,
28775 IX86_BUILTIN_LOADAPD512,
28776 IX86_BUILTIN_LOADAPS512,
28777 IX86_BUILTIN_LOADDQUDI512,
28778 IX86_BUILTIN_LOADDQUSI512,
28779 IX86_BUILTIN_LOADUPD512,
28780 IX86_BUILTIN_LOADUPS512,
28781 IX86_BUILTIN_MAXPD512,
28782 IX86_BUILTIN_MAXPS512,
28783 IX86_BUILTIN_MAXSD_ROUND,
28784 IX86_BUILTIN_MAXSS_ROUND,
28785 IX86_BUILTIN_MINPD512,
28786 IX86_BUILTIN_MINPS512,
28787 IX86_BUILTIN_MINSD_ROUND,
28788 IX86_BUILTIN_MINSS_ROUND,
28789 IX86_BUILTIN_MOVAPD512,
28790 IX86_BUILTIN_MOVAPS512,
28791 IX86_BUILTIN_MOVDDUP512,
28792 IX86_BUILTIN_MOVDQA32LOAD512,
28793 IX86_BUILTIN_MOVDQA32STORE512,
28794 IX86_BUILTIN_MOVDQA32_512,
28795 IX86_BUILTIN_MOVDQA64LOAD512,
28796 IX86_BUILTIN_MOVDQA64STORE512,
28797 IX86_BUILTIN_MOVDQA64_512,
28798 IX86_BUILTIN_MOVNTDQ512,
28799 IX86_BUILTIN_MOVNTDQA512,
28800 IX86_BUILTIN_MOVNTPD512,
28801 IX86_BUILTIN_MOVNTPS512,
28802 IX86_BUILTIN_MOVSHDUP512,
28803 IX86_BUILTIN_MOVSLDUP512,
28804 IX86_BUILTIN_MULPD512,
28805 IX86_BUILTIN_MULPS512,
28806 IX86_BUILTIN_MULSD_ROUND,
28807 IX86_BUILTIN_MULSS_ROUND,
28808 IX86_BUILTIN_PABSD512,
28809 IX86_BUILTIN_PABSQ512,
28810 IX86_BUILTIN_PADDD512,
28811 IX86_BUILTIN_PADDQ512,
28812 IX86_BUILTIN_PANDD512,
28813 IX86_BUILTIN_PANDND512,
28814 IX86_BUILTIN_PANDNQ512,
28815 IX86_BUILTIN_PANDQ512,
28816 IX86_BUILTIN_PBROADCASTD512,
28817 IX86_BUILTIN_PBROADCASTD512_GPR,
28818 IX86_BUILTIN_PBROADCASTMB512,
28819 IX86_BUILTIN_PBROADCASTMW512,
28820 IX86_BUILTIN_PBROADCASTQ512,
28821 IX86_BUILTIN_PBROADCASTQ512_GPR,
28822 IX86_BUILTIN_PBROADCASTQ512_MEM,
28823 IX86_BUILTIN_PCMPEQD512_MASK,
28824 IX86_BUILTIN_PCMPEQQ512_MASK,
28825 IX86_BUILTIN_PCMPGTD512_MASK,
28826 IX86_BUILTIN_PCMPGTQ512_MASK,
28827 IX86_BUILTIN_PCOMPRESSD512,
28828 IX86_BUILTIN_PCOMPRESSDSTORE512,
28829 IX86_BUILTIN_PCOMPRESSQ512,
28830 IX86_BUILTIN_PCOMPRESSQSTORE512,
28831 IX86_BUILTIN_PEXPANDD512,
28832 IX86_BUILTIN_PEXPANDD512Z,
28833 IX86_BUILTIN_PEXPANDDLOAD512,
28834 IX86_BUILTIN_PEXPANDDLOAD512Z,
28835 IX86_BUILTIN_PEXPANDQ512,
28836 IX86_BUILTIN_PEXPANDQ512Z,
28837 IX86_BUILTIN_PEXPANDQLOAD512,
28838 IX86_BUILTIN_PEXPANDQLOAD512Z,
28839 IX86_BUILTIN_PMAXSD512,
28840 IX86_BUILTIN_PMAXSQ512,
28841 IX86_BUILTIN_PMAXUD512,
28842 IX86_BUILTIN_PMAXUQ512,
28843 IX86_BUILTIN_PMINSD512,
28844 IX86_BUILTIN_PMINSQ512,
28845 IX86_BUILTIN_PMINUD512,
28846 IX86_BUILTIN_PMINUQ512,
28847 IX86_BUILTIN_PMOVDB512,
28848 IX86_BUILTIN_PMOVDB512_MEM,
28849 IX86_BUILTIN_PMOVDW512,
28850 IX86_BUILTIN_PMOVDW512_MEM,
28851 IX86_BUILTIN_PMOVQB512,
28852 IX86_BUILTIN_PMOVQB512_MEM,
28853 IX86_BUILTIN_PMOVQD512,
28854 IX86_BUILTIN_PMOVQD512_MEM,
28855 IX86_BUILTIN_PMOVQW512,
28856 IX86_BUILTIN_PMOVQW512_MEM,
28857 IX86_BUILTIN_PMOVSDB512,
28858 IX86_BUILTIN_PMOVSDB512_MEM,
28859 IX86_BUILTIN_PMOVSDW512,
28860 IX86_BUILTIN_PMOVSDW512_MEM,
28861 IX86_BUILTIN_PMOVSQB512,
28862 IX86_BUILTIN_PMOVSQB512_MEM,
28863 IX86_BUILTIN_PMOVSQD512,
28864 IX86_BUILTIN_PMOVSQD512_MEM,
28865 IX86_BUILTIN_PMOVSQW512,
28866 IX86_BUILTIN_PMOVSQW512_MEM,
28867 IX86_BUILTIN_PMOVSXBD512,
28868 IX86_BUILTIN_PMOVSXBQ512,
28869 IX86_BUILTIN_PMOVSXDQ512,
28870 IX86_BUILTIN_PMOVSXWD512,
28871 IX86_BUILTIN_PMOVSXWQ512,
28872 IX86_BUILTIN_PMOVUSDB512,
28873 IX86_BUILTIN_PMOVUSDB512_MEM,
28874 IX86_BUILTIN_PMOVUSDW512,
28875 IX86_BUILTIN_PMOVUSDW512_MEM,
28876 IX86_BUILTIN_PMOVUSQB512,
28877 IX86_BUILTIN_PMOVUSQB512_MEM,
28878 IX86_BUILTIN_PMOVUSQD512,
28879 IX86_BUILTIN_PMOVUSQD512_MEM,
28880 IX86_BUILTIN_PMOVUSQW512,
28881 IX86_BUILTIN_PMOVUSQW512_MEM,
28882 IX86_BUILTIN_PMOVZXBD512,
28883 IX86_BUILTIN_PMOVZXBQ512,
28884 IX86_BUILTIN_PMOVZXDQ512,
28885 IX86_BUILTIN_PMOVZXWD512,
28886 IX86_BUILTIN_PMOVZXWQ512,
28887 IX86_BUILTIN_PMULDQ512,
28888 IX86_BUILTIN_PMULLD512,
28889 IX86_BUILTIN_PMULUDQ512,
28890 IX86_BUILTIN_PORD512,
28891 IX86_BUILTIN_PORQ512,
28892 IX86_BUILTIN_PROLD512,
28893 IX86_BUILTIN_PROLQ512,
28894 IX86_BUILTIN_PROLVD512,
28895 IX86_BUILTIN_PROLVQ512,
28896 IX86_BUILTIN_PRORD512,
28897 IX86_BUILTIN_PRORQ512,
28898 IX86_BUILTIN_PRORVD512,
28899 IX86_BUILTIN_PRORVQ512,
28900 IX86_BUILTIN_PSHUFD512,
28901 IX86_BUILTIN_PSLLD512,
28902 IX86_BUILTIN_PSLLDI512,
28903 IX86_BUILTIN_PSLLQ512,
28904 IX86_BUILTIN_PSLLQI512,
28905 IX86_BUILTIN_PSLLVV16SI,
28906 IX86_BUILTIN_PSLLVV8DI,
28907 IX86_BUILTIN_PSRAD512,
28908 IX86_BUILTIN_PSRADI512,
28909 IX86_BUILTIN_PSRAQ512,
28910 IX86_BUILTIN_PSRAQI512,
28911 IX86_BUILTIN_PSRAVV16SI,
28912 IX86_BUILTIN_PSRAVV8DI,
28913 IX86_BUILTIN_PSRLD512,
28914 IX86_BUILTIN_PSRLDI512,
28915 IX86_BUILTIN_PSRLQ512,
28916 IX86_BUILTIN_PSRLQI512,
28917 IX86_BUILTIN_PSRLVV16SI,
28918 IX86_BUILTIN_PSRLVV8DI,
28919 IX86_BUILTIN_PSUBD512,
28920 IX86_BUILTIN_PSUBQ512,
28921 IX86_BUILTIN_PTESTMD512,
28922 IX86_BUILTIN_PTESTMQ512,
28923 IX86_BUILTIN_PTESTNMD512,
28924 IX86_BUILTIN_PTESTNMQ512,
28925 IX86_BUILTIN_PUNPCKHDQ512,
28926 IX86_BUILTIN_PUNPCKHQDQ512,
28927 IX86_BUILTIN_PUNPCKLDQ512,
28928 IX86_BUILTIN_PUNPCKLQDQ512,
28929 IX86_BUILTIN_PXORD512,
28930 IX86_BUILTIN_PXORQ512,
28931 IX86_BUILTIN_RCP14PD512,
28932 IX86_BUILTIN_RCP14PS512,
28933 IX86_BUILTIN_RCP14SD,
28934 IX86_BUILTIN_RCP14SS,
28935 IX86_BUILTIN_RNDSCALEPD,
28936 IX86_BUILTIN_RNDSCALEPS,
28937 IX86_BUILTIN_RNDSCALESD,
28938 IX86_BUILTIN_RNDSCALESS,
28939 IX86_BUILTIN_RSQRT14PD512,
28940 IX86_BUILTIN_RSQRT14PS512,
28941 IX86_BUILTIN_RSQRT14SD,
28942 IX86_BUILTIN_RSQRT14SS,
28943 IX86_BUILTIN_SCALEFPD512,
28944 IX86_BUILTIN_SCALEFPS512,
28945 IX86_BUILTIN_SCALEFSD,
28946 IX86_BUILTIN_SCALEFSS,
28947 IX86_BUILTIN_SHUFPD512,
28948 IX86_BUILTIN_SHUFPS512,
28949 IX86_BUILTIN_SHUF_F32x4,
28950 IX86_BUILTIN_SHUF_F64x2,
28951 IX86_BUILTIN_SHUF_I32x4,
28952 IX86_BUILTIN_SHUF_I64x2,
28953 IX86_BUILTIN_SQRTPD512,
28954 IX86_BUILTIN_SQRTPD512_MASK,
28955 IX86_BUILTIN_SQRTPS512_MASK,
28956 IX86_BUILTIN_SQRTPS_NR512,
28957 IX86_BUILTIN_SQRTSD_ROUND,
28958 IX86_BUILTIN_SQRTSS_ROUND,
28959 IX86_BUILTIN_STOREAPD512,
28960 IX86_BUILTIN_STOREAPS512,
28961 IX86_BUILTIN_STOREDQUDI512,
28962 IX86_BUILTIN_STOREDQUSI512,
28963 IX86_BUILTIN_STOREUPD512,
28964 IX86_BUILTIN_STOREUPS512,
28965 IX86_BUILTIN_SUBPD512,
28966 IX86_BUILTIN_SUBPS512,
28967 IX86_BUILTIN_SUBSD_ROUND,
28968 IX86_BUILTIN_SUBSS_ROUND,
28969 IX86_BUILTIN_UCMPD512,
28970 IX86_BUILTIN_UCMPQ512,
28971 IX86_BUILTIN_UNPCKHPD512,
28972 IX86_BUILTIN_UNPCKHPS512,
28973 IX86_BUILTIN_UNPCKLPD512,
28974 IX86_BUILTIN_UNPCKLPS512,
28975 IX86_BUILTIN_VCVTSD2SI32,
28976 IX86_BUILTIN_VCVTSD2SI64,
28977 IX86_BUILTIN_VCVTSD2USI32,
28978 IX86_BUILTIN_VCVTSD2USI64,
28979 IX86_BUILTIN_VCVTSS2SI32,
28980 IX86_BUILTIN_VCVTSS2SI64,
28981 IX86_BUILTIN_VCVTSS2USI32,
28982 IX86_BUILTIN_VCVTSS2USI64,
28983 IX86_BUILTIN_VCVTTSD2SI32,
28984 IX86_BUILTIN_VCVTTSD2SI64,
28985 IX86_BUILTIN_VCVTTSD2USI32,
28986 IX86_BUILTIN_VCVTTSD2USI64,
28987 IX86_BUILTIN_VCVTTSS2SI32,
28988 IX86_BUILTIN_VCVTTSS2SI64,
28989 IX86_BUILTIN_VCVTTSS2USI32,
28990 IX86_BUILTIN_VCVTTSS2USI64,
28991 IX86_BUILTIN_VFMADDPD512_MASK,
28992 IX86_BUILTIN_VFMADDPD512_MASK3,
28993 IX86_BUILTIN_VFMADDPD512_MASKZ,
28994 IX86_BUILTIN_VFMADDPS512_MASK,
28995 IX86_BUILTIN_VFMADDPS512_MASK3,
28996 IX86_BUILTIN_VFMADDPS512_MASKZ,
28997 IX86_BUILTIN_VFMADDSD3_ROUND,
28998 IX86_BUILTIN_VFMADDSS3_ROUND,
28999 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29000 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29001 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29002 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29003 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29004 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29005 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29006 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29007 IX86_BUILTIN_VFMSUBPD512_MASK3,
29008 IX86_BUILTIN_VFMSUBPS512_MASK3,
29009 IX86_BUILTIN_VFMSUBSD3_MASK3,
29010 IX86_BUILTIN_VFMSUBSS3_MASK3,
29011 IX86_BUILTIN_VFNMADDPD512_MASK,
29012 IX86_BUILTIN_VFNMADDPS512_MASK,
29013 IX86_BUILTIN_VFNMSUBPD512_MASK,
29014 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29015 IX86_BUILTIN_VFNMSUBPS512_MASK,
29016 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29017 IX86_BUILTIN_VPCLZCNTD512,
29018 IX86_BUILTIN_VPCLZCNTQ512,
29019 IX86_BUILTIN_VPCONFLICTD512,
29020 IX86_BUILTIN_VPCONFLICTQ512,
29021 IX86_BUILTIN_VPERMDF512,
29022 IX86_BUILTIN_VPERMDI512,
29023 IX86_BUILTIN_VPERMI2VARD512,
29024 IX86_BUILTIN_VPERMI2VARPD512,
29025 IX86_BUILTIN_VPERMI2VARPS512,
29026 IX86_BUILTIN_VPERMI2VARQ512,
29027 IX86_BUILTIN_VPERMILPD512,
29028 IX86_BUILTIN_VPERMILPS512,
29029 IX86_BUILTIN_VPERMILVARPD512,
29030 IX86_BUILTIN_VPERMILVARPS512,
29031 IX86_BUILTIN_VPERMT2VARD512,
29032 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29033 IX86_BUILTIN_VPERMT2VARPD512,
29034 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29035 IX86_BUILTIN_VPERMT2VARPS512,
29036 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29037 IX86_BUILTIN_VPERMT2VARQ512,
29038 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29039 IX86_BUILTIN_VPERMVARDF512,
29040 IX86_BUILTIN_VPERMVARDI512,
29041 IX86_BUILTIN_VPERMVARSF512,
29042 IX86_BUILTIN_VPERMVARSI512,
29043 IX86_BUILTIN_VTERNLOGD512_MASK,
29044 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29045 IX86_BUILTIN_VTERNLOGQ512_MASK,
29046 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29047
29048 /* Mask arithmetic operations */
29049 IX86_BUILTIN_KAND16,
29050 IX86_BUILTIN_KANDN16,
29051 IX86_BUILTIN_KNOT16,
29052 IX86_BUILTIN_KOR16,
29053 IX86_BUILTIN_KORTESTC16,
29054 IX86_BUILTIN_KORTESTZ16,
29055 IX86_BUILTIN_KUNPCKBW,
29056 IX86_BUILTIN_KXNOR16,
29057 IX86_BUILTIN_KXOR16,
29058 IX86_BUILTIN_KMOV16,
29059
29060 /* AVX512VL. */
29061 IX86_BUILTIN_PMOVUSQD256_MEM,
29062 IX86_BUILTIN_PMOVUSQD128_MEM,
29063 IX86_BUILTIN_PMOVSQD256_MEM,
29064 IX86_BUILTIN_PMOVSQD128_MEM,
29065 IX86_BUILTIN_PMOVQD256_MEM,
29066 IX86_BUILTIN_PMOVQD128_MEM,
29067 IX86_BUILTIN_PMOVUSQW256_MEM,
29068 IX86_BUILTIN_PMOVUSQW128_MEM,
29069 IX86_BUILTIN_PMOVSQW256_MEM,
29070 IX86_BUILTIN_PMOVSQW128_MEM,
29071 IX86_BUILTIN_PMOVQW256_MEM,
29072 IX86_BUILTIN_PMOVQW128_MEM,
29073 IX86_BUILTIN_PMOVUSQB256_MEM,
29074 IX86_BUILTIN_PMOVUSQB128_MEM,
29075 IX86_BUILTIN_PMOVSQB256_MEM,
29076 IX86_BUILTIN_PMOVSQB128_MEM,
29077 IX86_BUILTIN_PMOVQB256_MEM,
29078 IX86_BUILTIN_PMOVQB128_MEM,
29079 IX86_BUILTIN_PMOVUSDW256_MEM,
29080 IX86_BUILTIN_PMOVUSDW128_MEM,
29081 IX86_BUILTIN_PMOVSDW256_MEM,
29082 IX86_BUILTIN_PMOVSDW128_MEM,
29083 IX86_BUILTIN_PMOVDW256_MEM,
29084 IX86_BUILTIN_PMOVDW128_MEM,
29085 IX86_BUILTIN_PMOVUSDB256_MEM,
29086 IX86_BUILTIN_PMOVUSDB128_MEM,
29087 IX86_BUILTIN_PMOVSDB256_MEM,
29088 IX86_BUILTIN_PMOVSDB128_MEM,
29089 IX86_BUILTIN_PMOVDB256_MEM,
29090 IX86_BUILTIN_PMOVDB128_MEM,
29091 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29092 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29093 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29094 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29095 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29096 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29097 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29098 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29099 IX86_BUILTIN_LOADAPD256_MASK,
29100 IX86_BUILTIN_LOADAPD128_MASK,
29101 IX86_BUILTIN_LOADAPS256_MASK,
29102 IX86_BUILTIN_LOADAPS128_MASK,
29103 IX86_BUILTIN_STOREAPD256_MASK,
29104 IX86_BUILTIN_STOREAPD128_MASK,
29105 IX86_BUILTIN_STOREAPS256_MASK,
29106 IX86_BUILTIN_STOREAPS128_MASK,
29107 IX86_BUILTIN_LOADUPD256_MASK,
29108 IX86_BUILTIN_LOADUPD128_MASK,
29109 IX86_BUILTIN_LOADUPS256_MASK,
29110 IX86_BUILTIN_LOADUPS128_MASK,
29111 IX86_BUILTIN_STOREUPD256_MASK,
29112 IX86_BUILTIN_STOREUPD128_MASK,
29113 IX86_BUILTIN_STOREUPS256_MASK,
29114 IX86_BUILTIN_STOREUPS128_MASK,
29115 IX86_BUILTIN_LOADDQUDI256_MASK,
29116 IX86_BUILTIN_LOADDQUDI128_MASK,
29117 IX86_BUILTIN_LOADDQUSI256_MASK,
29118 IX86_BUILTIN_LOADDQUSI128_MASK,
29119 IX86_BUILTIN_LOADDQUHI256_MASK,
29120 IX86_BUILTIN_LOADDQUHI128_MASK,
29121 IX86_BUILTIN_LOADDQUQI256_MASK,
29122 IX86_BUILTIN_LOADDQUQI128_MASK,
29123 IX86_BUILTIN_STOREDQUDI256_MASK,
29124 IX86_BUILTIN_STOREDQUDI128_MASK,
29125 IX86_BUILTIN_STOREDQUSI256_MASK,
29126 IX86_BUILTIN_STOREDQUSI128_MASK,
29127 IX86_BUILTIN_STOREDQUHI256_MASK,
29128 IX86_BUILTIN_STOREDQUHI128_MASK,
29129 IX86_BUILTIN_STOREDQUQI256_MASK,
29130 IX86_BUILTIN_STOREDQUQI128_MASK,
29131 IX86_BUILTIN_COMPRESSPDSTORE256,
29132 IX86_BUILTIN_COMPRESSPDSTORE128,
29133 IX86_BUILTIN_COMPRESSPSSTORE256,
29134 IX86_BUILTIN_COMPRESSPSSTORE128,
29135 IX86_BUILTIN_PCOMPRESSQSTORE256,
29136 IX86_BUILTIN_PCOMPRESSQSTORE128,
29137 IX86_BUILTIN_PCOMPRESSDSTORE256,
29138 IX86_BUILTIN_PCOMPRESSDSTORE128,
29139 IX86_BUILTIN_EXPANDPDLOAD256,
29140 IX86_BUILTIN_EXPANDPDLOAD128,
29141 IX86_BUILTIN_EXPANDPSLOAD256,
29142 IX86_BUILTIN_EXPANDPSLOAD128,
29143 IX86_BUILTIN_PEXPANDQLOAD256,
29144 IX86_BUILTIN_PEXPANDQLOAD128,
29145 IX86_BUILTIN_PEXPANDDLOAD256,
29146 IX86_BUILTIN_PEXPANDDLOAD128,
29147 IX86_BUILTIN_EXPANDPDLOAD256Z,
29148 IX86_BUILTIN_EXPANDPDLOAD128Z,
29149 IX86_BUILTIN_EXPANDPSLOAD256Z,
29150 IX86_BUILTIN_EXPANDPSLOAD128Z,
29151 IX86_BUILTIN_PEXPANDQLOAD256Z,
29152 IX86_BUILTIN_PEXPANDQLOAD128Z,
29153 IX86_BUILTIN_PEXPANDDLOAD256Z,
29154 IX86_BUILTIN_PEXPANDDLOAD128Z,
29155 IX86_BUILTIN_PALIGNR256_MASK,
29156 IX86_BUILTIN_PALIGNR128_MASK,
29157 IX86_BUILTIN_MOVDQA64_256_MASK,
29158 IX86_BUILTIN_MOVDQA64_128_MASK,
29159 IX86_BUILTIN_MOVDQA32_256_MASK,
29160 IX86_BUILTIN_MOVDQA32_128_MASK,
29161 IX86_BUILTIN_MOVAPD256_MASK,
29162 IX86_BUILTIN_MOVAPD128_MASK,
29163 IX86_BUILTIN_MOVAPS256_MASK,
29164 IX86_BUILTIN_MOVAPS128_MASK,
29165 IX86_BUILTIN_MOVDQUHI256_MASK,
29166 IX86_BUILTIN_MOVDQUHI128_MASK,
29167 IX86_BUILTIN_MOVDQUQI256_MASK,
29168 IX86_BUILTIN_MOVDQUQI128_MASK,
29169 IX86_BUILTIN_MINPS128_MASK,
29170 IX86_BUILTIN_MAXPS128_MASK,
29171 IX86_BUILTIN_MINPD128_MASK,
29172 IX86_BUILTIN_MAXPD128_MASK,
29173 IX86_BUILTIN_MAXPD256_MASK,
29174 IX86_BUILTIN_MAXPS256_MASK,
29175 IX86_BUILTIN_MINPD256_MASK,
29176 IX86_BUILTIN_MINPS256_MASK,
29177 IX86_BUILTIN_MULPS128_MASK,
29178 IX86_BUILTIN_DIVPS128_MASK,
29179 IX86_BUILTIN_MULPD128_MASK,
29180 IX86_BUILTIN_DIVPD128_MASK,
29181 IX86_BUILTIN_DIVPD256_MASK,
29182 IX86_BUILTIN_DIVPS256_MASK,
29183 IX86_BUILTIN_MULPD256_MASK,
29184 IX86_BUILTIN_MULPS256_MASK,
29185 IX86_BUILTIN_ADDPD128_MASK,
29186 IX86_BUILTIN_ADDPD256_MASK,
29187 IX86_BUILTIN_ADDPS128_MASK,
29188 IX86_BUILTIN_ADDPS256_MASK,
29189 IX86_BUILTIN_SUBPD128_MASK,
29190 IX86_BUILTIN_SUBPD256_MASK,
29191 IX86_BUILTIN_SUBPS128_MASK,
29192 IX86_BUILTIN_SUBPS256_MASK,
29193 IX86_BUILTIN_XORPD256_MASK,
29194 IX86_BUILTIN_XORPD128_MASK,
29195 IX86_BUILTIN_XORPS256_MASK,
29196 IX86_BUILTIN_XORPS128_MASK,
29197 IX86_BUILTIN_ORPD256_MASK,
29198 IX86_BUILTIN_ORPD128_MASK,
29199 IX86_BUILTIN_ORPS256_MASK,
29200 IX86_BUILTIN_ORPS128_MASK,
29201 IX86_BUILTIN_BROADCASTF32x2_256,
29202 IX86_BUILTIN_BROADCASTI32x2_256,
29203 IX86_BUILTIN_BROADCASTI32x2_128,
29204 IX86_BUILTIN_BROADCASTF64X2_256,
29205 IX86_BUILTIN_BROADCASTI64X2_256,
29206 IX86_BUILTIN_BROADCASTF32X4_256,
29207 IX86_BUILTIN_BROADCASTI32X4_256,
29208 IX86_BUILTIN_EXTRACTF32X4_256,
29209 IX86_BUILTIN_EXTRACTI32X4_256,
29210 IX86_BUILTIN_DBPSADBW256,
29211 IX86_BUILTIN_DBPSADBW128,
29212 IX86_BUILTIN_CVTTPD2QQ256,
29213 IX86_BUILTIN_CVTTPD2QQ128,
29214 IX86_BUILTIN_CVTTPD2UQQ256,
29215 IX86_BUILTIN_CVTTPD2UQQ128,
29216 IX86_BUILTIN_CVTPD2QQ256,
29217 IX86_BUILTIN_CVTPD2QQ128,
29218 IX86_BUILTIN_CVTPD2UQQ256,
29219 IX86_BUILTIN_CVTPD2UQQ128,
29220 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29221 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29222 IX86_BUILTIN_CVTTPS2QQ256,
29223 IX86_BUILTIN_CVTTPS2QQ128,
29224 IX86_BUILTIN_CVTTPS2UQQ256,
29225 IX86_BUILTIN_CVTTPS2UQQ128,
29226 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29227 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29228 IX86_BUILTIN_CVTTPS2UDQ256,
29229 IX86_BUILTIN_CVTTPS2UDQ128,
29230 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29231 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29232 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29233 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29234 IX86_BUILTIN_CVTPD2DQ256_MASK,
29235 IX86_BUILTIN_CVTPD2DQ128_MASK,
29236 IX86_BUILTIN_CVTDQ2PD256_MASK,
29237 IX86_BUILTIN_CVTDQ2PD128_MASK,
29238 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29239 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29240 IX86_BUILTIN_CVTDQ2PS256_MASK,
29241 IX86_BUILTIN_CVTDQ2PS128_MASK,
29242 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29243 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29244 IX86_BUILTIN_CVTPS2PD256_MASK,
29245 IX86_BUILTIN_CVTPS2PD128_MASK,
29246 IX86_BUILTIN_PBROADCASTB256_MASK,
29247 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29248 IX86_BUILTIN_PBROADCASTB128_MASK,
29249 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29250 IX86_BUILTIN_PBROADCASTW256_MASK,
29251 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29252 IX86_BUILTIN_PBROADCASTW128_MASK,
29253 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29254 IX86_BUILTIN_PBROADCASTD256_MASK,
29255 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29256 IX86_BUILTIN_PBROADCASTD128_MASK,
29257 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29258 IX86_BUILTIN_PBROADCASTQ256_MASK,
29259 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29260 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
29261 IX86_BUILTIN_PBROADCASTQ128_MASK,
29262 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29263 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
29264 IX86_BUILTIN_BROADCASTSS256,
29265 IX86_BUILTIN_BROADCASTSS128,
29266 IX86_BUILTIN_BROADCASTSD256,
29267 IX86_BUILTIN_EXTRACTF64X2_256,
29268 IX86_BUILTIN_EXTRACTI64X2_256,
29269 IX86_BUILTIN_INSERTF32X4_256,
29270 IX86_BUILTIN_INSERTI32X4_256,
29271 IX86_BUILTIN_PMOVSXBW256_MASK,
29272 IX86_BUILTIN_PMOVSXBW128_MASK,
29273 IX86_BUILTIN_PMOVSXBD256_MASK,
29274 IX86_BUILTIN_PMOVSXBD128_MASK,
29275 IX86_BUILTIN_PMOVSXBQ256_MASK,
29276 IX86_BUILTIN_PMOVSXBQ128_MASK,
29277 IX86_BUILTIN_PMOVSXWD256_MASK,
29278 IX86_BUILTIN_PMOVSXWD128_MASK,
29279 IX86_BUILTIN_PMOVSXWQ256_MASK,
29280 IX86_BUILTIN_PMOVSXWQ128_MASK,
29281 IX86_BUILTIN_PMOVSXDQ256_MASK,
29282 IX86_BUILTIN_PMOVSXDQ128_MASK,
29283 IX86_BUILTIN_PMOVZXBW256_MASK,
29284 IX86_BUILTIN_PMOVZXBW128_MASK,
29285 IX86_BUILTIN_PMOVZXBD256_MASK,
29286 IX86_BUILTIN_PMOVZXBD128_MASK,
29287 IX86_BUILTIN_PMOVZXBQ256_MASK,
29288 IX86_BUILTIN_PMOVZXBQ128_MASK,
29289 IX86_BUILTIN_PMOVZXWD256_MASK,
29290 IX86_BUILTIN_PMOVZXWD128_MASK,
29291 IX86_BUILTIN_PMOVZXWQ256_MASK,
29292 IX86_BUILTIN_PMOVZXWQ128_MASK,
29293 IX86_BUILTIN_PMOVZXDQ256_MASK,
29294 IX86_BUILTIN_PMOVZXDQ128_MASK,
29295 IX86_BUILTIN_REDUCEPD256_MASK,
29296 IX86_BUILTIN_REDUCEPD128_MASK,
29297 IX86_BUILTIN_REDUCEPS256_MASK,
29298 IX86_BUILTIN_REDUCEPS128_MASK,
29299 IX86_BUILTIN_REDUCESD_MASK,
29300 IX86_BUILTIN_REDUCESS_MASK,
29301 IX86_BUILTIN_VPERMVARHI256_MASK,
29302 IX86_BUILTIN_VPERMVARHI128_MASK,
29303 IX86_BUILTIN_VPERMT2VARHI256,
29304 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29305 IX86_BUILTIN_VPERMT2VARHI128,
29306 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29307 IX86_BUILTIN_VPERMI2VARHI256,
29308 IX86_BUILTIN_VPERMI2VARHI128,
29309 IX86_BUILTIN_RCP14PD256,
29310 IX86_BUILTIN_RCP14PD128,
29311 IX86_BUILTIN_RCP14PS256,
29312 IX86_BUILTIN_RCP14PS128,
29313 IX86_BUILTIN_RSQRT14PD256_MASK,
29314 IX86_BUILTIN_RSQRT14PD128_MASK,
29315 IX86_BUILTIN_RSQRT14PS256_MASK,
29316 IX86_BUILTIN_RSQRT14PS128_MASK,
29317 IX86_BUILTIN_SQRTPD256_MASK,
29318 IX86_BUILTIN_SQRTPD128_MASK,
29319 IX86_BUILTIN_SQRTPS256_MASK,
29320 IX86_BUILTIN_SQRTPS128_MASK,
29321 IX86_BUILTIN_PADDB128_MASK,
29322 IX86_BUILTIN_PADDW128_MASK,
29323 IX86_BUILTIN_PADDD128_MASK,
29324 IX86_BUILTIN_PADDQ128_MASK,
29325 IX86_BUILTIN_PSUBB128_MASK,
29326 IX86_BUILTIN_PSUBW128_MASK,
29327 IX86_BUILTIN_PSUBD128_MASK,
29328 IX86_BUILTIN_PSUBQ128_MASK,
29329 IX86_BUILTIN_PADDSB128_MASK,
29330 IX86_BUILTIN_PADDSW128_MASK,
29331 IX86_BUILTIN_PSUBSB128_MASK,
29332 IX86_BUILTIN_PSUBSW128_MASK,
29333 IX86_BUILTIN_PADDUSB128_MASK,
29334 IX86_BUILTIN_PADDUSW128_MASK,
29335 IX86_BUILTIN_PSUBUSB128_MASK,
29336 IX86_BUILTIN_PSUBUSW128_MASK,
29337 IX86_BUILTIN_PADDB256_MASK,
29338 IX86_BUILTIN_PADDW256_MASK,
29339 IX86_BUILTIN_PADDD256_MASK,
29340 IX86_BUILTIN_PADDQ256_MASK,
29341 IX86_BUILTIN_PADDSB256_MASK,
29342 IX86_BUILTIN_PADDSW256_MASK,
29343 IX86_BUILTIN_PADDUSB256_MASK,
29344 IX86_BUILTIN_PADDUSW256_MASK,
29345 IX86_BUILTIN_PSUBB256_MASK,
29346 IX86_BUILTIN_PSUBW256_MASK,
29347 IX86_BUILTIN_PSUBD256_MASK,
29348 IX86_BUILTIN_PSUBQ256_MASK,
29349 IX86_BUILTIN_PSUBSB256_MASK,
29350 IX86_BUILTIN_PSUBSW256_MASK,
29351 IX86_BUILTIN_PSUBUSB256_MASK,
29352 IX86_BUILTIN_PSUBUSW256_MASK,
29353 IX86_BUILTIN_SHUF_F64x2_256,
29354 IX86_BUILTIN_SHUF_I64x2_256,
29355 IX86_BUILTIN_SHUF_I32x4_256,
29356 IX86_BUILTIN_SHUF_F32x4_256,
29357 IX86_BUILTIN_PMOVWB128,
29358 IX86_BUILTIN_PMOVWB256,
29359 IX86_BUILTIN_PMOVSWB128,
29360 IX86_BUILTIN_PMOVSWB256,
29361 IX86_BUILTIN_PMOVUSWB128,
29362 IX86_BUILTIN_PMOVUSWB256,
29363 IX86_BUILTIN_PMOVDB128,
29364 IX86_BUILTIN_PMOVDB256,
29365 IX86_BUILTIN_PMOVSDB128,
29366 IX86_BUILTIN_PMOVSDB256,
29367 IX86_BUILTIN_PMOVUSDB128,
29368 IX86_BUILTIN_PMOVUSDB256,
29369 IX86_BUILTIN_PMOVDW128,
29370 IX86_BUILTIN_PMOVDW256,
29371 IX86_BUILTIN_PMOVSDW128,
29372 IX86_BUILTIN_PMOVSDW256,
29373 IX86_BUILTIN_PMOVUSDW128,
29374 IX86_BUILTIN_PMOVUSDW256,
29375 IX86_BUILTIN_PMOVQB128,
29376 IX86_BUILTIN_PMOVQB256,
29377 IX86_BUILTIN_PMOVSQB128,
29378 IX86_BUILTIN_PMOVSQB256,
29379 IX86_BUILTIN_PMOVUSQB128,
29380 IX86_BUILTIN_PMOVUSQB256,
29381 IX86_BUILTIN_PMOVQW128,
29382 IX86_BUILTIN_PMOVQW256,
29383 IX86_BUILTIN_PMOVSQW128,
29384 IX86_BUILTIN_PMOVSQW256,
29385 IX86_BUILTIN_PMOVUSQW128,
29386 IX86_BUILTIN_PMOVUSQW256,
29387 IX86_BUILTIN_PMOVQD128,
29388 IX86_BUILTIN_PMOVQD256,
29389 IX86_BUILTIN_PMOVSQD128,
29390 IX86_BUILTIN_PMOVSQD256,
29391 IX86_BUILTIN_PMOVUSQD128,
29392 IX86_BUILTIN_PMOVUSQD256,
29393 IX86_BUILTIN_RANGEPD256,
29394 IX86_BUILTIN_RANGEPD128,
29395 IX86_BUILTIN_RANGEPS256,
29396 IX86_BUILTIN_RANGEPS128,
29397 IX86_BUILTIN_GETEXPPS256,
29398 IX86_BUILTIN_GETEXPPD256,
29399 IX86_BUILTIN_GETEXPPS128,
29400 IX86_BUILTIN_GETEXPPD128,
29401 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29402 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29403 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29404 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29405 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29406 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29407 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29408 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29409 IX86_BUILTIN_PABSQ256,
29410 IX86_BUILTIN_PABSQ128,
29411 IX86_BUILTIN_PABSD256_MASK,
29412 IX86_BUILTIN_PABSD128_MASK,
29413 IX86_BUILTIN_PMULHRSW256_MASK,
29414 IX86_BUILTIN_PMULHRSW128_MASK,
29415 IX86_BUILTIN_PMULHUW128_MASK,
29416 IX86_BUILTIN_PMULHUW256_MASK,
29417 IX86_BUILTIN_PMULHW256_MASK,
29418 IX86_BUILTIN_PMULHW128_MASK,
29419 IX86_BUILTIN_PMULLW256_MASK,
29420 IX86_BUILTIN_PMULLW128_MASK,
29421 IX86_BUILTIN_PMULLQ256,
29422 IX86_BUILTIN_PMULLQ128,
29423 IX86_BUILTIN_ANDPD256_MASK,
29424 IX86_BUILTIN_ANDPD128_MASK,
29425 IX86_BUILTIN_ANDPS256_MASK,
29426 IX86_BUILTIN_ANDPS128_MASK,
29427 IX86_BUILTIN_ANDNPD256_MASK,
29428 IX86_BUILTIN_ANDNPD128_MASK,
29429 IX86_BUILTIN_ANDNPS256_MASK,
29430 IX86_BUILTIN_ANDNPS128_MASK,
29431 IX86_BUILTIN_PSLLWI128_MASK,
29432 IX86_BUILTIN_PSLLDI128_MASK,
29433 IX86_BUILTIN_PSLLQI128_MASK,
29434 IX86_BUILTIN_PSLLW128_MASK,
29435 IX86_BUILTIN_PSLLD128_MASK,
29436 IX86_BUILTIN_PSLLQ128_MASK,
29437 IX86_BUILTIN_PSLLWI256_MASK,
29438 IX86_BUILTIN_PSLLW256_MASK,
29439 IX86_BUILTIN_PSLLDI256_MASK,
29440 IX86_BUILTIN_PSLLD256_MASK,
29441 IX86_BUILTIN_PSLLQI256_MASK,
29442 IX86_BUILTIN_PSLLQ256_MASK,
29443 IX86_BUILTIN_PSRADI128_MASK,
29444 IX86_BUILTIN_PSRAD128_MASK,
29445 IX86_BUILTIN_PSRADI256_MASK,
29446 IX86_BUILTIN_PSRAD256_MASK,
29447 IX86_BUILTIN_PSRAQI128_MASK,
29448 IX86_BUILTIN_PSRAQ128_MASK,
29449 IX86_BUILTIN_PSRAQI256_MASK,
29450 IX86_BUILTIN_PSRAQ256_MASK,
29451 IX86_BUILTIN_PANDD256,
29452 IX86_BUILTIN_PANDD128,
29453 IX86_BUILTIN_PSRLDI128_MASK,
29454 IX86_BUILTIN_PSRLD128_MASK,
29455 IX86_BUILTIN_PSRLDI256_MASK,
29456 IX86_BUILTIN_PSRLD256_MASK,
29457 IX86_BUILTIN_PSRLQI128_MASK,
29458 IX86_BUILTIN_PSRLQ128_MASK,
29459 IX86_BUILTIN_PSRLQI256_MASK,
29460 IX86_BUILTIN_PSRLQ256_MASK,
29461 IX86_BUILTIN_PANDQ256,
29462 IX86_BUILTIN_PANDQ128,
29463 IX86_BUILTIN_PANDND256,
29464 IX86_BUILTIN_PANDND128,
29465 IX86_BUILTIN_PANDNQ256,
29466 IX86_BUILTIN_PANDNQ128,
29467 IX86_BUILTIN_PORD256,
29468 IX86_BUILTIN_PORD128,
29469 IX86_BUILTIN_PORQ256,
29470 IX86_BUILTIN_PORQ128,
29471 IX86_BUILTIN_PXORD256,
29472 IX86_BUILTIN_PXORD128,
29473 IX86_BUILTIN_PXORQ256,
29474 IX86_BUILTIN_PXORQ128,
29475 IX86_BUILTIN_PACKSSWB256_MASK,
29476 IX86_BUILTIN_PACKSSWB128_MASK,
29477 IX86_BUILTIN_PACKUSWB256_MASK,
29478 IX86_BUILTIN_PACKUSWB128_MASK,
29479 IX86_BUILTIN_RNDSCALEPS256,
29480 IX86_BUILTIN_RNDSCALEPD256,
29481 IX86_BUILTIN_RNDSCALEPS128,
29482 IX86_BUILTIN_RNDSCALEPD128,
29483 IX86_BUILTIN_VTERNLOGQ256_MASK,
29484 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29485 IX86_BUILTIN_VTERNLOGD256_MASK,
29486 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29487 IX86_BUILTIN_VTERNLOGQ128_MASK,
29488 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29489 IX86_BUILTIN_VTERNLOGD128_MASK,
29490 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29491 IX86_BUILTIN_SCALEFPD256,
29492 IX86_BUILTIN_SCALEFPS256,
29493 IX86_BUILTIN_SCALEFPD128,
29494 IX86_BUILTIN_SCALEFPS128,
29495 IX86_BUILTIN_VFMADDPD256_MASK,
29496 IX86_BUILTIN_VFMADDPD256_MASK3,
29497 IX86_BUILTIN_VFMADDPD256_MASKZ,
29498 IX86_BUILTIN_VFMADDPD128_MASK,
29499 IX86_BUILTIN_VFMADDPD128_MASK3,
29500 IX86_BUILTIN_VFMADDPD128_MASKZ,
29501 IX86_BUILTIN_VFMADDPS256_MASK,
29502 IX86_BUILTIN_VFMADDPS256_MASK3,
29503 IX86_BUILTIN_VFMADDPS256_MASKZ,
29504 IX86_BUILTIN_VFMADDPS128_MASK,
29505 IX86_BUILTIN_VFMADDPS128_MASK3,
29506 IX86_BUILTIN_VFMADDPS128_MASKZ,
29507 IX86_BUILTIN_VFMSUBPD256_MASK3,
29508 IX86_BUILTIN_VFMSUBPD128_MASK3,
29509 IX86_BUILTIN_VFMSUBPS256_MASK3,
29510 IX86_BUILTIN_VFMSUBPS128_MASK3,
29511 IX86_BUILTIN_VFNMADDPD256_MASK,
29512 IX86_BUILTIN_VFNMADDPD128_MASK,
29513 IX86_BUILTIN_VFNMADDPS256_MASK,
29514 IX86_BUILTIN_VFNMADDPS128_MASK,
29515 IX86_BUILTIN_VFNMSUBPD256_MASK,
29516 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29517 IX86_BUILTIN_VFNMSUBPD128_MASK,
29518 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29519 IX86_BUILTIN_VFNMSUBPS256_MASK,
29520 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29521 IX86_BUILTIN_VFNMSUBPS128_MASK,
29522 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29523 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29524 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29525 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29526 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29527 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29528 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29529 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29530 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29531 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29532 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29533 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29534 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29535 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29536 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29537 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29538 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29539 IX86_BUILTIN_INSERTF64X2_256,
29540 IX86_BUILTIN_INSERTI64X2_256,
29541 IX86_BUILTIN_PSRAVV16HI,
29542 IX86_BUILTIN_PSRAVV8HI,
29543 IX86_BUILTIN_PMADDUBSW256_MASK,
29544 IX86_BUILTIN_PMADDUBSW128_MASK,
29545 IX86_BUILTIN_PMADDWD256_MASK,
29546 IX86_BUILTIN_PMADDWD128_MASK,
29547 IX86_BUILTIN_PSRLVV16HI,
29548 IX86_BUILTIN_PSRLVV8HI,
29549 IX86_BUILTIN_CVTPS2DQ256_MASK,
29550 IX86_BUILTIN_CVTPS2DQ128_MASK,
29551 IX86_BUILTIN_CVTPS2UDQ256,
29552 IX86_BUILTIN_CVTPS2UDQ128,
29553 IX86_BUILTIN_CVTPS2QQ256,
29554 IX86_BUILTIN_CVTPS2QQ128,
29555 IX86_BUILTIN_CVTPS2UQQ256,
29556 IX86_BUILTIN_CVTPS2UQQ128,
29557 IX86_BUILTIN_GETMANTPS256,
29558 IX86_BUILTIN_GETMANTPS128,
29559 IX86_BUILTIN_GETMANTPD256,
29560 IX86_BUILTIN_GETMANTPD128,
29561 IX86_BUILTIN_MOVDDUP256_MASK,
29562 IX86_BUILTIN_MOVDDUP128_MASK,
29563 IX86_BUILTIN_MOVSHDUP256_MASK,
29564 IX86_BUILTIN_MOVSHDUP128_MASK,
29565 IX86_BUILTIN_MOVSLDUP256_MASK,
29566 IX86_BUILTIN_MOVSLDUP128_MASK,
29567 IX86_BUILTIN_CVTQQ2PS256,
29568 IX86_BUILTIN_CVTQQ2PS128,
29569 IX86_BUILTIN_CVTUQQ2PS256,
29570 IX86_BUILTIN_CVTUQQ2PS128,
29571 IX86_BUILTIN_CVTQQ2PD256,
29572 IX86_BUILTIN_CVTQQ2PD128,
29573 IX86_BUILTIN_CVTUQQ2PD256,
29574 IX86_BUILTIN_CVTUQQ2PD128,
29575 IX86_BUILTIN_VPERMT2VARQ256,
29576 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29577 IX86_BUILTIN_VPERMT2VARD256,
29578 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29579 IX86_BUILTIN_VPERMI2VARQ256,
29580 IX86_BUILTIN_VPERMI2VARD256,
29581 IX86_BUILTIN_VPERMT2VARPD256,
29582 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29583 IX86_BUILTIN_VPERMT2VARPS256,
29584 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29585 IX86_BUILTIN_VPERMI2VARPD256,
29586 IX86_BUILTIN_VPERMI2VARPS256,
29587 IX86_BUILTIN_VPERMT2VARQ128,
29588 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29589 IX86_BUILTIN_VPERMT2VARD128,
29590 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29591 IX86_BUILTIN_VPERMI2VARQ128,
29592 IX86_BUILTIN_VPERMI2VARD128,
29593 IX86_BUILTIN_VPERMT2VARPD128,
29594 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29595 IX86_BUILTIN_VPERMT2VARPS128,
29596 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29597 IX86_BUILTIN_VPERMI2VARPD128,
29598 IX86_BUILTIN_VPERMI2VARPS128,
29599 IX86_BUILTIN_PSHUFB256_MASK,
29600 IX86_BUILTIN_PSHUFB128_MASK,
29601 IX86_BUILTIN_PSHUFHW256_MASK,
29602 IX86_BUILTIN_PSHUFHW128_MASK,
29603 IX86_BUILTIN_PSHUFLW256_MASK,
29604 IX86_BUILTIN_PSHUFLW128_MASK,
29605 IX86_BUILTIN_PSHUFD256_MASK,
29606 IX86_BUILTIN_PSHUFD128_MASK,
29607 IX86_BUILTIN_SHUFPD256_MASK,
29608 IX86_BUILTIN_SHUFPD128_MASK,
29609 IX86_BUILTIN_SHUFPS256_MASK,
29610 IX86_BUILTIN_SHUFPS128_MASK,
29611 IX86_BUILTIN_PROLVQ256,
29612 IX86_BUILTIN_PROLVQ128,
29613 IX86_BUILTIN_PROLQ256,
29614 IX86_BUILTIN_PROLQ128,
29615 IX86_BUILTIN_PRORVQ256,
29616 IX86_BUILTIN_PRORVQ128,
29617 IX86_BUILTIN_PRORQ256,
29618 IX86_BUILTIN_PRORQ128,
29619 IX86_BUILTIN_PSRAVQ128,
29620 IX86_BUILTIN_PSRAVQ256,
29621 IX86_BUILTIN_PSLLVV4DI_MASK,
29622 IX86_BUILTIN_PSLLVV2DI_MASK,
29623 IX86_BUILTIN_PSLLVV8SI_MASK,
29624 IX86_BUILTIN_PSLLVV4SI_MASK,
29625 IX86_BUILTIN_PSRAVV8SI_MASK,
29626 IX86_BUILTIN_PSRAVV4SI_MASK,
29627 IX86_BUILTIN_PSRLVV4DI_MASK,
29628 IX86_BUILTIN_PSRLVV2DI_MASK,
29629 IX86_BUILTIN_PSRLVV8SI_MASK,
29630 IX86_BUILTIN_PSRLVV4SI_MASK,
29631 IX86_BUILTIN_PSRAWI256_MASK,
29632 IX86_BUILTIN_PSRAW256_MASK,
29633 IX86_BUILTIN_PSRAWI128_MASK,
29634 IX86_BUILTIN_PSRAW128_MASK,
29635 IX86_BUILTIN_PSRLWI256_MASK,
29636 IX86_BUILTIN_PSRLW256_MASK,
29637 IX86_BUILTIN_PSRLWI128_MASK,
29638 IX86_BUILTIN_PSRLW128_MASK,
29639 IX86_BUILTIN_PRORVD256,
29640 IX86_BUILTIN_PROLVD256,
29641 IX86_BUILTIN_PRORD256,
29642 IX86_BUILTIN_PROLD256,
29643 IX86_BUILTIN_PRORVD128,
29644 IX86_BUILTIN_PROLVD128,
29645 IX86_BUILTIN_PRORD128,
29646 IX86_BUILTIN_PROLD128,
29647 IX86_BUILTIN_FPCLASSPD256,
29648 IX86_BUILTIN_FPCLASSPD128,
29649 IX86_BUILTIN_FPCLASSSD,
29650 IX86_BUILTIN_FPCLASSPS256,
29651 IX86_BUILTIN_FPCLASSPS128,
29652 IX86_BUILTIN_FPCLASSSS,
29653 IX86_BUILTIN_CVTB2MASK128,
29654 IX86_BUILTIN_CVTB2MASK256,
29655 IX86_BUILTIN_CVTW2MASK128,
29656 IX86_BUILTIN_CVTW2MASK256,
29657 IX86_BUILTIN_CVTD2MASK128,
29658 IX86_BUILTIN_CVTD2MASK256,
29659 IX86_BUILTIN_CVTQ2MASK128,
29660 IX86_BUILTIN_CVTQ2MASK256,
29661 IX86_BUILTIN_CVTMASK2B128,
29662 IX86_BUILTIN_CVTMASK2B256,
29663 IX86_BUILTIN_CVTMASK2W128,
29664 IX86_BUILTIN_CVTMASK2W256,
29665 IX86_BUILTIN_CVTMASK2D128,
29666 IX86_BUILTIN_CVTMASK2D256,
29667 IX86_BUILTIN_CVTMASK2Q128,
29668 IX86_BUILTIN_CVTMASK2Q256,
29669 IX86_BUILTIN_PCMPEQB128_MASK,
29670 IX86_BUILTIN_PCMPEQB256_MASK,
29671 IX86_BUILTIN_PCMPEQW128_MASK,
29672 IX86_BUILTIN_PCMPEQW256_MASK,
29673 IX86_BUILTIN_PCMPEQD128_MASK,
29674 IX86_BUILTIN_PCMPEQD256_MASK,
29675 IX86_BUILTIN_PCMPEQQ128_MASK,
29676 IX86_BUILTIN_PCMPEQQ256_MASK,
29677 IX86_BUILTIN_PCMPGTB128_MASK,
29678 IX86_BUILTIN_PCMPGTB256_MASK,
29679 IX86_BUILTIN_PCMPGTW128_MASK,
29680 IX86_BUILTIN_PCMPGTW256_MASK,
29681 IX86_BUILTIN_PCMPGTD128_MASK,
29682 IX86_BUILTIN_PCMPGTD256_MASK,
29683 IX86_BUILTIN_PCMPGTQ128_MASK,
29684 IX86_BUILTIN_PCMPGTQ256_MASK,
29685 IX86_BUILTIN_PTESTMB128,
29686 IX86_BUILTIN_PTESTMB256,
29687 IX86_BUILTIN_PTESTMW128,
29688 IX86_BUILTIN_PTESTMW256,
29689 IX86_BUILTIN_PTESTMD128,
29690 IX86_BUILTIN_PTESTMD256,
29691 IX86_BUILTIN_PTESTMQ128,
29692 IX86_BUILTIN_PTESTMQ256,
29693 IX86_BUILTIN_PTESTNMB128,
29694 IX86_BUILTIN_PTESTNMB256,
29695 IX86_BUILTIN_PTESTNMW128,
29696 IX86_BUILTIN_PTESTNMW256,
29697 IX86_BUILTIN_PTESTNMD128,
29698 IX86_BUILTIN_PTESTNMD256,
29699 IX86_BUILTIN_PTESTNMQ128,
29700 IX86_BUILTIN_PTESTNMQ256,
29701 IX86_BUILTIN_PBROADCASTMB128,
29702 IX86_BUILTIN_PBROADCASTMB256,
29703 IX86_BUILTIN_PBROADCASTMW128,
29704 IX86_BUILTIN_PBROADCASTMW256,
29705 IX86_BUILTIN_COMPRESSPD256,
29706 IX86_BUILTIN_COMPRESSPD128,
29707 IX86_BUILTIN_COMPRESSPS256,
29708 IX86_BUILTIN_COMPRESSPS128,
29709 IX86_BUILTIN_PCOMPRESSQ256,
29710 IX86_BUILTIN_PCOMPRESSQ128,
29711 IX86_BUILTIN_PCOMPRESSD256,
29712 IX86_BUILTIN_PCOMPRESSD128,
29713 IX86_BUILTIN_EXPANDPD256,
29714 IX86_BUILTIN_EXPANDPD128,
29715 IX86_BUILTIN_EXPANDPS256,
29716 IX86_BUILTIN_EXPANDPS128,
29717 IX86_BUILTIN_PEXPANDQ256,
29718 IX86_BUILTIN_PEXPANDQ128,
29719 IX86_BUILTIN_PEXPANDD256,
29720 IX86_BUILTIN_PEXPANDD128,
29721 IX86_BUILTIN_EXPANDPD256Z,
29722 IX86_BUILTIN_EXPANDPD128Z,
29723 IX86_BUILTIN_EXPANDPS256Z,
29724 IX86_BUILTIN_EXPANDPS128Z,
29725 IX86_BUILTIN_PEXPANDQ256Z,
29726 IX86_BUILTIN_PEXPANDQ128Z,
29727 IX86_BUILTIN_PEXPANDD256Z,
29728 IX86_BUILTIN_PEXPANDD128Z,
29729 IX86_BUILTIN_PMAXSD256_MASK,
29730 IX86_BUILTIN_PMINSD256_MASK,
29731 IX86_BUILTIN_PMAXUD256_MASK,
29732 IX86_BUILTIN_PMINUD256_MASK,
29733 IX86_BUILTIN_PMAXSD128_MASK,
29734 IX86_BUILTIN_PMINSD128_MASK,
29735 IX86_BUILTIN_PMAXUD128_MASK,
29736 IX86_BUILTIN_PMINUD128_MASK,
29737 IX86_BUILTIN_PMAXSQ256_MASK,
29738 IX86_BUILTIN_PMINSQ256_MASK,
29739 IX86_BUILTIN_PMAXUQ256_MASK,
29740 IX86_BUILTIN_PMINUQ256_MASK,
29741 IX86_BUILTIN_PMAXSQ128_MASK,
29742 IX86_BUILTIN_PMINSQ128_MASK,
29743 IX86_BUILTIN_PMAXUQ128_MASK,
29744 IX86_BUILTIN_PMINUQ128_MASK,
29745 IX86_BUILTIN_PMINSB256_MASK,
29746 IX86_BUILTIN_PMINUB256_MASK,
29747 IX86_BUILTIN_PMAXSB256_MASK,
29748 IX86_BUILTIN_PMAXUB256_MASK,
29749 IX86_BUILTIN_PMINSB128_MASK,
29750 IX86_BUILTIN_PMINUB128_MASK,
29751 IX86_BUILTIN_PMAXSB128_MASK,
29752 IX86_BUILTIN_PMAXUB128_MASK,
29753 IX86_BUILTIN_PMINSW256_MASK,
29754 IX86_BUILTIN_PMINUW256_MASK,
29755 IX86_BUILTIN_PMAXSW256_MASK,
29756 IX86_BUILTIN_PMAXUW256_MASK,
29757 IX86_BUILTIN_PMINSW128_MASK,
29758 IX86_BUILTIN_PMINUW128_MASK,
29759 IX86_BUILTIN_PMAXSW128_MASK,
29760 IX86_BUILTIN_PMAXUW128_MASK,
29761 IX86_BUILTIN_VPCONFLICTQ256,
29762 IX86_BUILTIN_VPCONFLICTD256,
29763 IX86_BUILTIN_VPCLZCNTQ256,
29764 IX86_BUILTIN_VPCLZCNTD256,
29765 IX86_BUILTIN_UNPCKHPD256_MASK,
29766 IX86_BUILTIN_UNPCKHPD128_MASK,
29767 IX86_BUILTIN_UNPCKHPS256_MASK,
29768 IX86_BUILTIN_UNPCKHPS128_MASK,
29769 IX86_BUILTIN_UNPCKLPD256_MASK,
29770 IX86_BUILTIN_UNPCKLPD128_MASK,
29771 IX86_BUILTIN_UNPCKLPS256_MASK,
29772 IX86_BUILTIN_VPCONFLICTQ128,
29773 IX86_BUILTIN_VPCONFLICTD128,
29774 IX86_BUILTIN_VPCLZCNTQ128,
29775 IX86_BUILTIN_VPCLZCNTD128,
29776 IX86_BUILTIN_UNPCKLPS128_MASK,
29777 IX86_BUILTIN_ALIGND256,
29778 IX86_BUILTIN_ALIGNQ256,
29779 IX86_BUILTIN_ALIGND128,
29780 IX86_BUILTIN_ALIGNQ128,
29781 IX86_BUILTIN_CVTPS2PH256_MASK,
29782 IX86_BUILTIN_CVTPS2PH_MASK,
29783 IX86_BUILTIN_CVTPH2PS_MASK,
29784 IX86_BUILTIN_CVTPH2PS256_MASK,
29785 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29786 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29787 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29788 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29789 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29790 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29791 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29792 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29793 IX86_BUILTIN_PUNPCKHBW128_MASK,
29794 IX86_BUILTIN_PUNPCKHBW256_MASK,
29795 IX86_BUILTIN_PUNPCKHWD128_MASK,
29796 IX86_BUILTIN_PUNPCKHWD256_MASK,
29797 IX86_BUILTIN_PUNPCKLBW128_MASK,
29798 IX86_BUILTIN_PUNPCKLBW256_MASK,
29799 IX86_BUILTIN_PUNPCKLWD128_MASK,
29800 IX86_BUILTIN_PUNPCKLWD256_MASK,
29801 IX86_BUILTIN_PSLLVV16HI,
29802 IX86_BUILTIN_PSLLVV8HI,
29803 IX86_BUILTIN_PACKSSDW256_MASK,
29804 IX86_BUILTIN_PACKSSDW128_MASK,
29805 IX86_BUILTIN_PACKUSDW256_MASK,
29806 IX86_BUILTIN_PACKUSDW128_MASK,
29807 IX86_BUILTIN_PAVGB256_MASK,
29808 IX86_BUILTIN_PAVGW256_MASK,
29809 IX86_BUILTIN_PAVGB128_MASK,
29810 IX86_BUILTIN_PAVGW128_MASK,
29811 IX86_BUILTIN_VPERMVARSF256_MASK,
29812 IX86_BUILTIN_VPERMVARDF256_MASK,
29813 IX86_BUILTIN_VPERMDF256_MASK,
29814 IX86_BUILTIN_PABSB256_MASK,
29815 IX86_BUILTIN_PABSB128_MASK,
29816 IX86_BUILTIN_PABSW256_MASK,
29817 IX86_BUILTIN_PABSW128_MASK,
29818 IX86_BUILTIN_VPERMILVARPD_MASK,
29819 IX86_BUILTIN_VPERMILVARPS_MASK,
29820 IX86_BUILTIN_VPERMILVARPD256_MASK,
29821 IX86_BUILTIN_VPERMILVARPS256_MASK,
29822 IX86_BUILTIN_VPERMILPD_MASK,
29823 IX86_BUILTIN_VPERMILPS_MASK,
29824 IX86_BUILTIN_VPERMILPD256_MASK,
29825 IX86_BUILTIN_VPERMILPS256_MASK,
29826 IX86_BUILTIN_BLENDMQ256,
29827 IX86_BUILTIN_BLENDMD256,
29828 IX86_BUILTIN_BLENDMPD256,
29829 IX86_BUILTIN_BLENDMPS256,
29830 IX86_BUILTIN_BLENDMQ128,
29831 IX86_BUILTIN_BLENDMD128,
29832 IX86_BUILTIN_BLENDMPD128,
29833 IX86_BUILTIN_BLENDMPS128,
29834 IX86_BUILTIN_BLENDMW256,
29835 IX86_BUILTIN_BLENDMB256,
29836 IX86_BUILTIN_BLENDMW128,
29837 IX86_BUILTIN_BLENDMB128,
29838 IX86_BUILTIN_PMULLD256_MASK,
29839 IX86_BUILTIN_PMULLD128_MASK,
29840 IX86_BUILTIN_PMULUDQ256_MASK,
29841 IX86_BUILTIN_PMULDQ256_MASK,
29842 IX86_BUILTIN_PMULDQ128_MASK,
29843 IX86_BUILTIN_PMULUDQ128_MASK,
29844 IX86_BUILTIN_CVTPD2PS256_MASK,
29845 IX86_BUILTIN_CVTPD2PS_MASK,
29846 IX86_BUILTIN_VPERMVARSI256_MASK,
29847 IX86_BUILTIN_VPERMVARDI256_MASK,
29848 IX86_BUILTIN_VPERMDI256_MASK,
29849 IX86_BUILTIN_CMPQ256,
29850 IX86_BUILTIN_CMPD256,
29851 IX86_BUILTIN_UCMPQ256,
29852 IX86_BUILTIN_UCMPD256,
29853 IX86_BUILTIN_CMPB256,
29854 IX86_BUILTIN_CMPW256,
29855 IX86_BUILTIN_UCMPB256,
29856 IX86_BUILTIN_UCMPW256,
29857 IX86_BUILTIN_CMPPD256_MASK,
29858 IX86_BUILTIN_CMPPS256_MASK,
29859 IX86_BUILTIN_CMPQ128,
29860 IX86_BUILTIN_CMPD128,
29861 IX86_BUILTIN_UCMPQ128,
29862 IX86_BUILTIN_UCMPD128,
29863 IX86_BUILTIN_CMPB128,
29864 IX86_BUILTIN_CMPW128,
29865 IX86_BUILTIN_UCMPB128,
29866 IX86_BUILTIN_UCMPW128,
29867 IX86_BUILTIN_CMPPD128_MASK,
29868 IX86_BUILTIN_CMPPS128_MASK,
29869
29870 IX86_BUILTIN_GATHER3SIV8SF,
29871 IX86_BUILTIN_GATHER3SIV4SF,
29872 IX86_BUILTIN_GATHER3SIV4DF,
29873 IX86_BUILTIN_GATHER3SIV2DF,
29874 IX86_BUILTIN_GATHER3DIV8SF,
29875 IX86_BUILTIN_GATHER3DIV4SF,
29876 IX86_BUILTIN_GATHER3DIV4DF,
29877 IX86_BUILTIN_GATHER3DIV2DF,
29878 IX86_BUILTIN_GATHER3SIV8SI,
29879 IX86_BUILTIN_GATHER3SIV4SI,
29880 IX86_BUILTIN_GATHER3SIV4DI,
29881 IX86_BUILTIN_GATHER3SIV2DI,
29882 IX86_BUILTIN_GATHER3DIV8SI,
29883 IX86_BUILTIN_GATHER3DIV4SI,
29884 IX86_BUILTIN_GATHER3DIV4DI,
29885 IX86_BUILTIN_GATHER3DIV2DI,
29886 IX86_BUILTIN_SCATTERSIV8SF,
29887 IX86_BUILTIN_SCATTERSIV4SF,
29888 IX86_BUILTIN_SCATTERSIV4DF,
29889 IX86_BUILTIN_SCATTERSIV2DF,
29890 IX86_BUILTIN_SCATTERDIV8SF,
29891 IX86_BUILTIN_SCATTERDIV4SF,
29892 IX86_BUILTIN_SCATTERDIV4DF,
29893 IX86_BUILTIN_SCATTERDIV2DF,
29894 IX86_BUILTIN_SCATTERSIV8SI,
29895 IX86_BUILTIN_SCATTERSIV4SI,
29896 IX86_BUILTIN_SCATTERSIV4DI,
29897 IX86_BUILTIN_SCATTERSIV2DI,
29898 IX86_BUILTIN_SCATTERDIV8SI,
29899 IX86_BUILTIN_SCATTERDIV4SI,
29900 IX86_BUILTIN_SCATTERDIV4DI,
29901 IX86_BUILTIN_SCATTERDIV2DI,
29902
29903 /* AVX512DQ. */
29904 IX86_BUILTIN_RANGESD128,
29905 IX86_BUILTIN_RANGESS128,
29906 IX86_BUILTIN_KUNPCKWD,
29907 IX86_BUILTIN_KUNPCKDQ,
29908 IX86_BUILTIN_BROADCASTF32x2_512,
29909 IX86_BUILTIN_BROADCASTI32x2_512,
29910 IX86_BUILTIN_BROADCASTF64X2_512,
29911 IX86_BUILTIN_BROADCASTI64X2_512,
29912 IX86_BUILTIN_BROADCASTF32X8_512,
29913 IX86_BUILTIN_BROADCASTI32X8_512,
29914 IX86_BUILTIN_EXTRACTF64X2_512,
29915 IX86_BUILTIN_EXTRACTF32X8,
29916 IX86_BUILTIN_EXTRACTI64X2_512,
29917 IX86_BUILTIN_EXTRACTI32X8,
29918 IX86_BUILTIN_REDUCEPD512_MASK,
29919 IX86_BUILTIN_REDUCEPS512_MASK,
29920 IX86_BUILTIN_PMULLQ512,
29921 IX86_BUILTIN_XORPD512,
29922 IX86_BUILTIN_XORPS512,
29923 IX86_BUILTIN_ORPD512,
29924 IX86_BUILTIN_ORPS512,
29925 IX86_BUILTIN_ANDPD512,
29926 IX86_BUILTIN_ANDPS512,
29927 IX86_BUILTIN_ANDNPD512,
29928 IX86_BUILTIN_ANDNPS512,
29929 IX86_BUILTIN_INSERTF32X8,
29930 IX86_BUILTIN_INSERTI32X8,
29931 IX86_BUILTIN_INSERTF64X2_512,
29932 IX86_BUILTIN_INSERTI64X2_512,
29933 IX86_BUILTIN_FPCLASSPD512,
29934 IX86_BUILTIN_FPCLASSPS512,
29935 IX86_BUILTIN_CVTD2MASK512,
29936 IX86_BUILTIN_CVTQ2MASK512,
29937 IX86_BUILTIN_CVTMASK2D512,
29938 IX86_BUILTIN_CVTMASK2Q512,
29939 IX86_BUILTIN_CVTPD2QQ512,
29940 IX86_BUILTIN_CVTPS2QQ512,
29941 IX86_BUILTIN_CVTPD2UQQ512,
29942 IX86_BUILTIN_CVTPS2UQQ512,
29943 IX86_BUILTIN_CVTQQ2PS512,
29944 IX86_BUILTIN_CVTUQQ2PS512,
29945 IX86_BUILTIN_CVTQQ2PD512,
29946 IX86_BUILTIN_CVTUQQ2PD512,
29947 IX86_BUILTIN_CVTTPS2QQ512,
29948 IX86_BUILTIN_CVTTPS2UQQ512,
29949 IX86_BUILTIN_CVTTPD2QQ512,
29950 IX86_BUILTIN_CVTTPD2UQQ512,
29951 IX86_BUILTIN_RANGEPS512,
29952 IX86_BUILTIN_RANGEPD512,
29953
29954 /* AVX512BW. */
29955 IX86_BUILTIN_PACKUSDW512,
29956 IX86_BUILTIN_PACKSSDW512,
29957 IX86_BUILTIN_LOADDQUHI512_MASK,
29958 IX86_BUILTIN_LOADDQUQI512_MASK,
29959 IX86_BUILTIN_PSLLDQ512,
29960 IX86_BUILTIN_PSRLDQ512,
29961 IX86_BUILTIN_STOREDQUHI512_MASK,
29962 IX86_BUILTIN_STOREDQUQI512_MASK,
29963 IX86_BUILTIN_PALIGNR512,
29964 IX86_BUILTIN_PALIGNR512_MASK,
29965 IX86_BUILTIN_MOVDQUHI512_MASK,
29966 IX86_BUILTIN_MOVDQUQI512_MASK,
29967 IX86_BUILTIN_PSADBW512,
29968 IX86_BUILTIN_DBPSADBW512,
29969 IX86_BUILTIN_PBROADCASTB512,
29970 IX86_BUILTIN_PBROADCASTB512_GPR,
29971 IX86_BUILTIN_PBROADCASTW512,
29972 IX86_BUILTIN_PBROADCASTW512_GPR,
29973 IX86_BUILTIN_PMOVSXBW512_MASK,
29974 IX86_BUILTIN_PMOVZXBW512_MASK,
29975 IX86_BUILTIN_VPERMVARHI512_MASK,
29976 IX86_BUILTIN_VPERMT2VARHI512,
29977 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29978 IX86_BUILTIN_VPERMI2VARHI512,
29979 IX86_BUILTIN_PAVGB512,
29980 IX86_BUILTIN_PAVGW512,
29981 IX86_BUILTIN_PADDB512,
29982 IX86_BUILTIN_PSUBB512,
29983 IX86_BUILTIN_PSUBSB512,
29984 IX86_BUILTIN_PADDSB512,
29985 IX86_BUILTIN_PSUBUSB512,
29986 IX86_BUILTIN_PADDUSB512,
29987 IX86_BUILTIN_PSUBW512,
29988 IX86_BUILTIN_PADDW512,
29989 IX86_BUILTIN_PSUBSW512,
29990 IX86_BUILTIN_PADDSW512,
29991 IX86_BUILTIN_PSUBUSW512,
29992 IX86_BUILTIN_PADDUSW512,
29993 IX86_BUILTIN_PMAXUW512,
29994 IX86_BUILTIN_PMAXSW512,
29995 IX86_BUILTIN_PMINUW512,
29996 IX86_BUILTIN_PMINSW512,
29997 IX86_BUILTIN_PMAXUB512,
29998 IX86_BUILTIN_PMAXSB512,
29999 IX86_BUILTIN_PMINUB512,
30000 IX86_BUILTIN_PMINSB512,
30001 IX86_BUILTIN_PMOVWB512,
30002 IX86_BUILTIN_PMOVSWB512,
30003 IX86_BUILTIN_PMOVUSWB512,
30004 IX86_BUILTIN_PMULHRSW512_MASK,
30005 IX86_BUILTIN_PMULHUW512_MASK,
30006 IX86_BUILTIN_PMULHW512_MASK,
30007 IX86_BUILTIN_PMULLW512_MASK,
30008 IX86_BUILTIN_PSLLWI512_MASK,
30009 IX86_BUILTIN_PSLLW512_MASK,
30010 IX86_BUILTIN_PACKSSWB512,
30011 IX86_BUILTIN_PACKUSWB512,
30012 IX86_BUILTIN_PSRAVV32HI,
30013 IX86_BUILTIN_PMADDUBSW512_MASK,
30014 IX86_BUILTIN_PMADDWD512_MASK,
30015 IX86_BUILTIN_PSRLVV32HI,
30016 IX86_BUILTIN_PUNPCKHBW512,
30017 IX86_BUILTIN_PUNPCKHWD512,
30018 IX86_BUILTIN_PUNPCKLBW512,
30019 IX86_BUILTIN_PUNPCKLWD512,
30020 IX86_BUILTIN_PSHUFB512,
30021 IX86_BUILTIN_PSHUFHW512,
30022 IX86_BUILTIN_PSHUFLW512,
30023 IX86_BUILTIN_PSRAWI512,
30024 IX86_BUILTIN_PSRAW512,
30025 IX86_BUILTIN_PSRLWI512,
30026 IX86_BUILTIN_PSRLW512,
30027 IX86_BUILTIN_CVTB2MASK512,
30028 IX86_BUILTIN_CVTW2MASK512,
30029 IX86_BUILTIN_CVTMASK2B512,
30030 IX86_BUILTIN_CVTMASK2W512,
30031 IX86_BUILTIN_PCMPEQB512_MASK,
30032 IX86_BUILTIN_PCMPEQW512_MASK,
30033 IX86_BUILTIN_PCMPGTB512_MASK,
30034 IX86_BUILTIN_PCMPGTW512_MASK,
30035 IX86_BUILTIN_PTESTMB512,
30036 IX86_BUILTIN_PTESTMW512,
30037 IX86_BUILTIN_PTESTNMB512,
30038 IX86_BUILTIN_PTESTNMW512,
30039 IX86_BUILTIN_PSLLVV32HI,
30040 IX86_BUILTIN_PABSB512,
30041 IX86_BUILTIN_PABSW512,
30042 IX86_BUILTIN_BLENDMW512,
30043 IX86_BUILTIN_BLENDMB512,
30044 IX86_BUILTIN_CMPB512,
30045 IX86_BUILTIN_CMPW512,
30046 IX86_BUILTIN_UCMPB512,
30047 IX86_BUILTIN_UCMPW512,
30048
30049 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
30050 where all operands are 32 or 64 bytes wide, respectively. */
30051 IX86_BUILTIN_GATHERALTSIV4DF,
30052 IX86_BUILTIN_GATHERALTDIV8SF,
30053 IX86_BUILTIN_GATHERALTSIV4DI,
30054 IX86_BUILTIN_GATHERALTDIV8SI,
30055 IX86_BUILTIN_GATHER3ALTDIV16SF,
30056 IX86_BUILTIN_GATHER3ALTDIV16SI,
30057 IX86_BUILTIN_GATHER3ALTSIV4DF,
30058 IX86_BUILTIN_GATHER3ALTDIV8SF,
30059 IX86_BUILTIN_GATHER3ALTSIV4DI,
30060 IX86_BUILTIN_GATHER3ALTDIV8SI,
30061 IX86_BUILTIN_GATHER3ALTSIV8DF,
30062 IX86_BUILTIN_GATHER3ALTSIV8DI,
30063 IX86_BUILTIN_GATHER3DIV16SF,
30064 IX86_BUILTIN_GATHER3DIV16SI,
30065 IX86_BUILTIN_GATHER3DIV8DF,
30066 IX86_BUILTIN_GATHER3DIV8DI,
30067 IX86_BUILTIN_GATHER3SIV16SF,
30068 IX86_BUILTIN_GATHER3SIV16SI,
30069 IX86_BUILTIN_GATHER3SIV8DF,
30070 IX86_BUILTIN_GATHER3SIV8DI,
30071 IX86_BUILTIN_SCATTERDIV16SF,
30072 IX86_BUILTIN_SCATTERDIV16SI,
30073 IX86_BUILTIN_SCATTERDIV8DF,
30074 IX86_BUILTIN_SCATTERDIV8DI,
30075 IX86_BUILTIN_SCATTERSIV16SF,
30076 IX86_BUILTIN_SCATTERSIV16SI,
30077 IX86_BUILTIN_SCATTERSIV8DF,
30078 IX86_BUILTIN_SCATTERSIV8DI,
30079
30080 /* AVX512PF */
30081 IX86_BUILTIN_GATHERPFQPD,
30082 IX86_BUILTIN_GATHERPFDPS,
30083 IX86_BUILTIN_GATHERPFDPD,
30084 IX86_BUILTIN_GATHERPFQPS,
30085 IX86_BUILTIN_SCATTERPFDPD,
30086 IX86_BUILTIN_SCATTERPFDPS,
30087 IX86_BUILTIN_SCATTERPFQPD,
30088 IX86_BUILTIN_SCATTERPFQPS,
30089
30090 /* AVX512ER */
30091 IX86_BUILTIN_EXP2PD_MASK,
30092 IX86_BUILTIN_EXP2PS_MASK,
30093 IX86_BUILTIN_EXP2PS,
30094 IX86_BUILTIN_RCP28PD,
30095 IX86_BUILTIN_RCP28PS,
30096 IX86_BUILTIN_RCP28SD,
30097 IX86_BUILTIN_RCP28SS,
30098 IX86_BUILTIN_RSQRT28PD,
30099 IX86_BUILTIN_RSQRT28PS,
30100 IX86_BUILTIN_RSQRT28SD,
30101 IX86_BUILTIN_RSQRT28SS,
30102
30103 /* AVX512IFMA */
30104 IX86_BUILTIN_VPMADD52LUQ512,
30105 IX86_BUILTIN_VPMADD52HUQ512,
30106 IX86_BUILTIN_VPMADD52LUQ256,
30107 IX86_BUILTIN_VPMADD52HUQ256,
30108 IX86_BUILTIN_VPMADD52LUQ128,
30109 IX86_BUILTIN_VPMADD52HUQ128,
30110 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30111 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30112 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30113 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30114 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30115 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30116
30117 /* AVX512VBMI */
30118 IX86_BUILTIN_VPMULTISHIFTQB512,
30119 IX86_BUILTIN_VPMULTISHIFTQB256,
30120 IX86_BUILTIN_VPMULTISHIFTQB128,
30121 IX86_BUILTIN_VPERMVARQI512_MASK,
30122 IX86_BUILTIN_VPERMT2VARQI512,
30123 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30124 IX86_BUILTIN_VPERMI2VARQI512,
30125 IX86_BUILTIN_VPERMVARQI256_MASK,
30126 IX86_BUILTIN_VPERMVARQI128_MASK,
30127 IX86_BUILTIN_VPERMT2VARQI256,
30128 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30129 IX86_BUILTIN_VPERMT2VARQI128,
30130 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30131 IX86_BUILTIN_VPERMI2VARQI256,
30132 IX86_BUILTIN_VPERMI2VARQI128,
30133
30134 /* SHA builtins. */
30135 IX86_BUILTIN_SHA1MSG1,
30136 IX86_BUILTIN_SHA1MSG2,
30137 IX86_BUILTIN_SHA1NEXTE,
30138 IX86_BUILTIN_SHA1RNDS4,
30139 IX86_BUILTIN_SHA256MSG1,
30140 IX86_BUILTIN_SHA256MSG2,
30141 IX86_BUILTIN_SHA256RNDS2,
30142
30143 /* CLWB instructions. */
30144 IX86_BUILTIN_CLWB,
30145
30146 /* PCOMMIT instructions. */
30147 IX86_BUILTIN_PCOMMIT,
30148
30149 /* CLFLUSHOPT instructions. */
30150 IX86_BUILTIN_CLFLUSHOPT,
30151
30152 /* TFmode support builtins. */
30153 IX86_BUILTIN_INFQ,
30154 IX86_BUILTIN_HUGE_VALQ,
30155 IX86_BUILTIN_FABSQ,
30156 IX86_BUILTIN_COPYSIGNQ,
30157
30158 /* Vectorizer support builtins. */
30159 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30160 IX86_BUILTIN_CPYSGNPS,
30161 IX86_BUILTIN_CPYSGNPD,
30162 IX86_BUILTIN_CPYSGNPS256,
30163 IX86_BUILTIN_CPYSGNPS512,
30164 IX86_BUILTIN_CPYSGNPD256,
30165 IX86_BUILTIN_CPYSGNPD512,
30166 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30167 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30168
30169
30170 /* FMA4 instructions. */
30171 IX86_BUILTIN_VFMADDSS,
30172 IX86_BUILTIN_VFMADDSD,
30173 IX86_BUILTIN_VFMADDPS,
30174 IX86_BUILTIN_VFMADDPD,
30175 IX86_BUILTIN_VFMADDPS256,
30176 IX86_BUILTIN_VFMADDPD256,
30177 IX86_BUILTIN_VFMADDSUBPS,
30178 IX86_BUILTIN_VFMADDSUBPD,
30179 IX86_BUILTIN_VFMADDSUBPS256,
30180 IX86_BUILTIN_VFMADDSUBPD256,
30181
30182 /* FMA3 instructions. */
30183 IX86_BUILTIN_VFMADDSS3,
30184 IX86_BUILTIN_VFMADDSD3,
30185
30186 /* XOP instructions. */
30187 IX86_BUILTIN_VPCMOV,
30188 IX86_BUILTIN_VPCMOV_V2DI,
30189 IX86_BUILTIN_VPCMOV_V4SI,
30190 IX86_BUILTIN_VPCMOV_V8HI,
30191 IX86_BUILTIN_VPCMOV_V16QI,
30192 IX86_BUILTIN_VPCMOV_V4SF,
30193 IX86_BUILTIN_VPCMOV_V2DF,
30194 IX86_BUILTIN_VPCMOV256,
30195 IX86_BUILTIN_VPCMOV_V4DI256,
30196 IX86_BUILTIN_VPCMOV_V8SI256,
30197 IX86_BUILTIN_VPCMOV_V16HI256,
30198 IX86_BUILTIN_VPCMOV_V32QI256,
30199 IX86_BUILTIN_VPCMOV_V8SF256,
30200 IX86_BUILTIN_VPCMOV_V4DF256,
30201
30202 IX86_BUILTIN_VPPERM,
30203
30204 IX86_BUILTIN_VPMACSSWW,
30205 IX86_BUILTIN_VPMACSWW,
30206 IX86_BUILTIN_VPMACSSWD,
30207 IX86_BUILTIN_VPMACSWD,
30208 IX86_BUILTIN_VPMACSSDD,
30209 IX86_BUILTIN_VPMACSDD,
30210 IX86_BUILTIN_VPMACSSDQL,
30211 IX86_BUILTIN_VPMACSSDQH,
30212 IX86_BUILTIN_VPMACSDQL,
30213 IX86_BUILTIN_VPMACSDQH,
30214 IX86_BUILTIN_VPMADCSSWD,
30215 IX86_BUILTIN_VPMADCSWD,
30216
30217 IX86_BUILTIN_VPHADDBW,
30218 IX86_BUILTIN_VPHADDBD,
30219 IX86_BUILTIN_VPHADDBQ,
30220 IX86_BUILTIN_VPHADDWD,
30221 IX86_BUILTIN_VPHADDWQ,
30222 IX86_BUILTIN_VPHADDDQ,
30223 IX86_BUILTIN_VPHADDUBW,
30224 IX86_BUILTIN_VPHADDUBD,
30225 IX86_BUILTIN_VPHADDUBQ,
30226 IX86_BUILTIN_VPHADDUWD,
30227 IX86_BUILTIN_VPHADDUWQ,
30228 IX86_BUILTIN_VPHADDUDQ,
30229 IX86_BUILTIN_VPHSUBBW,
30230 IX86_BUILTIN_VPHSUBWD,
30231 IX86_BUILTIN_VPHSUBDQ,
30232
30233 IX86_BUILTIN_VPROTB,
30234 IX86_BUILTIN_VPROTW,
30235 IX86_BUILTIN_VPROTD,
30236 IX86_BUILTIN_VPROTQ,
30237 IX86_BUILTIN_VPROTB_IMM,
30238 IX86_BUILTIN_VPROTW_IMM,
30239 IX86_BUILTIN_VPROTD_IMM,
30240 IX86_BUILTIN_VPROTQ_IMM,
30241
30242 IX86_BUILTIN_VPSHLB,
30243 IX86_BUILTIN_VPSHLW,
30244 IX86_BUILTIN_VPSHLD,
30245 IX86_BUILTIN_VPSHLQ,
30246 IX86_BUILTIN_VPSHAB,
30247 IX86_BUILTIN_VPSHAW,
30248 IX86_BUILTIN_VPSHAD,
30249 IX86_BUILTIN_VPSHAQ,
30250
30251 IX86_BUILTIN_VFRCZSS,
30252 IX86_BUILTIN_VFRCZSD,
30253 IX86_BUILTIN_VFRCZPS,
30254 IX86_BUILTIN_VFRCZPD,
30255 IX86_BUILTIN_VFRCZPS256,
30256 IX86_BUILTIN_VFRCZPD256,
30257
30258 IX86_BUILTIN_VPCOMEQUB,
30259 IX86_BUILTIN_VPCOMNEUB,
30260 IX86_BUILTIN_VPCOMLTUB,
30261 IX86_BUILTIN_VPCOMLEUB,
30262 IX86_BUILTIN_VPCOMGTUB,
30263 IX86_BUILTIN_VPCOMGEUB,
30264 IX86_BUILTIN_VPCOMFALSEUB,
30265 IX86_BUILTIN_VPCOMTRUEUB,
30266
30267 IX86_BUILTIN_VPCOMEQUW,
30268 IX86_BUILTIN_VPCOMNEUW,
30269 IX86_BUILTIN_VPCOMLTUW,
30270 IX86_BUILTIN_VPCOMLEUW,
30271 IX86_BUILTIN_VPCOMGTUW,
30272 IX86_BUILTIN_VPCOMGEUW,
30273 IX86_BUILTIN_VPCOMFALSEUW,
30274 IX86_BUILTIN_VPCOMTRUEUW,
30275
30276 IX86_BUILTIN_VPCOMEQUD,
30277 IX86_BUILTIN_VPCOMNEUD,
30278 IX86_BUILTIN_VPCOMLTUD,
30279 IX86_BUILTIN_VPCOMLEUD,
30280 IX86_BUILTIN_VPCOMGTUD,
30281 IX86_BUILTIN_VPCOMGEUD,
30282 IX86_BUILTIN_VPCOMFALSEUD,
30283 IX86_BUILTIN_VPCOMTRUEUD,
30284
30285 IX86_BUILTIN_VPCOMEQUQ,
30286 IX86_BUILTIN_VPCOMNEUQ,
30287 IX86_BUILTIN_VPCOMLTUQ,
30288 IX86_BUILTIN_VPCOMLEUQ,
30289 IX86_BUILTIN_VPCOMGTUQ,
30290 IX86_BUILTIN_VPCOMGEUQ,
30291 IX86_BUILTIN_VPCOMFALSEUQ,
30292 IX86_BUILTIN_VPCOMTRUEUQ,
30293
30294 IX86_BUILTIN_VPCOMEQB,
30295 IX86_BUILTIN_VPCOMNEB,
30296 IX86_BUILTIN_VPCOMLTB,
30297 IX86_BUILTIN_VPCOMLEB,
30298 IX86_BUILTIN_VPCOMGTB,
30299 IX86_BUILTIN_VPCOMGEB,
30300 IX86_BUILTIN_VPCOMFALSEB,
30301 IX86_BUILTIN_VPCOMTRUEB,
30302
30303 IX86_BUILTIN_VPCOMEQW,
30304 IX86_BUILTIN_VPCOMNEW,
30305 IX86_BUILTIN_VPCOMLTW,
30306 IX86_BUILTIN_VPCOMLEW,
30307 IX86_BUILTIN_VPCOMGTW,
30308 IX86_BUILTIN_VPCOMGEW,
30309 IX86_BUILTIN_VPCOMFALSEW,
30310 IX86_BUILTIN_VPCOMTRUEW,
30311
30312 IX86_BUILTIN_VPCOMEQD,
30313 IX86_BUILTIN_VPCOMNED,
30314 IX86_BUILTIN_VPCOMLTD,
30315 IX86_BUILTIN_VPCOMLED,
30316 IX86_BUILTIN_VPCOMGTD,
30317 IX86_BUILTIN_VPCOMGED,
30318 IX86_BUILTIN_VPCOMFALSED,
30319 IX86_BUILTIN_VPCOMTRUED,
30320
30321 IX86_BUILTIN_VPCOMEQQ,
30322 IX86_BUILTIN_VPCOMNEQ,
30323 IX86_BUILTIN_VPCOMLTQ,
30324 IX86_BUILTIN_VPCOMLEQ,
30325 IX86_BUILTIN_VPCOMGTQ,
30326 IX86_BUILTIN_VPCOMGEQ,
30327 IX86_BUILTIN_VPCOMFALSEQ,
30328 IX86_BUILTIN_VPCOMTRUEQ,
30329
30330 /* LWP instructions. */
30331 IX86_BUILTIN_LLWPCB,
30332 IX86_BUILTIN_SLWPCB,
30333 IX86_BUILTIN_LWPVAL32,
30334 IX86_BUILTIN_LWPVAL64,
30335 IX86_BUILTIN_LWPINS32,
30336 IX86_BUILTIN_LWPINS64,
30337
30338 IX86_BUILTIN_CLZS,
30339
30340 /* RTM */
30341 IX86_BUILTIN_XBEGIN,
30342 IX86_BUILTIN_XEND,
30343 IX86_BUILTIN_XABORT,
30344 IX86_BUILTIN_XTEST,
30345
30346 /* MPX */
30347 IX86_BUILTIN_BNDMK,
30348 IX86_BUILTIN_BNDSTX,
30349 IX86_BUILTIN_BNDLDX,
30350 IX86_BUILTIN_BNDCL,
30351 IX86_BUILTIN_BNDCU,
30352 IX86_BUILTIN_BNDRET,
30353 IX86_BUILTIN_BNDNARROW,
30354 IX86_BUILTIN_BNDINT,
30355 IX86_BUILTIN_SIZEOF,
30356 IX86_BUILTIN_BNDLOWER,
30357 IX86_BUILTIN_BNDUPPER,
30358
30359 /* BMI instructions. */
30360 IX86_BUILTIN_BEXTR32,
30361 IX86_BUILTIN_BEXTR64,
30362 IX86_BUILTIN_CTZS,
30363
30364 /* TBM instructions. */
30365 IX86_BUILTIN_BEXTRI32,
30366 IX86_BUILTIN_BEXTRI64,
30367
30368 /* BMI2 instructions. */
30369 IX86_BUILTIN_BZHI32,
30370 IX86_BUILTIN_BZHI64,
30371 IX86_BUILTIN_PDEP32,
30372 IX86_BUILTIN_PDEP64,
30373 IX86_BUILTIN_PEXT32,
30374 IX86_BUILTIN_PEXT64,
30375
30376 /* ADX instructions. */
30377 IX86_BUILTIN_ADDCARRYX32,
30378 IX86_BUILTIN_ADDCARRYX64,
30379
30380 /* SBB instructions. */
30381 IX86_BUILTIN_SBB32,
30382 IX86_BUILTIN_SBB64,
30383
30384 /* FSGSBASE instructions. */
30385 IX86_BUILTIN_RDFSBASE32,
30386 IX86_BUILTIN_RDFSBASE64,
30387 IX86_BUILTIN_RDGSBASE32,
30388 IX86_BUILTIN_RDGSBASE64,
30389 IX86_BUILTIN_WRFSBASE32,
30390 IX86_BUILTIN_WRFSBASE64,
30391 IX86_BUILTIN_WRGSBASE32,
30392 IX86_BUILTIN_WRGSBASE64,
30393
30394 /* RDRND instructions. */
30395 IX86_BUILTIN_RDRAND16_STEP,
30396 IX86_BUILTIN_RDRAND32_STEP,
30397 IX86_BUILTIN_RDRAND64_STEP,
30398
30399 /* RDSEED instructions. */
30400 IX86_BUILTIN_RDSEED16_STEP,
30401 IX86_BUILTIN_RDSEED32_STEP,
30402 IX86_BUILTIN_RDSEED64_STEP,
30403
30404 /* F16C instructions. */
30405 IX86_BUILTIN_CVTPH2PS,
30406 IX86_BUILTIN_CVTPH2PS256,
30407 IX86_BUILTIN_CVTPS2PH,
30408 IX86_BUILTIN_CVTPS2PH256,
30409
30410 /* CFString built-in for Darwin. */
30411 IX86_BUILTIN_CFSTRING,
30412
30413 /* Builtins to get CPU type and supported features. */
30414 IX86_BUILTIN_CPU_INIT,
30415 IX86_BUILTIN_CPU_IS,
30416 IX86_BUILTIN_CPU_SUPPORTS,
30417
30418 /* Read/write FLAGS register built-ins. */
30419 IX86_BUILTIN_READ_FLAGS,
30420 IX86_BUILTIN_WRITE_FLAGS,
30421
30422 IX86_BUILTIN_MAX
30423 };
30424
30425 /* Table for the ix86 builtin decls. */
30426 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30427
30428 /* Table of all the builtin functions that are possible with different ISAs
30429 but are waiting to be built until a function is declared to use that
30430 ISA. */
30431 struct builtin_isa {
30432 const char *name; /* function name */
30433 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30434 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30435 bool const_p; /* true if the declaration is constant */
30436 bool leaf_p; /* true if the declaration has leaf attribute */
30437 bool nothrow_p; /* true if the declaration has nothrow attribute */
30438 bool set_and_not_built_p; /* true if the builtin has been recorded but its decl has not been built yet */
30439 };
30440
30441 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30442
30443
30444 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30445 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30446 function decl in the ix86_builtins array. Returns the function decl, or
30447 NULL_TREE if the builtin was not added.
30448
30449 If the front end has a special hook for builtin functions, delay adding
30450 builtin functions that aren't in the current ISA until the ISA is changed
30451 with function specific optimization. Doing so can save about 300K for the
30452 default compiler. When the builtin is expanded, check at that time whether
30453 it is valid.
30454
30455 If the front end doesn't have a special hook, record all builtins, even
30456 those not in the current ISA, in case the user uses
30457 function specific options for a different ISA, so that we don't get scope
30458 errors if a builtin is added in the middle of a function scope. */
30459
30460 static inline tree
30461 def_builtin (HOST_WIDE_INT mask, const char *name,
30462 enum ix86_builtin_func_type tcode,
30463 enum ix86_builtins code)
30464 {
30465 tree decl = NULL_TREE;
30466
30467 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30468 {
30469 ix86_builtins_isa[(int) code].isa = mask;
30470
30471 mask &= ~OPTION_MASK_ISA_64BIT;
30472 if (mask == 0
30473 || (mask & ix86_isa_flags) != 0
30474 || (lang_hooks.builtin_function
30475 == lang_hooks.builtin_function_ext_scope))
30477 {
30478 tree type = ix86_get_builtin_func_type (tcode);
30479 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30480 NULL, NULL_TREE);
30481 ix86_builtins[(int) code] = decl;
30482 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30483 }
30484 else
30485 {
30486 ix86_builtins[(int) code] = NULL_TREE;
30487 ix86_builtins_isa[(int) code].tcode = tcode;
30488 ix86_builtins_isa[(int) code].name = name;
30489 ix86_builtins_isa[(int) code].leaf_p = false;
30490 ix86_builtins_isa[(int) code].nothrow_p = false;
30491 ix86_builtins_isa[(int) code].const_p = false;
30492 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30493 }
30494 }
30495
30496 return decl;
30497 }
30498
30499 /* Like def_builtin, but also marks the function decl "const". */
30500
30501 static inline tree
30502 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30503 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30504 {
30505 tree decl = def_builtin (mask, name, tcode, code);
30506 if (decl)
30507 TREE_READONLY (decl) = 1;
30508 else
30509 ix86_builtins_isa[(int) code].const_p = true;
30510
30511 return decl;
30512 }
30513
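/* Usage sketch (illustrative only, not part of the original file): the
   builtin initialization routines later in this file walk the bdesc_*
   tables and register each entry through the helpers above, roughly as
   below.  The names used here are taken from entries visible in
   bdesc_special_args and bdesc_args.  */
#if 0
  /* Declared immediately or deferred, depending on whether SSE2 is in
     ix86_isa_flags.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
	       VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);
  /* Same, but the resulting decl is additionally marked TREE_READONLY.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_paddb",
		     V8QI_FTYPE_V8QI_V8QI, IX86_BUILTIN_PADDB);
#endif
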
30514 /* Add any new builtin functions for a given ISA that may not have been
30515 declared. This saves a bit of space compared to adding all of the
30516 declarations to the tree up front, whether or not they are used. */
30517
30518 static void
30519 ix86_add_new_builtins (HOST_WIDE_INT isa)
30520 {
30521 int i;
30522
30523 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30524 {
30525 if ((ix86_builtins_isa[i].isa & isa) != 0
30526 && ix86_builtins_isa[i].set_and_not_built_p)
30527 {
30528 tree decl, type;
30529
30530 /* Don't define the builtin again. */
30531 ix86_builtins_isa[i].set_and_not_built_p = false;
30532
30533 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30534 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30535 type, i, BUILT_IN_MD, NULL,
30536 NULL_TREE);
30537
30538 ix86_builtins[i] = decl;
30539 if (ix86_builtins_isa[i].const_p)
30540 TREE_READONLY (decl) = 1;
30541 if (ix86_builtins_isa[i].leaf_p)
30542 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30543 NULL_TREE);
30544 if (ix86_builtins_isa[i].nothrow_p)
30545 TREE_NOTHROW (decl) = 1;
30546 }
30547 }
30548 }
30549
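/* Illustrative trigger for the lazy path above (an exposition-only sketch,
   not part of the original file): under function specific optimization a
   deferred builtin gets declared once some function opts in to the needed
   ISA, e.g. through the target attribute.  This is user code and assumes
   <immintrin.h> has been included.  */
#if 0
__attribute__ ((target ("avx2")))
__m256i
sum_avx2 (__m256i x, __m256i y)
{
  /* The intrinsic expands to an AVX2 builtin registered in this file.  */
  return _mm256_add_epi32 (x, y);
}
#endif
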
30550 /* Bits for builtin_description.flag. */
30551
30552 /* Set when we don't support the comparison natively, and should
30553 swap the comparison operands in order to support it. */
30554 #define BUILTIN_DESC_SWAP_OPERANDS 1
30555
30556 struct builtin_description
30557 {
30558 const HOST_WIDE_INT mask;
30559 const enum insn_code icode;
30560 const char *const name;
30561 const enum ix86_builtins code;
30562 const enum rtx_code comparison;
30563 const int flag;
30564 };
30565
30566 static const struct builtin_description bdesc_comi[] =
30567 {
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30592 };
30593
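/* User-level mapping (illustrative sketch, not part of the original file):
   each bdesc_comi entry backs an SSE/SSE2 comparison intrinsic; the actual
   wrappers live in the intrinsic headers (e.g. xmmintrin.h), roughly like
   this for "__builtin_ia32_comieq".  */
#if 0
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
}
#endif
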
30594 static const struct builtin_description bdesc_pcmpestr[] =
30595 {
30596 /* SSE4.2 */
30597 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30598 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30599 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30600 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30601 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30602 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30603 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30604 };
30605
30606 static const struct builtin_description bdesc_pcmpistr[] =
30607 {
30608 /* SSE4.2 */
30609 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30610 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30611 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30612 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30613 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30614 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30615 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30616 };
30617
30618 /* Special builtins with variable number of arguments. */
30619 static const struct builtin_description bdesc_special_args[] =
30620 {
30621 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30622 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30623 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30624
30625 /* 80387 (for use internally for atomic compound assignment). */
30626 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30627 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30628 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30629 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30630
30631 /* MMX */
30632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30633
30634 /* 3DNow! */
30635 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30636
30637 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30638 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30639 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30640 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30641 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30642 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30643 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30644 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30645 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30646
30647 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30648 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30649 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30650 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30651 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30652 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30653 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30654 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30655
30656 /* SSE */
30657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30660
30661 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30662 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30665
30666 /* SSE or 3DNow!A */
30667 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30668 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30669
30670 /* SSE2 */
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30678 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30681
30682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30684
30685 /* SSE3 */
30686 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30687
30688 /* SSE4.1 */
30689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30690
30691 /* SSE4A */
30692 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30693 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30694
30695 /* AVX */
30696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30698
30699 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30700 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30701 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30704
30705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30712
30713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30716
30717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30725
30726 /* AVX2 */
30727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30736
30737 /* AVX512F */
30738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30785
30786 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30787 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30788 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30789 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30790 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30791 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30792
30793 /* FSGSBASE */
30794 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30795 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30796 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30797 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30798 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30799 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30800 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30801 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30802
30803 /* RTM */
30804 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30805 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30806 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30807
30808 /* AVX512BW */
30809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30813
30814 /* AVX512VL */
30815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30909
30910 /* PCOMMIT. */
30911 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30912 };
30913
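/* User-level mapping for the new PCOMMIT entry above (illustrative sketch,
   not part of the original file): the wrapper is expected to live in a
   pcommitintrin.h-style header, roughly as follows.  */
#if 0
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_pcommit (void)
{
  __builtin_ia32_pcommit ();
}
#endif
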
30914 /* Builtins with variable number of arguments. */
30915 static const struct builtin_description bdesc_args[] =
30916 {
30917 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30918 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30919 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30920 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30921 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30922 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30923 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30924
30925 /* MMX */
30926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30932
30933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30941
30942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30944
30945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30949
30950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30956
30957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30963
30964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30967
30968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30969
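/* Shift-count conventions: the *_SI_COUNT signatures take the count as a
   plain int (e.g. __builtin_ia32_psllwi (v, 3)), while the *_V4HI_COUNT,
   *_V2SI_COUNT and *_V1DI_COUNT signatures take the count in the low part
   of a second MMX vector, matching the register-count psllw, pslld and
   psllq forms.  */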
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30976
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30983
30984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30988
30989 /* 3DNow! */
30990 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30991 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30992 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30993 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30994
30995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30996 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31000 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31001 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31002 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31003 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31004 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31005 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31006 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31007 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31008 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31009 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31010
31011 /* 3DNow!A */
31012 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31013 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31014 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31015 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31016 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31017 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31018
31019 /* SSE */
31020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31022 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31024 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31032
31033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31034
31035 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31036 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31037 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31043
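/* FP comparisons: the rtx code in each entry selects the cmpps or cmpss
   predicate.  There is no direct GT or GE predicate, so the cmpgt and
   cmpge builtins reuse LT and LE with the *_SWAP signature, i.e. the two
   operands are swapped before the comparison is emitted.  */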
31044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31064
31065 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31066 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31069
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31074
31075 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31076
31077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31080 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31081 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31082
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31085 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31086
31087 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31088
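/* Scalar sqrt/rsqrt/rcp: the *_VEC_MERGE signature means the builtin takes
   a single V4SF argument even though the vm insn needs two inputs; the
   expander supplies the same value for the merge operand, so the upper
   three elements of the result are copied from the input.  */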
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31092
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31095
31096 /* SSE MMX or 3DNow!A */
31097 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31098 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31099 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31100
31101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31102 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31104 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31105
31106 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31107 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31108
31109 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31110
31111 /* SSE2 */
31112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31113
31114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31118 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31119
31120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31125
31126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31127
31128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31130 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31131 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31132
31133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31135 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31136
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31140 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31145
31146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31166
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31168 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31171
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31174 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31176
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31178
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31182
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31184
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31193
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31202
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31205
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31210
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31213
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31220
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31225
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31234
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31238
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31241
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31244
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31246
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31248 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31251
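/* pslldqi128/psrldqi128: the builtins are typed on V2DI, but the insns
   shift the whole 128-bit value in V1TI mode; the *_INT_CONVERT signature
   tells the expander to convert between the two modes.  The count operand
   is a bit count, which is why <emmintrin.h> passes the byte count
   multiplied by eight.  */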
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31259
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31267
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31272
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31276
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31278
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31280
31281 /* SSE2 MMX */
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31284
31285 /* SSE3 */
31286 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31287 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31288
31289 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31290 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31291 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31292 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31293 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31294 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31295
31296 /* SSSE3 */
31297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31298 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31303
31304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31305 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31306 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31313 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31315 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31316 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31319 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31320 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31328
31329 /* SSSE3. */
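/* As with pslldq/psrldq above, the palignr builtins are typed on V2DI and
   V1DI while the palignrti/palignrdi insns operate on TImode and DImode,
   and the shift amount is expressed in bits rather than bytes.  */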
31330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31332
31333 /* SSE4.1 */
31334 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31335 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31336 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31339 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31344
31345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31358
31359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31371
31372 /* SSE4.1 */
31373 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31374 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31375 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31376 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31377
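/* floorpd/ceilpd/truncpd/rintpd reuse the roundpd pattern; the rounding
   mode (ROUND_FLOOR etc.) is stashed in the field that normally holds a
   comparison code and becomes the immediate rounding operand at expansion
   time.  */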
31378 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31379 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31380 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31381 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31382
31383 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31384 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31385
31386 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31387 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31388
31389 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31390 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31391 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31392 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31393
31394 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31395 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31396
31397 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31398 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31399
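/* All three ptest builtins expand the same insn; the rtx code selects
   which flag of PTEST is returned: EQ for ZF (ptestz), LTU for CF
   (ptestc), GTU for "neither flag set" (ptestnzc).  */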
31400 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31401 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31402 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31403
31404 /* SSE4.2 */
31405 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31406 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31407 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31408 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31409 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31410
31411 /* SSE4A */
31412 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31413 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31414 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31415 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31416
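/* The AES and PCLMUL entries below carry a null name: the builtins are
   declared separately, presumably so the AES/PCLMUL ISA requirement is
   enforced at declaration time, and this table only provides their
   expansion data.  */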
31417 /* AES */
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31420
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31425
31426 /* PCLMUL */
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31428
31429 /* AVX */
31430 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31431 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31434 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31435 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31438 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31444 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31445 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31446 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31447 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31448 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31449 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31450 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31451 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31452 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31453 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31454 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31455 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31456
31457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31461
31462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31496
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31500
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31506
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31508
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31511
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31516
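  /* In the floor/ceil/trunc/rint rows above the rtx-code field does not hold
     a comparison at all: ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC and ROUND_MXCSR
     are the immediate rounding-mode selectors that the expander forwards to
     the round patterns, which is why they are cast to (enum rtx_code) and
     paired with the *_ROUND function types.  */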
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31519
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31522
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31527
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31530
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31533
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31538
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31545
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31561
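  /* The VTEST/PTEST rows above reuse one insn pattern per operand width; the
     rtx code in each row (EQ, LTU or GTU) selects which condition of the
     flags result is returned, yielding the testz, testc and testnzc forms
     respectively.  As a sketch of the user-level mapping (assuming
     <immintrin.h> with AVX enabled):

	int all_zero = __builtin_ia32_ptestz256 ((__v4di) a, (__v4di) b);

     is what _mm256_testz_si256 (a, b) comes down to.  */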
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31564
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31567
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31569
31570 /* AVX2 */
31571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31572 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31573 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31574 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31579 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31580 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31581 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31582 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31588 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31717
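  /* The AVX2 rows above are normally reached through avx2intrin.h rather
     than called directly.  As a sketch (assuming AVX2 is enabled; the
     wrapper name is illustrative):

	__m256i
	add_epi32 (__m256i a, __m256i b)
	{
	  return (__m256i) __builtin_ia32_paddd256 ((__v8si) a, (__v8si) b);
	}

     which is essentially how _mm256_add_epi32 is written, with the vector
     types taken from the V8SI_FTYPE_V8SI_V8SI signature.  */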
31718 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31719
31720 /* BMI */
31721 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31722 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31723 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31724
31725 /* TBM */
31726 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31727 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31728
31729 /* F16C */
31730 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31731 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31732 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31733 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31734
31735 /* BMI2 */
31736 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31737 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31738 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31739 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31740 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31741 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31742
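  /* The BMI2 PDEP/PEXT rows map onto the parallel bit-deposit and
     bit-extract instructions; as a sketch (assuming BMI2 is enabled, with
     illustrative variable names):

	unsigned int even_bits = __builtin_ia32_pext_si (x, 0x55555555u);
	unsigned int restored  = __builtin_ia32_pdep_si (even_bits, 0x55555555u);

     which is what the _pext_u32 and _pdep_u32 intrinsics from bmi2intrin.h
     reduce to; the *_di rows provide the 64-bit counterparts.  */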
31743 /* AVX512F */
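  /* The "_mask" builtins in this group take two extra trailing arguments
     compared with their unmasked counterparts: a source vector supplying the
     elements that are left unchanged under the write mask, and the QI/HI
     mask itself, as reflected in signatures such as
     V16SI_FTYPE_V16SI_V16SI_V16SI_HI.  The "_maskz" variants zero the
     unselected elements instead of merging them.  */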
31744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31799 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31800 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31802 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31911 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
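  /* The four entries above are the AVX512CD conflict-detection and
     leading-zero-count builtins; they are gated on
     OPTION_MASK_ISA_AVX512CD rather than OPTION_MASK_ISA_AVX512F.  */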
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31941
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31946 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
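  /* Unlike the *_mask entries above, the descriptors in this block carry
     no merge/mask operands: their FTYPE codes have no trailing _QI/_HI
     mask argument, so e.g. __builtin_ia32_sqrtps512 expands directly
     through the plain sqrtv16sf2 pattern.  */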
31950
31951 /* Mask arithmetic operations.  */
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
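  /* These HI_FTYPE descriptors implement the 16-bit opmask (__mmask16)
     operations; the mask intrinsics in avx512fintrin.h such as
     _mm512_kand, _mm512_knot and _mm512_kortestc expand to the
     __builtin_ia32_k* builtins listed here.  */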
31962
31963 /* SHA.  */
31964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
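  /* The SHA descriptors carry a zero name slot; they are looked up by
     their IX86_BUILTIN_SHA* codes at expansion time and supply the
     sha1msg1 ... sha256rnds2 insn patterns behind the intrinsics in
     shaintrin.h.  */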
31971
31972 /* AVX512VL. */
31973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31985 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31986 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32011 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32012 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32019 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32020 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32021 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32022 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32023 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32030 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32031 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32032 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32033 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32034 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32035 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32036 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32037 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32040 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32041 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32042 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32077 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32078 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32080 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32081 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
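  /* For the VPBROADCASTQ builtins above, the *_gpr_mask forms require a
     64-bit target (the scalar source is a DImode general register),
     while the *_mem_mask forms take the scalar from memory and are the
     variants intended for 32-bit targets.  */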
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32086 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32102 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32117 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32118 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32122 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
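/* 128-bit lane shuffles within 256-bit vectors.  */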
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
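/* Truncating vpmov down-conversions, with signed and unsigned saturation variants.  */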
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
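/* vrange, vgetexp, vfixupimm and vpabs builtins.  */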
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
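/* Multiply builtins: high-part multiplies and low-part word/qword multiplies.  */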
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32240 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
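/* Bitwise logic (floating-point and integer) and shift builtins.  */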
32241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32244 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32245 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32246 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32247 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32248 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32249 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
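/* Saturating pack builtins.  */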
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
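/* rndscale, vpternlog and scalef builtins.  */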
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
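/* FMA-family builtins (fmadd, fmsub, fnmadd, fnmsub, fmaddsub, fmsubadd).  */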
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
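/* 256-bit insertf64x2/inserti64x2 and AVX512BW variable-shift and multiply-add builtins.  */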
32357 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32358 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
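/* Float-to-integer conversion builtins (cvtps2dq/udq/qq/uqq).  */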
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
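/* getmant and movddup/movshdup/movsldup builtins.  */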
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
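/* 64-bit integer to floating-point conversion builtins (cvtqq2ps/pd, cvtuqq2ps/pd).  */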
32385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32391 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
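/* Two-source variable permute builtins (vpermt2var, vpermi2var).  */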
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
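/* Shuffle builtins: pshufb, pshufhw, pshuflw, pshufd, shufpd and shufps.  */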
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
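/* Rotate and remaining shift builtins (variable, by count and by immediate).  */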
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32453 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
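/* fpclass builtins.  */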
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32467 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32469 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32470 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32475 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32476 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32477 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32478 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32483 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32484 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32485 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32486 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32519 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32520 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32521 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32522 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32578 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32579 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32580 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32581 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32582 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32590 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32591 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32592 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32593 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32687
32688 /* AVX512DQ. */
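/* Each entry gives the required ISA option mask, the insn code, the
   builtin's name, its IX86_BUILTIN enum value, the comparison code
   (UNKNOWN for non-comparison builtins) and the function prototype enum.  */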
32689 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32690 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32691 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32692 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32693 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32694 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32695 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32696 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32697 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32698 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32699 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32700 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32701 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32702 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32703 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32704 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32705 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32706 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32707 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32708 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32709 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32710 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32711 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32712 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32713 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32714 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32715 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32716 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32717 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32718 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32719 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32720
32721 /* AVX512BW. */
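/* The 512-bit byte and word operations below take DI (64-bit) and SI
   (32-bit) mask arguments respectively, matching the 64 byte or 32 word
   elements per ZMM register.  */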
32722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32813
32814 /* AVX512IFMA */
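/* A brief note on the entries below (descriptive only): the vpmadd52luq and
   vpmadd52huq builtins compute the low and high halves of a 52-bit integer
   multiply-accumulate.  Each comes in a merge-masking (_mask) and a
   zero-masking (_maskz) form, and the 128/256-bit variants are additionally
   gated on AVX512VL.  */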
32815 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32816 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32817 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32818 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32819 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32820 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32821 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32822 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32823 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32824 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32825 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32826 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32827
32828 /* AVX512VBMI */
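/* Descriptive note: AVX512VBMI contributes the vpmultishiftqb builtins and
   the byte-granularity permutes (permvarqi, vpermt2varqi, vpermi2varqi);
   as above, the 128/256-bit entries also require AVX512VL.  */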
32829 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32831 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32832 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32833 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32834 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32835 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32836 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32837 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32838 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32839 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32840 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32841 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32842 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32843 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32844 };
32845
32846 /* Builtins with rounding support. */
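/* Descriptive note on the table layout: each entry gives the required ISA
   option mask, the insn pattern used to expand the builtin, its user-visible
   name, its IX86_BUILTIN_* code, an optional RTX comparison code (UNKNOWN
   when unused) and the prototype enumerator.  The trailing _INT in the
   prototypes below is the extra immediate operand that carries the rounding
   mode / SAE selection for these "_round"-capable builtins.  */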
32847 static const struct builtin_description bdesc_round_args[] =
32848 {
32849 /* AVX512F */
32850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32869 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32871 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32878 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32880 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32930 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32932 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32934 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32936 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32938 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32940 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32942 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32944 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32969
32970 /* AVX512ER */
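/* Descriptive note: AVX512ER supplies the exp2, rcp28 and rsqrt28
   approximation builtins (the "28" refers to roughly 2^-28 relative
   accuracy); like the entries above, they take a final rounding/SAE
   immediate.  */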
32971 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32972 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32973 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32974 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32975 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32976 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32977 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32978 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32979 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32980 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32981
32982 /* AVX512DQ. */
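/* Descriptive note: AVX512DQ adds the range/ranges builtins and the
   conversions between packed floating-point values and packed (unsigned)
   64-bit integers (cvt{,t}{pd,ps}2{qq,uqq} and cvt{,u}qq2{pd,ps}).  */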
32983 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32984 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32987 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32988 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32989 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32990 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32991 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32993 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32994 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32995 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32996 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32997 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32998 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32999 };
33000
33001 /* Builtins for MPX. */
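/* Descriptive note: the MPX entries in this and the following table carry
   no insn pattern (insn code 0); they are expanded specially rather than
   through a named pattern.  */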
33002 static const struct builtin_description bdesc_mpx[] =
33003 {
33004 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33005 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33006 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33007 };
33008
33009 /* Const builtins for MPX. */
33010 static const struct builtin_description bdesc_mpx_const[] =
33011 {
33012 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33013 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33014 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33015 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33016 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33017 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33018 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33019 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33020 };
33021
33022 /* FMA4 and XOP. */
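/* Descriptive note: the MULTI_ARG_* macros below abbreviate the *_FTYPE_*
   prototype enumerators used by bdesc_multi_arg.  The digit encodes the
   argument count and the remaining tags the element modes (with a "2"
   suffix generally marking the 256-bit form); the _IMM and _CMP flavours
   take an immediate or a comparison code as the last operand.  */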
33023 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33024 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33025 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33026 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33027 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33028 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33029 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33030 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33031 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33032 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33033 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33034 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33035 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33036 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33037 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33038 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33039 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33040 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33041 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33042 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33043 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33044 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33045 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33046 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33047 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33048 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33049 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33050 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33051 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33052 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33053 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33054 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33055 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33056 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33057 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33058 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33059 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33060 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33061 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33062 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33063 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33064 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33065 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33066 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33067 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33068 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33069 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33070 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33071 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33072 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33073 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33074 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33075
33076 static const struct builtin_description bdesc_multi_arg[] =
33077 {
33078 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33079 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33080 UNKNOWN, (int)MULTI_ARG_3_SF },
33081 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33082 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33083 UNKNOWN, (int)MULTI_ARG_3_DF },
33084
33085 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33086 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33087 UNKNOWN, (int)MULTI_ARG_3_SF },
33088 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33089 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33090 UNKNOWN, (int)MULTI_ARG_3_DF },
33091
33092 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33093 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33094 UNKNOWN, (int)MULTI_ARG_3_SF },
33095 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33096 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33097 UNKNOWN, (int)MULTI_ARG_3_DF },
33098 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33099 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33100 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33101 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33102 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33103 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33104
33105 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33106 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33107 UNKNOWN, (int)MULTI_ARG_3_SF },
33108 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33109 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33110 UNKNOWN, (int)MULTI_ARG_3_DF },
33111 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33112 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33113 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33114 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33115 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33116 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33117
33118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33125
33126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33133
33134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33135
33136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33148
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33165
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33172
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33188
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33196
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33204
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33212
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33220
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33228
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33236
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33244
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33252
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33261
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33270
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33275
33276 };
33277 \f
33278 /* TM vector builtins. */
33279
33280 /* Reuse the existing x86-specific `struct builtin_description' because
33281 we're lazy. Add casts to make them fit. */
33282 static const struct builtin_description bdesc_tm[] =
33283 {
33284 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33285 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33286 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33287 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33288 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33289 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33290 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33291
33292 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33293 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33294 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33295 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33296 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33297 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33298 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33299
33300 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33301 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33302 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33303 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33304 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33305 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33306 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33307
33308 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33309 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33310 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33311 };
33312
33313 /* TM callbacks. */
33314
33315 /* Return the builtin decl needed to load a vector of TYPE. */
33316
33317 static tree
33318 ix86_builtin_tm_load (tree type)
33319 {
33320 if (TREE_CODE (type) == VECTOR_TYPE)
33321 {
33322 switch (tree_to_uhwi (TYPE_SIZE (type)))
33323 {
33324 case 64:
33325 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33326 case 128:
33327 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33328 case 256:
33329 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33330 }
33331 }
33332 return NULL_TREE;
33333 }
33334
33335 /* Return the builtin decl needed to store a vector of TYPE. */
33336
33337 static tree
33338 ix86_builtin_tm_store (tree type)
33339 {
33340 if (TREE_CODE (type) == VECTOR_TYPE)
33341 {
33342 switch (tree_to_uhwi (TYPE_SIZE (type)))
33343 {
33344 case 64:
33345 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33346 case 128:
33347 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33348 case 256:
33349 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33350 }
33351 }
33352 return NULL_TREE;
33353 }
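
/* A minimal sketch of how the two hooks above resolve (illustrative only;
   the vector type below is an assumption, not taken from this file): for a
   128-bit vector of four 32-bit floats,

     tree t = build_vector_type (float_type_node, 4);    128-bit vector
     tree ld = ix86_builtin_tm_load (t);    decl for BUILT_IN_TM_LOAD_M128
     tree st = ix86_builtin_tm_store (t);   decl for BUILT_IN_TM_STORE_M128

   Any non-vector type, or a vector size other than 64/128/256 bits, falls
   through and yields NULL_TREE.  */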
33354 \f
33355 /* Initialize the transactional memory vector load/store builtins. */
33356
33357 static void
33358 ix86_init_tm_builtins (void)
33359 {
33360 enum ix86_builtin_func_type ftype;
33361 const struct builtin_description *d;
33362 size_t i;
33363 tree decl;
33364 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33365 tree attrs_log, attrs_type_log;
33366
33367 if (!flag_tm)
33368 return;
33369
33370 /* If there are no builtins defined, we must be compiling in a
33371 language without trans-mem support. */
33372 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33373 return;
33374
33375 /* Use whatever attributes a normal TM load has. */
33376 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33377 attrs_load = DECL_ATTRIBUTES (decl);
33378 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33379 /* Use whatever attributes a normal TM store has. */
33380 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33381 attrs_store = DECL_ATTRIBUTES (decl);
33382 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33383 /* Use whatever attributes a normal TM log has. */
33384 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33385 attrs_log = DECL_ATTRIBUTES (decl);
33386 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33387
33388 for (i = 0, d = bdesc_tm;
33389 i < ARRAY_SIZE (bdesc_tm);
33390 i++, d++)
33391 {
33392 if ((d->mask & ix86_isa_flags) != 0
33393 || (lang_hooks.builtin_function
33394 == lang_hooks.builtin_function_ext_scope))
33395 {
33396 tree type, attrs, attrs_type;
33397 enum built_in_function code = (enum built_in_function) d->code;
33398
33399 ftype = (enum ix86_builtin_func_type) d->flag;
33400 type = ix86_get_builtin_func_type (ftype);
33401
33402 if (BUILTIN_TM_LOAD_P (code))
33403 {
33404 attrs = attrs_load;
33405 attrs_type = attrs_type_load;
33406 }
33407 else if (BUILTIN_TM_STORE_P (code))
33408 {
33409 attrs = attrs_store;
33410 attrs_type = attrs_type_store;
33411 }
33412 else
33413 {
33414 attrs = attrs_log;
33415 attrs_type = attrs_type_log;
33416 }
33417 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33418 /* The builtin without the prefix for
33419 calling it directly. */
33420 d->name + strlen ("__builtin_"),
33421 attrs);
33422 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33423 set the TYPE_ATTRIBUTES. */
33424 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33425
33426 set_builtin_decl (code, decl, false);
33427 }
33428 }
33429 }
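
/* Worked example of the loop above (a sketch, assuming -fgnu-tm and SSE are
   enabled): the bdesc_tm entry for "__builtin__ITM_RM128" carries code
   BUILT_IN_TM_LOAD_M128 and type V4SF_FTYPE_PCV4SF, so it is registered as
   a TM load builtin that inherits the DECL_ATTRIBUTES and TYPE_ATTRIBUTES
   of BUILT_IN_TM_LOAD_1, with "_ITM_RM128" (the name minus the
   "__builtin_" prefix) used as its library name.  */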
33430
33431 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33432 in the current target ISA, so that the user can compile particular modules
33433 with target-specific options that differ from the command-line
33434 options. */
33435 static void
33436 ix86_init_mmx_sse_builtins (void)
33437 {
33438 const struct builtin_description * d;
33439 enum ix86_builtin_func_type ftype;
33440 size_t i;
33441
33442 /* Add all special builtins with variable number of operands. */
33443 for (i = 0, d = bdesc_special_args;
33444 i < ARRAY_SIZE (bdesc_special_args);
33445 i++, d++)
33446 {
33447 if (d->name == 0)
33448 continue;
33449
33450 ftype = (enum ix86_builtin_func_type) d->flag;
33451 def_builtin (d->mask, d->name, ftype, d->code);
33452 }
33453
33454 /* Add all builtins with variable number of operands. */
33455 for (i = 0, d = bdesc_args;
33456 i < ARRAY_SIZE (bdesc_args);
33457 i++, d++)
33458 {
33459 if (d->name == 0)
33460 continue;
33461
33462 ftype = (enum ix86_builtin_func_type) d->flag;
33463 def_builtin_const (d->mask, d->name, ftype, d->code);
33464 }
33465
33466 /* Add all builtins with rounding. */
33467 for (i = 0, d = bdesc_round_args;
33468 i < ARRAY_SIZE (bdesc_round_args);
33469 i++, d++)
33470 {
33471 if (d->name == 0)
33472 continue;
33473
33474 ftype = (enum ix86_builtin_func_type) d->flag;
33475 def_builtin_const (d->mask, d->name, ftype, d->code);
33476 }
33477
33478 /* pcmpestr[im] insns. */
33479 for (i = 0, d = bdesc_pcmpestr;
33480 i < ARRAY_SIZE (bdesc_pcmpestr);
33481 i++, d++)
33482 {
33483 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33484 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33485 else
33486 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33487 def_builtin_const (d->mask, d->name, ftype, d->code);
33488 }
33489
33490 /* pcmpistr[im] insns. */
33491 for (i = 0, d = bdesc_pcmpistr;
33492 i < ARRAY_SIZE (bdesc_pcmpistr);
33493 i++, d++)
33494 {
33495 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33496 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33497 else
33498 ftype = INT_FTYPE_V16QI_V16QI_INT;
33499 def_builtin_const (d->mask, d->name, ftype, d->code);
33500 }
33501
33502 /* comi/ucomi insns. */
33503 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33504 {
33505 if (d->mask == OPTION_MASK_ISA_SSE2)
33506 ftype = INT_FTYPE_V2DF_V2DF;
33507 else
33508 ftype = INT_FTYPE_V4SF_V4SF;
33509 def_builtin_const (d->mask, d->name, ftype, d->code);
33510 }
33511
33512 /* SSE */
33513 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33514 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33515 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33516 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33517
33518 /* SSE or 3DNow!A */
33519 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33520 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33521 IX86_BUILTIN_MASKMOVQ);
33522
33523 /* SSE2 */
33524 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33525 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33526
33527 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33528 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33529 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33530 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33531
33532 /* SSE3. */
33533 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33534 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33535 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33536 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33537
33538 /* AES */
33539 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33540 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33541 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33542 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33543 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33544 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33545 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33546 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33547 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33548 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33549 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33550 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33551
33552 /* PCLMUL */
33553 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33554 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33555
33556 /* RDRND */
33557 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33558 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33559 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33560 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33561 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33562 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33563 IX86_BUILTIN_RDRAND64_STEP);
33564
33565 /* AVX2 */
33566 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33567 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33568 IX86_BUILTIN_GATHERSIV2DF);
33569
33570 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33571 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33572 IX86_BUILTIN_GATHERSIV4DF);
33573
33574 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33575 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33576 IX86_BUILTIN_GATHERDIV2DF);
33577
33578 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33579 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33580 IX86_BUILTIN_GATHERDIV4DF);
33581
33582 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33583 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33584 IX86_BUILTIN_GATHERSIV4SF);
33585
33586 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33587 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33588 IX86_BUILTIN_GATHERSIV8SF);
33589
33590 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33591 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33592 IX86_BUILTIN_GATHERDIV4SF);
33593
33594 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33595 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33596 IX86_BUILTIN_GATHERDIV8SF);
33597
33598 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33599 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33600 IX86_BUILTIN_GATHERSIV2DI);
33601
33602 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33603 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33604 IX86_BUILTIN_GATHERSIV4DI);
33605
33606 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33607 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33608 IX86_BUILTIN_GATHERDIV2DI);
33609
33610 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33611 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33612 IX86_BUILTIN_GATHERDIV4DI);
33613
33614 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33615 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33616 IX86_BUILTIN_GATHERSIV4SI);
33617
33618 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33619 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33620 IX86_BUILTIN_GATHERSIV8SI);
33621
33622 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33623 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33624 IX86_BUILTIN_GATHERDIV4SI);
33625
33626 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33627 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33628 IX86_BUILTIN_GATHERDIV8SI);
33629
33630 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33631 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33632 IX86_BUILTIN_GATHERALTSIV4DF);
33633
33634 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33635 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33636 IX86_BUILTIN_GATHERALTDIV8SF);
33637
33638 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33639 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33640 IX86_BUILTIN_GATHERALTSIV4DI);
33641
33642 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33643 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33644 IX86_BUILTIN_GATHERALTDIV8SI);
33645
33646 /* AVX512F */
33647 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33648 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33649 IX86_BUILTIN_GATHER3SIV16SF);
33650
33651 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33652 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33653 IX86_BUILTIN_GATHER3SIV8DF);
33654
33655 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33656 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33657 IX86_BUILTIN_GATHER3DIV16SF);
33658
33659 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33660 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33661 IX86_BUILTIN_GATHER3DIV8DF);
33662
33663 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33664 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33665 IX86_BUILTIN_GATHER3SIV16SI);
33666
33667 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33668 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33669 IX86_BUILTIN_GATHER3SIV8DI);
33670
33671 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33672 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33673 IX86_BUILTIN_GATHER3DIV16SI);
33674
33675 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33676 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33677 IX86_BUILTIN_GATHER3DIV8DI);
33678
33679 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33680 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33681 IX86_BUILTIN_GATHER3ALTSIV8DF);
33682
33683 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33684 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33685 IX86_BUILTIN_GATHER3ALTDIV16SF);
33686
33687 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33688 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33689 IX86_BUILTIN_GATHER3ALTSIV8DI);
33690
33691 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33692 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33693 IX86_BUILTIN_GATHER3ALTDIV16SI);
33694
33695 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33696 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33697 IX86_BUILTIN_SCATTERSIV16SF);
33698
33699 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33700 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33701 IX86_BUILTIN_SCATTERSIV8DF);
33702
33703 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33704 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33705 IX86_BUILTIN_SCATTERDIV16SF);
33706
33707 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33708 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33709 IX86_BUILTIN_SCATTERDIV8DF);
33710
33711 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33712 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33713 IX86_BUILTIN_SCATTERSIV16SI);
33714
33715 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33716 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33717 IX86_BUILTIN_SCATTERSIV8DI);
33718
33719 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33720 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33721 IX86_BUILTIN_SCATTERDIV16SI);
33722
33723 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33724 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33725 IX86_BUILTIN_SCATTERDIV8DI);
33726
33727 /* AVX512VL */
33728 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33729 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33730 IX86_BUILTIN_GATHER3SIV2DF);
33731
33732 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33733 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33734 IX86_BUILTIN_GATHER3SIV4DF);
33735
33736 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33737 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33738 IX86_BUILTIN_GATHER3DIV2DF);
33739
33740 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33741 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33742 IX86_BUILTIN_GATHER3DIV4DF);
33743
33744 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33745 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33746 IX86_BUILTIN_GATHER3SIV4SF);
33747
33748 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33749 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33750 IX86_BUILTIN_GATHER3SIV8SF);
33751
33752 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33753 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33754 IX86_BUILTIN_GATHER3DIV4SF);
33755
33756 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33757 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33758 IX86_BUILTIN_GATHER3DIV8SF);
33759
33760 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33761 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33762 IX86_BUILTIN_GATHER3SIV2DI);
33763
33764 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33765 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33766 IX86_BUILTIN_GATHER3SIV4DI);
33767
33768 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33769 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33770 IX86_BUILTIN_GATHER3DIV2DI);
33771
33772 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33773 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33774 IX86_BUILTIN_GATHER3DIV4DI);
33775
33776 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33777 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33778 IX86_BUILTIN_GATHER3SIV4SI);
33779
33780 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33781 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33782 IX86_BUILTIN_GATHER3SIV8SI);
33783
33784 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33785 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33786 IX86_BUILTIN_GATHER3DIV4SI);
33787
33788 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33789 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33790 IX86_BUILTIN_GATHER3DIV8SI);
33791
33792 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33793 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33794 IX86_BUILTIN_GATHER3ALTSIV4DF);
33795
33796 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33797 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33798 IX86_BUILTIN_GATHER3ALTDIV8SF);
33799
33800 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33801 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33802 IX86_BUILTIN_GATHER3ALTSIV4DI);
33803
33804 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33805 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33806 IX86_BUILTIN_GATHER3ALTDIV8SI);
33807
33808 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33809 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33810 IX86_BUILTIN_SCATTERSIV8SF);
33811
33812 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33813 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33814 IX86_BUILTIN_SCATTERSIV4SF);
33815
33816 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33817 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33818 IX86_BUILTIN_SCATTERSIV4DF);
33819
33820 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33821 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33822 IX86_BUILTIN_SCATTERSIV2DF);
33823
33824 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33825 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33826 IX86_BUILTIN_SCATTERDIV8SF);
33827
33828 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33829 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33830 IX86_BUILTIN_SCATTERDIV4SF);
33831
33832 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33833 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33834 IX86_BUILTIN_SCATTERDIV4DF);
33835
33836 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33837 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33838 IX86_BUILTIN_SCATTERDIV2DF);
33839
33840 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33841 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33842 IX86_BUILTIN_SCATTERSIV8SI);
33843
33844 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33845 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33846 IX86_BUILTIN_SCATTERSIV4SI);
33847
33848 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33849 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33850 IX86_BUILTIN_SCATTERSIV4DI);
33851
33852 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33853 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33854 IX86_BUILTIN_SCATTERSIV2DI);
33855
33856 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33857 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33858 IX86_BUILTIN_SCATTERDIV8SI);
33859
33860 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33861 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33862 IX86_BUILTIN_SCATTERDIV4SI);
33863
33864 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33865 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33866 IX86_BUILTIN_SCATTERDIV4DI);
33867
33868 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33869 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33870 IX86_BUILTIN_SCATTERDIV2DI);
33871
33872 /* AVX512PF */
33873 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33874 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33875 IX86_BUILTIN_GATHERPFDPD);
33876 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33877 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33878 IX86_BUILTIN_GATHERPFDPS);
33879 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33880 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33881 IX86_BUILTIN_GATHERPFQPD);
33882 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33883 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33884 IX86_BUILTIN_GATHERPFQPS);
33885 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33886 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33887 IX86_BUILTIN_SCATTERPFDPD);
33888 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33889 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33890 IX86_BUILTIN_SCATTERPFDPS);
33891 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33892 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33893 IX86_BUILTIN_SCATTERPFQPD);
33894 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33895 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33896 IX86_BUILTIN_SCATTERPFQPS);
33897
33898 /* SHA */
33899 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33900 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33901 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33902 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33903 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33904 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33905 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33906 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33907 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33908 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33909 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33910 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33911 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33912 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33913
33914 /* RTM. */
33915 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33916 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33917
33918 /* MMX access to the vec_init patterns. */
33919 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33920 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33921
33922 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33923 V4HI_FTYPE_HI_HI_HI_HI,
33924 IX86_BUILTIN_VEC_INIT_V4HI);
33925
33926 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33927 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33928 IX86_BUILTIN_VEC_INIT_V8QI);
33929
33930 /* Access to the vec_extract patterns. */
33931 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33932 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33933 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33934 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33935 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33936 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33937 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33938 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33939 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33940 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33941
33942 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33943 "__builtin_ia32_vec_ext_v4hi",
33944 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33945
33946 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33947 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33948
33949 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33950 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33951
33952 /* Access to the vec_set patterns. */
33953 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33954 "__builtin_ia32_vec_set_v2di",
33955 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33956
33957 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33958 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33959
33960 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33961 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33962
33963 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33964 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33965
33966 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33967 "__builtin_ia32_vec_set_v4hi",
33968 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33969
33970 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33971 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33972
33973 /* RDSEED */
33974 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33975 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33976 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33977 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33978 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33979 "__builtin_ia32_rdseed_di_step",
33980 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33981
33982 /* ADCX */
33983 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33984 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33985 def_builtin (OPTION_MASK_ISA_64BIT,
33986 "__builtin_ia32_addcarryx_u64",
33987 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33988 IX86_BUILTIN_ADDCARRYX64);
33989
33990 /* SBB */
33991 def_builtin (0, "__builtin_ia32_sbb_u32",
33992 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33993 def_builtin (OPTION_MASK_ISA_64BIT,
33994 "__builtin_ia32_sbb_u64",
33995 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33996 IX86_BUILTIN_SBB64);
33997
33998 /* Read/write FLAGS. */
33999 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34000 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34001 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34002 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34003 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34004 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34005 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34006 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34007
34008 /* CLFLUSHOPT. */
34009 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34010 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34011
34012 /* CLWB. */
34013 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34014 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34015
34016 /* Add the multi-argument (FMA4/XOP) builtins. */
34017 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34018 {
34019 if (d->name == 0)
34020 continue;
34021
34022 ftype = (enum ix86_builtin_func_type) d->flag;
34023 def_builtin_const (d->mask, d->name, ftype, d->code);
34024 }
34025 }
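
/* Illustration of why the builtins are registered regardless of the current
   ISA (a sketch; the function below is hypothetical, not part of GCC): a
   translation unit compiled without -mavx2 can still contain

     __attribute__ ((target ("avx2")))
     __m256i gather_ints (int const *base, __m256i idx)
     {
       return _mm256_i32gather_epi32 (base, idx, 4);
     }

   because the AVX2 gather builtin underneath the intrinsic is already
   defined; only the per-function target option check has to succeed.  */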
34026
34027 static void
34028 ix86_init_mpx_builtins ()
34029 {
34030 const struct builtin_description * d;
34031 enum ix86_builtin_func_type ftype;
34032 tree decl;
34033 size_t i;
34034
34035 for (i = 0, d = bdesc_mpx;
34036 i < ARRAY_SIZE (bdesc_mpx);
34037 i++, d++)
34038 {
34039 if (d->name == 0)
34040 continue;
34041
34042 ftype = (enum ix86_builtin_func_type) d->flag;
34043 decl = def_builtin (d->mask, d->name, ftype, d->code);
34044
34045 /* Without the leaf and nothrow flags for MPX builtins,
34046 abnormal edges may follow their calls when setjmp is
34047 present in the function. Since there may be a lot
34048 of MPX builtin calls, this causes lots of useless
34049 edges and enormous PHI nodes. To avoid this we mark
34050 MPX builtins as leaf and nothrow. */
34051 if (decl)
34052 {
34053 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34054 NULL_TREE);
34055 TREE_NOTHROW (decl) = 1;
34056 }
34057 else
34058 {
34059 ix86_builtins_isa[(int)d->code].leaf_p = true;
34060 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34061 }
34062 }
34063
34064 for (i = 0, d = bdesc_mpx_const;
34065 i < ARRAY_SIZE (bdesc_mpx_const);
34066 i++, d++)
34067 {
34068 if (d->name == 0)
34069 continue;
34070
34071 ftype = (enum ix86_builtin_func_type) d->flag;
34072 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34073
34074 if (decl)
34075 {
34076 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34077 NULL_TREE);
34078 TREE_NOTHROW (decl) = 1;
34079 }
34080 else
34081 {
34082 ix86_builtins_isa[(int)d->code].leaf_p = true;
34083 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34084 }
34085 }
34086 }
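
/* Roughly speaking (an illustrative analogy, not a declaration found in this
   file), the marking above gives each MPX builtin the effect of

     __attribute__ ((leaf, nothrow))

   so that a function mixing many bounds-check calls with setjmp does not
   accumulate abnormal CFG edges and oversized PHI nodes at every call.  */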
34087
34088 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34089 to return a pointer to VERSION_DECL if the outcome of the expression
34090 formed by PREDICATE_CHAIN is true. This function will be called during
34091 version dispatch to decide which function version to execute. It returns
34092 the basic block at the end, to which more conditions can be added. */
34093
34094 static basic_block
34095 add_condition_to_bb (tree function_decl, tree version_decl,
34096 tree predicate_chain, basic_block new_bb)
34097 {
34098 gimple return_stmt;
34099 tree convert_expr, result_var;
34100 gimple convert_stmt;
34101 gimple call_cond_stmt;
34102 gimple if_else_stmt;
34103
34104 basic_block bb1, bb2, bb3;
34105 edge e12, e23;
34106
34107 tree cond_var, and_expr_var = NULL_TREE;
34108 gimple_seq gseq;
34109
34110 tree predicate_decl, predicate_arg;
34111
34112 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34113
34114 gcc_assert (new_bb != NULL);
34115 gseq = bb_seq (new_bb);
34116
34117
34118 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34119 build_fold_addr_expr (version_decl));
34120 result_var = create_tmp_var (ptr_type_node, NULL);
34121 convert_stmt = gimple_build_assign (result_var, convert_expr);
34122 return_stmt = gimple_build_return (result_var);
34123
34124 if (predicate_chain == NULL_TREE)
34125 {
34126 gimple_seq_add_stmt (&gseq, convert_stmt);
34127 gimple_seq_add_stmt (&gseq, return_stmt);
34128 set_bb_seq (new_bb, gseq);
34129 gimple_set_bb (convert_stmt, new_bb);
34130 gimple_set_bb (return_stmt, new_bb);
34131 pop_cfun ();
34132 return new_bb;
34133 }
34134
34135 while (predicate_chain != NULL)
34136 {
34137 cond_var = create_tmp_var (integer_type_node, NULL);
34138 predicate_decl = TREE_PURPOSE (predicate_chain);
34139 predicate_arg = TREE_VALUE (predicate_chain);
34140 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34141 gimple_call_set_lhs (call_cond_stmt, cond_var);
34142
34143 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34144 gimple_set_bb (call_cond_stmt, new_bb);
34145 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34146
34147 predicate_chain = TREE_CHAIN (predicate_chain);
34148
34149 if (and_expr_var == NULL)
34150 and_expr_var = cond_var;
34151 else
34152 {
34153 gimple assign_stmt;
34154 /* Use MIN_EXPR to check whether any of the conditions is zero:
34155 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
34156 assign_stmt = gimple_build_assign (and_expr_var,
34157 build2 (MIN_EXPR, integer_type_node,
34158 cond_var, and_expr_var));
34159
34160 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34161 gimple_set_bb (assign_stmt, new_bb);
34162 gimple_seq_add_stmt (&gseq, assign_stmt);
34163 }
34164 }
34165
34166 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34167 integer_zero_node,
34168 NULL_TREE, NULL_TREE);
34169 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34170 gimple_set_bb (if_else_stmt, new_bb);
34171 gimple_seq_add_stmt (&gseq, if_else_stmt);
34172
34173 gimple_seq_add_stmt (&gseq, convert_stmt);
34174 gimple_seq_add_stmt (&gseq, return_stmt);
34175 set_bb_seq (new_bb, gseq);
34176
34177 bb1 = new_bb;
34178 e12 = split_block (bb1, if_else_stmt);
34179 bb2 = e12->dest;
34180 e12->flags &= ~EDGE_FALLTHRU;
34181 e12->flags |= EDGE_TRUE_VALUE;
34182
34183 e23 = split_block (bb2, return_stmt);
34184
34185 gimple_set_bb (convert_stmt, bb2);
34186 gimple_set_bb (return_stmt, bb2);
34187
34188 bb3 = e23->dest;
34189 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34190
34191 remove_edge (e23);
34192 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34193
34194 pop_cfun ();
34195
34196 return bb3;
34197 }
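
/* Shape of the code built by one call to add_condition_to_bb (a hand-written
   sketch, not compiler output; "foo_avx2" is a hypothetical version decl):

     new_bb:
       c1 = __builtin_cpu_is ("haswell");
       c2 = __builtin_cpu_supports ("avx2");
       c1 = MIN_EXPR <c2, c1>;             zero iff any predicate failed
       if (c1 > 0) goto bb2; else goto bb3;
     bb2:
       result = (void *) &foo_avx2;
       return result;
     bb3:
       ... the next call appends the condition for the next version here ...
*/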
34198
34199 /* This parses the attribute arguments to target in DECL and determines
34200 the right builtin to use to match the platform specification.
34201 It returns the priority value for this version decl. If PREDICATE_LIST
34202 is not NULL, it stores the list of cpu features that need to be checked
34203 before dispatching this function. */
34204
34205 static unsigned int
34206 get_builtin_code_for_version (tree decl, tree *predicate_list)
34207 {
34208 tree attrs;
34209 struct cl_target_option cur_target;
34210 tree target_node;
34211 struct cl_target_option *new_target;
34212 const char *arg_str = NULL;
34213 const char *attrs_str = NULL;
34214 char *tok_str = NULL;
34215 char *token;
34216
34217 /* Priority of i386 features, greater value is higher priority. This is
34218 used to decide the order in which function dispatch must happen. For
34219 instance, a version specialized for SSE4.2 should be checked for dispatch
34220 before a version for SSE3, as SSE4.2 implies SSE3. */
34221 enum feature_priority
34222 {
34223 P_ZERO = 0,
34224 P_MMX,
34225 P_SSE,
34226 P_SSE2,
34227 P_SSE3,
34228 P_SSSE3,
34229 P_PROC_SSSE3,
34230 P_SSE4_A,
34231 P_PROC_SSE4_A,
34232 P_SSE4_1,
34233 P_SSE4_2,
34234 P_PROC_SSE4_2,
34235 P_POPCNT,
34236 P_AVX,
34237 P_PROC_AVX,
34238 P_FMA4,
34239 P_XOP,
34240 P_PROC_XOP,
34241 P_FMA,
34242 P_PROC_FMA,
34243 P_AVX2,
34244 P_PROC_AVX2
34245 };
34246
34247 enum feature_priority priority = P_ZERO;
34248
34249 /* These are the target attribute strings for which a dispatcher is
34250 available, from fold_builtin_cpu. */
34251
34252 static struct _feature_list
34253 {
34254 const char *const name;
34255 const enum feature_priority priority;
34256 }
34257 const feature_list[] =
34258 {
34259 {"mmx", P_MMX},
34260 {"sse", P_SSE},
34261 {"sse2", P_SSE2},
34262 {"sse3", P_SSE3},
34263 {"sse4a", P_SSE4_A},
34264 {"ssse3", P_SSSE3},
34265 {"sse4.1", P_SSE4_1},
34266 {"sse4.2", P_SSE4_2},
34267 {"popcnt", P_POPCNT},
34268 {"avx", P_AVX},
34269 {"fma4", P_FMA4},
34270 {"xop", P_XOP},
34271 {"fma", P_FMA},
34272 {"avx2", P_AVX2}
34273 };
34274
34275
34276 static unsigned int NUM_FEATURES
34277 = sizeof (feature_list) / sizeof (struct _feature_list);
34278
34279 unsigned int i;
34280
34281 tree predicate_chain = NULL_TREE;
34282 tree predicate_decl, predicate_arg;
34283
34284 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34285 gcc_assert (attrs != NULL);
34286
34287 attrs = TREE_VALUE (TREE_VALUE (attrs));
34288
34289 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34290 attrs_str = TREE_STRING_POINTER (attrs);
34291
34292 /* Return priority zero for default function. */
34293 if (strcmp (attrs_str, "default") == 0)
34294 return 0;
34295
34296 /* Handle arch= if specified. For priority, set it to be 1 more than
34297 the best instruction set the processor can handle. For instance, if
34298 there is a version for atom and a version for ssse3 (the highest ISA
34299 priority for atom), the atom version must be checked for dispatch
34300 before the ssse3 version. */
34301 if (strstr (attrs_str, "arch=") != NULL)
34302 {
34303 cl_target_option_save (&cur_target, &global_options);
34304 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34305 &global_options_set);
34306
34307 gcc_assert (target_node);
34308 new_target = TREE_TARGET_OPTION (target_node);
34309 gcc_assert (new_target);
34310
34311 if (new_target->arch_specified && new_target->arch > 0)
34312 {
34313 switch (new_target->arch)
34314 {
34315 case PROCESSOR_CORE2:
34316 arg_str = "core2";
34317 priority = P_PROC_SSSE3;
34318 break;
34319 case PROCESSOR_NEHALEM:
34320 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34321 arg_str = "westmere";
34322 else
34323 /* We translate "arch=corei7" and "arch=nehalem" to
34324 "corei7" so that it will be mapped to M_INTEL_COREI7
34325 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34326 arg_str = "corei7";
34327 priority = P_PROC_SSE4_2;
34328 break;
34329 case PROCESSOR_SANDYBRIDGE:
34330 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34331 arg_str = "ivybridge";
34332 else
34333 arg_str = "sandybridge";
34334 priority = P_PROC_AVX;
34335 break;
34336 case PROCESSOR_HASWELL:
34337 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34338 arg_str = "broadwell";
34339 else
34340 arg_str = "haswell";
34341 priority = P_PROC_AVX2;
34342 break;
34343 case PROCESSOR_BONNELL:
34344 arg_str = "bonnell";
34345 priority = P_PROC_SSSE3;
34346 break;
34347 case PROCESSOR_SILVERMONT:
34348 arg_str = "silvermont";
34349 priority = P_PROC_SSE4_2;
34350 break;
34351 case PROCESSOR_AMDFAM10:
34352 arg_str = "amdfam10h";
34353 priority = P_PROC_SSE4_A;
34354 break;
34355 case PROCESSOR_BTVER1:
34356 arg_str = "btver1";
34357 priority = P_PROC_SSE4_A;
34358 break;
34359 case PROCESSOR_BTVER2:
34360 arg_str = "btver2";
34361 priority = P_PROC_AVX;
34362 break;
34363 case PROCESSOR_BDVER1:
34364 arg_str = "bdver1";
34365 priority = P_PROC_XOP;
34366 break;
34367 case PROCESSOR_BDVER2:
34368 arg_str = "bdver2";
34369 priority = P_PROC_FMA;
34370 break;
34371 case PROCESSOR_BDVER3:
34372 arg_str = "bdver3";
34373 priority = P_PROC_FMA;
34374 break;
34375 case PROCESSOR_BDVER4:
34376 arg_str = "bdver4";
34377 priority = P_PROC_AVX2;
34378 break;
34379 }
34380 }
34381
34382 cl_target_option_restore (&global_options, &cur_target);
34383
34384 if (predicate_list && arg_str == NULL)
34385 {
34386 error_at (DECL_SOURCE_LOCATION (decl),
34387 "No dispatcher found for the versioning attributes");
34388 return 0;
34389 }
34390
34391 if (predicate_list)
34392 {
34393 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34394 /* For a C string literal the length includes the trailing NUL character. */
34395 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34396 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34397 predicate_chain);
34398 }
34399 }
34400
34401 /* Process feature name. */
34402 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34403 strcpy (tok_str, attrs_str);
34404 token = strtok (tok_str, ",");
34405 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34406
34407 while (token != NULL)
34408 {
34409 /* Do not process "arch=" */
34410 if (strncmp (token, "arch=", 5) == 0)
34411 {
34412 token = strtok (NULL, ",");
34413 continue;
34414 }
34415 for (i = 0; i < NUM_FEATURES; ++i)
34416 {
34417 if (strcmp (token, feature_list[i].name) == 0)
34418 {
34419 if (predicate_list)
34420 {
34421 predicate_arg = build_string_literal (
34422 strlen (feature_list[i].name) + 1,
34423 feature_list[i].name);
34424 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34425 predicate_chain);
34426 }
34427 /* Find the maximum priority feature. */
34428 if (feature_list[i].priority > priority)
34429 priority = feature_list[i].priority;
34430
34431 break;
34432 }
34433 }
34434 if (predicate_list && i == NUM_FEATURES)
34435 {
34436 error_at (DECL_SOURCE_LOCATION (decl),
34437 "No dispatcher found for %s", token);
34438 return 0;
34439 }
34440 token = strtok (NULL, ",");
34441 }
34442 free (tok_str);
34443
34444 if (predicate_list && predicate_chain == NULL_TREE)
34445 {
34446 error_at (DECL_SOURCE_LOCATION (decl),
34447 "No dispatcher found for the versioning attributes : %s",
34448 attrs_str);
34449 return 0;
34450 }
34451 else if (predicate_list)
34452 {
34453 predicate_chain = nreverse (predicate_chain);
34454 *predicate_list = predicate_chain;
34455 }
34456
34457 return priority;
34458 }
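
/* Worked example (an assumption for illustration, not taken from the code
   above): for a version declared

     __attribute__ ((target ("arch=haswell,avx2"))) int foo (void);

   the arch= clause maps PROCESSOR_HASWELL to arg_str "haswell" with
   priority P_PROC_AVX2 and queues a __builtin_cpu_is ("haswell") predicate;
   the "avx2" token then queues __builtin_cpu_supports ("avx2") but cannot
   lower the result, since P_PROC_AVX2 already exceeds P_AVX2.  */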
34459
34460 /* This compares the priority of target features in function DECL1
34461 and DECL2. It returns positive value if DECL1 is higher priority,
34462 negative value if DECL2 is higher priority and 0 if they are the
34463 same. */
34464
34465 static int
34466 ix86_compare_version_priority (tree decl1, tree decl2)
34467 {
34468 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34469 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34470
34471 return (int)priority1 - (int)priority2;
34472 }
34473
34474 /* V1 and V2 point to function versions with different priorities
34475 based on the target ISA. This function compares their priorities. */
34476
34477 static int
34478 feature_compare (const void *v1, const void *v2)
34479 {
34480 typedef struct _function_version_info
34481 {
34482 tree version_decl;
34483 tree predicate_chain;
34484 unsigned int dispatch_priority;
34485 } function_version_info;
34486
34487 const function_version_info c1 = *(const function_version_info *)v1;
34488 const function_version_info c2 = *(const function_version_info *)v2;
34489 return (c2.dispatch_priority - c1.dispatch_priority);
34490 }
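
/* Because the comparator returns c2 minus c1, qsort produces a descending
   order; e.g. (illustrative) priorities {P_SSE2, P_PROC_AVX2, P_PROC_XOP}
   come out as P_PROC_AVX2, P_PROC_XOP, P_SSE2, i.e. the highest dispatch
   priority is tried first.  */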
34491
34492 /* This function generates the dispatch function for
34493 multi-versioned functions. DISPATCH_DECL is the function which will
34494 contain the dispatch logic. FNDECLS are the function choices for
34495 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34496 in DISPATCH_DECL in which the dispatch code is generated. */
34497
34498 static int
34499 dispatch_function_versions (tree dispatch_decl,
34500 void *fndecls_p,
34501 basic_block *empty_bb)
34502 {
34503 tree default_decl;
34504 gimple ifunc_cpu_init_stmt;
34505 gimple_seq gseq;
34506 int ix;
34507 tree ele;
34508 vec<tree> *fndecls;
34509 unsigned int num_versions = 0;
34510 unsigned int actual_versions = 0;
34511 unsigned int i;
34512
34513 struct _function_version_info
34514 {
34515 tree version_decl;
34516 tree predicate_chain;
34517 unsigned int dispatch_priority;
34518 }*function_version_info;
34519
34520 gcc_assert (dispatch_decl != NULL
34521 && fndecls_p != NULL
34522 && empty_bb != NULL);
34523
34524 /* fndecls_p is actually a vector. */
34525 fndecls = static_cast<vec<tree> *> (fndecls_p);
34526
34527 /* At least one more version other than the default. */
34528 num_versions = fndecls->length ();
34529 gcc_assert (num_versions >= 2);
34530
34531 function_version_info = (struct _function_version_info *)
34532 XNEWVEC (struct _function_version_info, (num_versions - 1));
34533
34534 /* The first version in the vector is the default decl. */
34535 default_decl = (*fndecls)[0];
34536
34537 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34538
34539 gseq = bb_seq (*empty_bb);
34540 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34541 constructors, so explicitly call __builtin_cpu_init here. */
34542 ifunc_cpu_init_stmt = gimple_build_call_vec (
34543 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34544 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34545 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34546 set_bb_seq (*empty_bb, gseq);
34547
34548 pop_cfun ();
34549
34550
34551 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34552 {
34553 tree version_decl = ele;
34554 tree predicate_chain = NULL_TREE;
34555 unsigned int priority;
34556 /* Get attribute string, parse it and find the right predicate decl.
34557 The predicate function could be a lengthy combination of many
34558 features, like arch-type and various isa-variants. */
34559 priority = get_builtin_code_for_version (version_decl,
34560 &predicate_chain);
34561
34562 if (predicate_chain == NULL_TREE)
34563 continue;
34564
34565 function_version_info [actual_versions].version_decl = version_decl;
34566 function_version_info [actual_versions].predicate_chain
34567 = predicate_chain;
34568 function_version_info [actual_versions].dispatch_priority = priority;
34569 actual_versions++;
34570 }
34571
34572 /* Sort the versions according to descending order of dispatch priority. The
34573 priority is based on the ISA. This is not a perfect solution. There
34574 could still be ambiguity. If more than one function version is suitable
34575 to execute, which one should be dispatched? In future, allow the user
34576 to specify a dispatch priority next to the version. */
34577 qsort (function_version_info, actual_versions,
34578 sizeof (struct _function_version_info), feature_compare);
34579
34580 for (i = 0; i < actual_versions; ++i)
34581 *empty_bb = add_condition_to_bb (dispatch_decl,
34582 function_version_info[i].version_decl,
34583 function_version_info[i].predicate_chain,
34584 *empty_bb);
34585
34586 /* Dispatch the default version at the end. */
34587 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34588 NULL, *empty_bb);
34589
34590 free (function_version_info);
34591 return 0;
34592 }
34593
34594 /* Comparator function to be used in the qsort routine to sort the
34595 attribute specification strings of the "target" attribute. */
34596
34597 static int
34598 attr_strcmp (const void *v1, const void *v2)
34599 {
34600 const char *c1 = *(char *const*)v1;
34601 const char *c2 = *(char *const*)v2;
34602 return strcmp (c1, c2);
34603 }
34604
34605 /* ARGLIST is the argument to target attribute. This function tokenizes
34606 the comma separated arguments, sorts them and returns a string which
34607 is a unique identifier for the comma separated arguments. It also
34608 replaces non-identifier characters "=,-" with "_". */
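/* Illustration (not part of the original comment): an argument list such
   as "avx,arch=slm" is first flattened to "avx,arch_slm", then split on
   ',', sorted and re-joined with '_', giving "arch_slm_avx".  */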
34609
34610 static char *
34611 sorted_attr_string (tree arglist)
34612 {
34613 tree arg;
34614 size_t str_len_sum = 0;
34615 char **args = NULL;
34616 char *attr_str, *ret_str;
34617 char *attr = NULL;
34618 unsigned int argnum = 1;
34619 unsigned int i;
34620
34621 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34622 {
34623 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34624 size_t len = strlen (str);
34625 str_len_sum += len + 1;
34626 if (arg != arglist)
34627 argnum++;
34628 for (i = 0; i < strlen (str); i++)
34629 if (str[i] == ',')
34630 argnum++;
34631 }
34632
34633 attr_str = XNEWVEC (char, str_len_sum);
34634 str_len_sum = 0;
34635 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34636 {
34637 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34638 size_t len = strlen (str);
34639 memcpy (attr_str + str_len_sum, str, len);
34640 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34641 str_len_sum += len + 1;
34642 }
34643
34644 /* Replace "=,-" with "_". */
34645 for (i = 0; i < strlen (attr_str); i++)
34646 if (attr_str[i] == '=' || attr_str[i] == '-')
34647 attr_str[i] = '_';
34648
34649 if (argnum == 1)
34650 return attr_str;
34651
34652 args = XNEWVEC (char *, argnum);
34653
34654 i = 0;
34655 attr = strtok (attr_str, ",");
34656 while (attr != NULL)
34657 {
34658 args[i] = attr;
34659 i++;
34660 attr = strtok (NULL, ",");
34661 }
34662
34663 qsort (args, argnum, sizeof (char *), attr_strcmp);
34664
34665 ret_str = XNEWVEC (char, str_len_sum);
34666 str_len_sum = 0;
34667 for (i = 0; i < argnum; i++)
34668 {
34669 size_t len = strlen (args[i]);
34670 memcpy (ret_str + str_len_sum, args[i], len);
34671 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34672 str_len_sum += len + 1;
34673 }
34674
34675 XDELETEVEC (args);
34676 XDELETEVEC (attr_str);
34677 return ret_str;
34678 }
34679
34680 /* This function changes the assembler name for functions that are
34681 versions. If DECL is a function version and has a "target"
34682 attribute, it appends the attribute string to its assembler name. */
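/* Illustrative example (assumed names): a version of "foo" declared with
   __attribute__ ((target ("arch=core2"))) is renamed "foo.arch_core2";
   the "default" version keeps its original assembler name.  */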
34683
34684 static tree
34685 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34686 {
34687 tree version_attr;
34688 const char *orig_name, *version_string;
34689 char *attr_str, *assembler_name;
34690
34691 if (DECL_DECLARED_INLINE_P (decl)
34692 && lookup_attribute ("gnu_inline",
34693 DECL_ATTRIBUTES (decl)))
34694 error_at (DECL_SOURCE_LOCATION (decl),
34695 "Function versions cannot be marked as gnu_inline,"
34696 " bodies have to be generated");
34697
34698 if (DECL_VIRTUAL_P (decl)
34699 || DECL_VINDEX (decl))
34700 sorry ("Virtual function multiversioning not supported");
34701
34702 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34703
34704 /* target attribute string cannot be NULL. */
34705 gcc_assert (version_attr != NULL_TREE);
34706
34707 orig_name = IDENTIFIER_POINTER (id);
34708 version_string
34709 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34710
34711 if (strcmp (version_string, "default") == 0)
34712 return id;
34713
34714 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34715 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34716
34717 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34718
34719 /* Allow assembler name to be modified if already set. */
34720 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34721 SET_DECL_RTL (decl, NULL);
34722
34723 tree ret = get_identifier (assembler_name);
34724 XDELETEVEC (attr_str);
34725 XDELETEVEC (assembler_name);
34726 return ret;
34727 }
34728
34729 /* This function returns true if FN1 and FN2 are versions of the same function,
34730 that is, the target strings of the function decls are different. This assumes
34731 that FN1 and FN2 have the same signature. */
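/* For example (illustration only): two declarations of foo, one with
   target ("sse4.2") and one with target ("avx2"), are versions of each
   other; two declarations that both carry target ("avx2") are not.  */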
34732
34733 static bool
34734 ix86_function_versions (tree fn1, tree fn2)
34735 {
34736 tree attr1, attr2;
34737 char *target1, *target2;
34738 bool result;
34739
34740 if (TREE_CODE (fn1) != FUNCTION_DECL
34741 || TREE_CODE (fn2) != FUNCTION_DECL)
34742 return false;
34743
34744 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34745 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34746
34747 /* At least one function decl should have the target attribute specified. */
34748 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34749 return false;
34750
34751 /* Diagnose missing target attribute if one of the decls is already
34752 multi-versioned. */
34753 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34754 {
34755 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34756 {
34757 if (attr2 != NULL_TREE)
34758 {
34759 tree tem = fn1;
34760 fn1 = fn2;
34761 fn2 = tem;
34762 attr1 = attr2;
34763 }
34764 error_at (DECL_SOURCE_LOCATION (fn2),
34765 "missing %<target%> attribute for multi-versioned %D",
34766 fn2);
34767 inform (DECL_SOURCE_LOCATION (fn1),
34768 "previous declaration of %D", fn1);
34769 /* Prevent diagnosing of the same error multiple times. */
34770 DECL_ATTRIBUTES (fn2)
34771 = tree_cons (get_identifier ("target"),
34772 copy_node (TREE_VALUE (attr1)),
34773 DECL_ATTRIBUTES (fn2));
34774 }
34775 return false;
34776 }
34777
34778 target1 = sorted_attr_string (TREE_VALUE (attr1));
34779 target2 = sorted_attr_string (TREE_VALUE (attr2));
34780
34781 /* The sorted target strings must be different for fn1 and fn2
34782 to be versions. */
34783 if (strcmp (target1, target2) == 0)
34784 result = false;
34785 else
34786 result = true;
34787
34788 XDELETEVEC (target1);
34789 XDELETEVEC (target2);
34790
34791 return result;
34792 }
34793
34794 static tree
34795 ix86_mangle_decl_assembler_name (tree decl, tree id)
34796 {
34797 /* For function version, add the target suffix to the assembler name. */
34798 if (TREE_CODE (decl) == FUNCTION_DECL
34799 && DECL_FUNCTION_VERSIONED (decl))
34800 id = ix86_mangle_function_version_assembler_name (decl, id);
34801 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34802 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34803 #endif
34804
34805 return id;
34806 }
34807
34808 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34809 is true, append the full path name of the source file. */
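/* For example (illustration only): with SUFFIX "resolver", a public
   function "foo" yields "foo.resolver"; with MAKE_UNIQUE set, a per-file
   identifier is inserted as well, e.g. "foo.<file-id>.resolver".  */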
34810
34811 static char *
34812 make_name (tree decl, const char *suffix, bool make_unique)
34813 {
34814 char *global_var_name;
34815 int name_len;
34816 const char *name;
34817 const char *unique_name = NULL;
34818
34819 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34820
34821 /* Get a unique name that can be used globally without any chances
34822 of collision at link time. */
34823 if (make_unique)
34824 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34825
34826 name_len = strlen (name) + strlen (suffix) + 2;
34827
34828 if (make_unique)
34829 name_len += strlen (unique_name) + 1;
34830 global_var_name = XNEWVEC (char, name_len);
34831
34832 /* Use '.' to concatenate names as it is demangler friendly. */
34833 if (make_unique)
34834 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34835 suffix);
34836 else
34837 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34838
34839 return global_var_name;
34840 }
34841
34842 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34843
34844 /* Make a dispatcher declaration for the multi-versioned function DECL.
34845 Calls to DECL function will be replaced with calls to the dispatcher
34846 by the front-end. Return the decl created. */
34847
34848 static tree
34849 make_dispatcher_decl (const tree decl)
34850 {
34851 tree func_decl;
34852 char *func_name;
34853 tree fn_type, func_type;
34854 bool is_uniq = false;
34855
34856 if (TREE_PUBLIC (decl) == 0)
34857 is_uniq = true;
34858
34859 func_name = make_name (decl, "ifunc", is_uniq);
34860
34861 fn_type = TREE_TYPE (decl);
34862 func_type = build_function_type (TREE_TYPE (fn_type),
34863 TYPE_ARG_TYPES (fn_type));
34864
34865 func_decl = build_fn_decl (func_name, func_type);
34866 XDELETEVEC (func_name);
34867 TREE_USED (func_decl) = 1;
34868 DECL_CONTEXT (func_decl) = NULL_TREE;
34869 DECL_INITIAL (func_decl) = error_mark_node;
34870 DECL_ARTIFICIAL (func_decl) = 1;
34871 /* Mark this func as external; the resolver will flip it again if
34872 it gets generated. */
34873 DECL_EXTERNAL (func_decl) = 1;
34874 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34875 TREE_PUBLIC (func_decl) = 1;
34876
34877 return func_decl;
34878 }
34879
34880 #endif
34881
34882 /* Returns true if DECL is multi-versioned and is the default function,
34883 that is, it is not tagged with a target-specific optimization. */
34884
34885 static bool
34886 is_function_default_version (const tree decl)
34887 {
34888 if (TREE_CODE (decl) != FUNCTION_DECL
34889 || !DECL_FUNCTION_VERSIONED (decl))
34890 return false;
34891 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34892 gcc_assert (attr);
34893 attr = TREE_VALUE (TREE_VALUE (attr));
34894 return (TREE_CODE (attr) == STRING_CST
34895 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34896 }
34897
34898 /* Make a dispatcher declaration for the multi-versioned function DECL.
34899 Calls to DECL function will be replaced with calls to the dispatcher
34900 by the front-end. Returns the decl of the dispatcher function. */
34901
34902 static tree
34903 ix86_get_function_versions_dispatcher (void *decl)
34904 {
34905 tree fn = (tree) decl;
34906 struct cgraph_node *node = NULL;
34907 struct cgraph_node *default_node = NULL;
34908 struct cgraph_function_version_info *node_v = NULL;
34909 struct cgraph_function_version_info *first_v = NULL;
34910
34911 tree dispatch_decl = NULL;
34912
34913 struct cgraph_function_version_info *default_version_info = NULL;
34914
34915 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34916
34917 node = cgraph_node::get (fn);
34918 gcc_assert (node != NULL);
34919
34920 node_v = node->function_version ();
34921 gcc_assert (node_v != NULL);
34922
34923 if (node_v->dispatcher_resolver != NULL)
34924 return node_v->dispatcher_resolver;
34925
34926 /* Find the default version and make it the first node. */
34927 first_v = node_v;
34928 /* Go to the beginning of the chain. */
34929 while (first_v->prev != NULL)
34930 first_v = first_v->prev;
34931 default_version_info = first_v;
34932 while (default_version_info != NULL)
34933 {
34934 if (is_function_default_version
34935 (default_version_info->this_node->decl))
34936 break;
34937 default_version_info = default_version_info->next;
34938 }
34939
34940 /* If there is no default node, just return NULL. */
34941 if (default_version_info == NULL)
34942 return NULL;
34943
34944 /* Make default info the first node. */
34945 if (first_v != default_version_info)
34946 {
34947 default_version_info->prev->next = default_version_info->next;
34948 if (default_version_info->next)
34949 default_version_info->next->prev = default_version_info->prev;
34950 first_v->prev = default_version_info;
34951 default_version_info->next = first_v;
34952 default_version_info->prev = NULL;
34953 }
34954
34955 default_node = default_version_info->this_node;
34956
34957 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34958 if (targetm.has_ifunc_p ())
34959 {
34960 struct cgraph_function_version_info *it_v = NULL;
34961 struct cgraph_node *dispatcher_node = NULL;
34962 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34963
34964 /* Right now, the dispatching is done via ifunc. */
34965 dispatch_decl = make_dispatcher_decl (default_node->decl);
34966
34967 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34968 gcc_assert (dispatcher_node != NULL);
34969 dispatcher_node->dispatcher_function = 1;
34970 dispatcher_version_info
34971 = dispatcher_node->insert_new_function_version ();
34972 dispatcher_version_info->next = default_version_info;
34973 dispatcher_node->definition = 1;
34974
34975 /* Set the dispatcher for all the versions. */
34976 it_v = default_version_info;
34977 while (it_v != NULL)
34978 {
34979 it_v->dispatcher_resolver = dispatch_decl;
34980 it_v = it_v->next;
34981 }
34982 }
34983 else
34984 #endif
34985 {
34986 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34987 "multiversioning needs ifunc which is not supported "
34988 "on this target");
34989 }
34990
34991 return dispatch_decl;
34992 }
34993
34994 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34995 it to CHAIN. */
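/* E.g. make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds the
   attribute tree for __attribute__ ((ifunc ("foo.resolver")))
   (illustrative names only).  */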
34996
34997 static tree
34998 make_attribute (const char *name, const char *arg_name, tree chain)
34999 {
35000 tree attr_name;
35001 tree attr_arg_name;
35002 tree attr_args;
35003 tree attr;
35004
35005 attr_name = get_identifier (name);
35006 attr_arg_name = build_string (strlen (arg_name), arg_name);
35007 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35008 attr = tree_cons (attr_name, attr_args, chain);
35009 return attr;
35010 }
35011
35012 /* Make the resolver function decl to dispatch the versions of
35013 a multi-versioned function, DEFAULT_DECL. Create an
35014 empty basic block in the resolver and store the pointer in
35015 EMPTY_BB. Return the decl of the resolver function. */
35016
35017 static tree
35018 make_resolver_func (const tree default_decl,
35019 const tree dispatch_decl,
35020 basic_block *empty_bb)
35021 {
35022 char *resolver_name;
35023 tree decl, type, decl_name, t;
35024 bool is_uniq = false;
35025
35026 /* IFUNCs have to be globally visible. So, if the default_decl is
35027 not, then the name of the IFUNC should be made unique. */
35028 if (TREE_PUBLIC (default_decl) == 0)
35029 is_uniq = true;
35030
35031 /* Append the filename to the resolver function if the versions are
35032 not externally visible. This is because the resolver function has
35033 to be externally visible for the loader to find it. So, appending
35034 the filename will prevent conflicts with a resolver function from
35035 another module which is based on the same version name. */
35036 resolver_name = make_name (default_decl, "resolver", is_uniq);
35037
35038 /* The resolver function should return a (void *). */
35039 type = build_function_type_list (ptr_type_node, NULL_TREE);
35040
35041 decl = build_fn_decl (resolver_name, type);
35042 decl_name = get_identifier (resolver_name);
35043 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35044
35045 DECL_NAME (decl) = decl_name;
35046 TREE_USED (decl) = 1;
35047 DECL_ARTIFICIAL (decl) = 1;
35048 DECL_IGNORED_P (decl) = 0;
35049 /* IFUNC resolvers have to be externally visible. */
35050 TREE_PUBLIC (decl) = 1;
35051 DECL_UNINLINABLE (decl) = 1;
35052
35053 /* Resolver is not external, body is generated. */
35054 DECL_EXTERNAL (decl) = 0;
35055 DECL_EXTERNAL (dispatch_decl) = 0;
35056
35057 DECL_CONTEXT (decl) = NULL_TREE;
35058 DECL_INITIAL (decl) = make_node (BLOCK);
35059 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35060
35061 if (DECL_COMDAT_GROUP (default_decl)
35062 || TREE_PUBLIC (default_decl))
35063 {
35064 /* In this case, each translation unit with a call to this
35065 versioned function will put out a resolver. Ensure it
35066 is comdat to keep just one copy. */
35067 DECL_COMDAT (decl) = 1;
35068 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35069 }
35070 /* Build result decl and add to function_decl. */
35071 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35072 DECL_ARTIFICIAL (t) = 1;
35073 DECL_IGNORED_P (t) = 1;
35074 DECL_RESULT (decl) = t;
35075
35076 gimplify_function_tree (decl);
35077 push_cfun (DECL_STRUCT_FUNCTION (decl));
35078 *empty_bb = init_lowered_empty_function (decl, false);
35079
35080 cgraph_node::add_new_function (decl, true);
35081 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35082
35083 pop_cfun ();
35084
35085 gcc_assert (dispatch_decl != NULL);
35086 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35087 DECL_ATTRIBUTES (dispatch_decl)
35088 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35089
35090 /* Create the alias for dispatch to resolver here. */
35091 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35092 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35093 XDELETEVEC (resolver_name);
35094 return decl;
35095 }
35096
35097 /* Generate the dispatching code body to dispatch multi-versioned function
35098 DECL. The target hook is called to process the "target" attributes and
35099 provide the code to dispatch the right function at run-time. NODE points
35100 to the dispatcher decl whose body will be created. */
35101
35102 static tree
35103 ix86_generate_version_dispatcher_body (void *node_p)
35104 {
35105 tree resolver_decl;
35106 basic_block empty_bb;
35107 tree default_ver_decl;
35108 struct cgraph_node *versn;
35109 struct cgraph_node *node;
35110
35111 struct cgraph_function_version_info *node_version_info = NULL;
35112 struct cgraph_function_version_info *versn_info = NULL;
35113
35114 node = (cgraph_node *)node_p;
35115
35116 node_version_info = node->function_version ();
35117 gcc_assert (node->dispatcher_function
35118 && node_version_info != NULL);
35119
35120 if (node_version_info->dispatcher_resolver)
35121 return node_version_info->dispatcher_resolver;
35122
35123 /* The first version in the chain corresponds to the default version. */
35124 default_ver_decl = node_version_info->next->this_node->decl;
35125
35126 /* node is going to be an alias, so remove the finalized bit. */
35127 node->definition = false;
35128
35129 resolver_decl = make_resolver_func (default_ver_decl,
35130 node->decl, &empty_bb);
35131
35132 node_version_info->dispatcher_resolver = resolver_decl;
35133
35134 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35135
35136 auto_vec<tree, 2> fn_ver_vec;
35137
35138 for (versn_info = node_version_info->next; versn_info;
35139 versn_info = versn_info->next)
35140 {
35141 versn = versn_info->this_node;
35142 /* Check for virtual functions here again, as by this time it should
35143 have been determined if this function needs a vtable index or
35144 not. This happens for methods in derived classes that override
35145 virtual methods in base classes but are not explicitly marked as
35146 virtual. */
35147 if (DECL_VINDEX (versn->decl))
35148 sorry ("Virtual function multiversioning not supported");
35149
35150 fn_ver_vec.safe_push (versn->decl);
35151 }
35152
35153 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35154 cgraph_edge::rebuild_edges ();
35155 pop_cfun ();
35156 return resolver_decl;
35157 }
35158 /* This builds the processor_model struct type defined in
35159 libgcc/config/i386/cpuinfo.c. */
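/* The fields built below mirror (sketch only; cpuinfo.c is authoritative):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */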
35160
35161 static tree
35162 build_processor_model_struct (void)
35163 {
35164 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35165 "__cpu_features"};
35166 tree field = NULL_TREE, field_chain = NULL_TREE;
35167 int i;
35168 tree type = make_node (RECORD_TYPE);
35169
35170 /* The first 3 fields are unsigned int. */
35171 for (i = 0; i < 3; ++i)
35172 {
35173 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35174 get_identifier (field_name[i]), unsigned_type_node);
35175 if (field_chain != NULL_TREE)
35176 DECL_CHAIN (field) = field_chain;
35177 field_chain = field;
35178 }
35179
35180 /* The last field is an array of unsigned integers of size one. */
35181 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35182 get_identifier (field_name[3]),
35183 build_array_type (unsigned_type_node,
35184 build_index_type (size_one_node)));
35185 if (field_chain != NULL_TREE)
35186 DECL_CHAIN (field) = field_chain;
35187 field_chain = field;
35188
35189 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35190 return type;
35191 }
35192
35193 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35194
35195 static tree
35196 make_var_decl (tree type, const char *name)
35197 {
35198 tree new_decl;
35199
35200 new_decl = build_decl (UNKNOWN_LOCATION,
35201 VAR_DECL,
35202 get_identifier(name),
35203 type);
35204
35205 DECL_EXTERNAL (new_decl) = 1;
35206 TREE_STATIC (new_decl) = 1;
35207 TREE_PUBLIC (new_decl) = 1;
35208 DECL_INITIAL (new_decl) = 0;
35209 DECL_ARTIFICIAL (new_decl) = 0;
35210 DECL_PRESERVE_P (new_decl) = 1;
35211
35212 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35213 assemble_variable (new_decl, 0, 0, 0);
35214
35215 return new_decl;
35216 }
35217
35218 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35219 into a check of the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
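/* Sketch of the folding performed below (illustrative; the enum values
   are the ones defined in this function):

     __builtin_cpu_is ("corei7")
       -> (int) (__cpu_model.__cpu_type == M_INTEL_COREI7 - M_CPU_TYPE_START)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */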
35220
35221 static tree
35222 fold_builtin_cpu (tree fndecl, tree *args)
35223 {
35224 unsigned int i;
35225 enum ix86_builtins fn_code = (enum ix86_builtins)
35226 DECL_FUNCTION_CODE (fndecl);
35227 tree param_string_cst = NULL;
35228
35229 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35230 enum processor_features
35231 {
35232 F_CMOV = 0,
35233 F_MMX,
35234 F_POPCNT,
35235 F_SSE,
35236 F_SSE2,
35237 F_SSE3,
35238 F_SSSE3,
35239 F_SSE4_1,
35240 F_SSE4_2,
35241 F_AVX,
35242 F_AVX2,
35243 F_SSE4_A,
35244 F_FMA4,
35245 F_XOP,
35246 F_FMA,
35247 F_MAX
35248 };
35249
35250 /* These are the values for vendor types and cpu types and subtypes
35251 in cpuinfo.c. The corresponding start value must be subtracted
35252 from cpu types and subtypes before comparing. */
35253 enum processor_model
35254 {
35255 M_INTEL = 1,
35256 M_AMD,
35257 M_CPU_TYPE_START,
35258 M_INTEL_BONNELL,
35259 M_INTEL_CORE2,
35260 M_INTEL_COREI7,
35261 M_AMDFAM10H,
35262 M_AMDFAM15H,
35263 M_INTEL_SILVERMONT,
35264 M_AMD_BTVER1,
35265 M_AMD_BTVER2,
35266 M_CPU_SUBTYPE_START,
35267 M_INTEL_COREI7_NEHALEM,
35268 M_INTEL_COREI7_WESTMERE,
35269 M_INTEL_COREI7_SANDYBRIDGE,
35270 M_AMDFAM10H_BARCELONA,
35271 M_AMDFAM10H_SHANGHAI,
35272 M_AMDFAM10H_ISTANBUL,
35273 M_AMDFAM15H_BDVER1,
35274 M_AMDFAM15H_BDVER2,
35275 M_AMDFAM15H_BDVER3,
35276 M_AMDFAM15H_BDVER4,
35277 M_INTEL_COREI7_IVYBRIDGE,
35278 M_INTEL_COREI7_HASWELL
35279 };
35280
35281 static struct _arch_names_table
35282 {
35283 const char *const name;
35284 const enum processor_model model;
35285 }
35286 const arch_names_table[] =
35287 {
35288 {"amd", M_AMD},
35289 {"intel", M_INTEL},
35290 {"atom", M_INTEL_BONNELL},
35291 {"slm", M_INTEL_SILVERMONT},
35292 {"core2", M_INTEL_CORE2},
35293 {"corei7", M_INTEL_COREI7},
35294 {"nehalem", M_INTEL_COREI7_NEHALEM},
35295 {"westmere", M_INTEL_COREI7_WESTMERE},
35296 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35297 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35298 {"haswell", M_INTEL_COREI7_HASWELL},
35299 {"bonnell", M_INTEL_BONNELL},
35300 {"silvermont", M_INTEL_SILVERMONT},
35301 {"amdfam10h", M_AMDFAM10H},
35302 {"barcelona", M_AMDFAM10H_BARCELONA},
35303 {"shanghai", M_AMDFAM10H_SHANGHAI},
35304 {"istanbul", M_AMDFAM10H_ISTANBUL},
35305 {"btver1", M_AMD_BTVER1},
35306 {"amdfam15h", M_AMDFAM15H},
35307 {"bdver1", M_AMDFAM15H_BDVER1},
35308 {"bdver2", M_AMDFAM15H_BDVER2},
35309 {"bdver3", M_AMDFAM15H_BDVER3},
35310 {"bdver4", M_AMDFAM15H_BDVER4},
35311 {"btver2", M_AMD_BTVER2},
35312 };
35313
35314 static struct _isa_names_table
35315 {
35316 const char *const name;
35317 const enum processor_features feature;
35318 }
35319 const isa_names_table[] =
35320 {
35321 {"cmov", F_CMOV},
35322 {"mmx", F_MMX},
35323 {"popcnt", F_POPCNT},
35324 {"sse", F_SSE},
35325 {"sse2", F_SSE2},
35326 {"sse3", F_SSE3},
35327 {"ssse3", F_SSSE3},
35328 {"sse4a", F_SSE4_A},
35329 {"sse4.1", F_SSE4_1},
35330 {"sse4.2", F_SSE4_2},
35331 {"avx", F_AVX},
35332 {"fma4", F_FMA4},
35333 {"xop", F_XOP},
35334 {"fma", F_FMA},
35335 {"avx2", F_AVX2}
35336 };
35337
35338 tree __processor_model_type = build_processor_model_struct ();
35339 tree __cpu_model_var = make_var_decl (__processor_model_type,
35340 "__cpu_model");
35341
35342
35343 varpool_node::add (__cpu_model_var);
35344
35345 gcc_assert ((args != NULL) && (*args != NULL));
35346
35347 param_string_cst = *args;
35348 while (param_string_cst
35349 && TREE_CODE (param_string_cst) != STRING_CST)
35350 {
35351 /* *args must be an expr that can contain other EXPRs leading to a
35352 STRING_CST. */
35353 if (!EXPR_P (param_string_cst))
35354 {
35355 error ("Parameter to builtin must be a string constant or literal");
35356 return integer_zero_node;
35357 }
35358 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35359 }
35360
35361 gcc_assert (param_string_cst);
35362
35363 if (fn_code == IX86_BUILTIN_CPU_IS)
35364 {
35365 tree ref;
35366 tree field;
35367 tree final;
35368
35369 unsigned int field_val = 0;
35370 unsigned int NUM_ARCH_NAMES
35371 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35372
35373 for (i = 0; i < NUM_ARCH_NAMES; i++)
35374 if (strcmp (arch_names_table[i].name,
35375 TREE_STRING_POINTER (param_string_cst)) == 0)
35376 break;
35377
35378 if (i == NUM_ARCH_NAMES)
35379 {
35380 error ("Parameter to builtin not valid: %s",
35381 TREE_STRING_POINTER (param_string_cst));
35382 return integer_zero_node;
35383 }
35384
35385 field = TYPE_FIELDS (__processor_model_type);
35386 field_val = arch_names_table[i].model;
35387
35388 /* CPU types are stored in the next field. */
35389 if (field_val > M_CPU_TYPE_START
35390 && field_val < M_CPU_SUBTYPE_START)
35391 {
35392 field = DECL_CHAIN (field);
35393 field_val -= M_CPU_TYPE_START;
35394 }
35395
35396 /* CPU subtypes are stored in the next field. */
35397 if (field_val > M_CPU_SUBTYPE_START)
35398 {
35399 field = DECL_CHAIN (DECL_CHAIN (field));
35400 field_val -= M_CPU_SUBTYPE_START;
35401 }
35402
35403 /* Get the appropriate field in __cpu_model. */
35404 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35405 field, NULL_TREE);
35406
35407 /* Check the value. */
35408 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35409 build_int_cstu (unsigned_type_node, field_val));
35410 return build1 (CONVERT_EXPR, integer_type_node, final);
35411 }
35412 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35413 {
35414 tree ref;
35415 tree array_elt;
35416 tree field;
35417 tree final;
35418
35419 unsigned int field_val = 0;
35420 unsigned int NUM_ISA_NAMES
35421 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35422
35423 for (i = 0; i < NUM_ISA_NAMES; i++)
35424 if (strcmp (isa_names_table[i].name,
35425 TREE_STRING_POINTER (param_string_cst)) == 0)
35426 break;
35427
35428 if (i == NUM_ISA_NAMES)
35429 {
35430 error ("Parameter to builtin not valid: %s",
35431 TREE_STRING_POINTER (param_string_cst));
35432 return integer_zero_node;
35433 }
35434
35435 field = TYPE_FIELDS (__processor_model_type);
35436 /* Get the last field, which is __cpu_features. */
35437 while (DECL_CHAIN (field))
35438 field = DECL_CHAIN (field);
35439
35440 /* Get the appropriate field: __cpu_model.__cpu_features */
35441 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35442 field, NULL_TREE);
35443
35444 /* Access the 0th element of __cpu_features array. */
35445 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35446 integer_zero_node, NULL_TREE, NULL_TREE);
35447
35448 field_val = (1 << isa_names_table[i].feature);
35449 /* Return __cpu_model.__cpu_features[0] & field_val */
35450 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35451 build_int_cstu (unsigned_type_node, field_val));
35452 return build1 (CONVERT_EXPR, integer_type_node, final);
35453 }
35454 gcc_unreachable ();
35455 }
35456
35457 static tree
35458 ix86_fold_builtin (tree fndecl, int n_args,
35459 tree *args, bool ignore ATTRIBUTE_UNUSED)
35460 {
35461 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35462 {
35463 enum ix86_builtins fn_code = (enum ix86_builtins)
35464 DECL_FUNCTION_CODE (fndecl);
35465 if (fn_code == IX86_BUILTIN_CPU_IS
35466 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35467 {
35468 gcc_assert (n_args == 1);
35469 return fold_builtin_cpu (fndecl, args);
35470 }
35471 }
35472
35473 #ifdef SUBTARGET_FOLD_BUILTIN
35474 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35475 #endif
35476
35477 return NULL_TREE;
35478 }
35479
35480 /* Make builtins to detect cpu type and features supported. NAME is
35481 the builtin name, CODE is the builtin code, and FTYPE is the function
35482 type of the builtin. */
35483
35484 static void
35485 make_cpu_type_builtin (const char* name, int code,
35486 enum ix86_builtin_func_type ftype, bool is_const)
35487 {
35488 tree decl;
35489 tree type;
35490
35491 type = ix86_get_builtin_func_type (ftype);
35492 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35493 NULL, NULL_TREE);
35494 gcc_assert (decl != NULL_TREE);
35495 ix86_builtins[(int) code] = decl;
35496 TREE_READONLY (decl) = is_const;
35497 }
35498
35499 /* Make builtins to get CPU type and features supported. The created
35500 builtins are:
35501
35502 __builtin_cpu_init (), to detect cpu type and features,
35503 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35504 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35505 */
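/* Typical user-level usage of these builtins (illustration only;
   use_corei7_path and use_avx2_path are hypothetical):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7"))
       use_corei7_path ();
     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();  */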
35506
35507 static void
35508 ix86_init_platform_type_builtins (void)
35509 {
35510 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35511 INT_FTYPE_VOID, false);
35512 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35513 INT_FTYPE_PCCHAR, true);
35514 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35515 INT_FTYPE_PCCHAR, true);
35516 }
35517
35518 /* Internal method for ix86_init_builtins. */
35519
35520 static void
35521 ix86_init_builtins_va_builtins_abi (void)
35522 {
35523 tree ms_va_ref, sysv_va_ref;
35524 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35525 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35526 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35527 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35528
35529 if (!TARGET_64BIT)
35530 return;
35531 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35532 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35533 ms_va_ref = build_reference_type (ms_va_list_type_node);
35534 sysv_va_ref =
35535 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35536
35537 fnvoid_va_end_ms =
35538 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35539 fnvoid_va_start_ms =
35540 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35541 fnvoid_va_end_sysv =
35542 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35543 fnvoid_va_start_sysv =
35544 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35545 NULL_TREE);
35546 fnvoid_va_copy_ms =
35547 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35548 NULL_TREE);
35549 fnvoid_va_copy_sysv =
35550 build_function_type_list (void_type_node, sysv_va_ref,
35551 sysv_va_ref, NULL_TREE);
35552
35553 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35554 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35555 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35556 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35557 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35558 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35559 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35560 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35561 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35562 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35563 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35564 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35565 }
35566
35567 static void
35568 ix86_init_builtin_types (void)
35569 {
35570 tree float128_type_node, float80_type_node;
35571
35572 /* The __float80 type. */
35573 float80_type_node = long_double_type_node;
35574 if (TYPE_MODE (float80_type_node) != XFmode)
35575 {
35576 /* The __float80 type. */
35577 float80_type_node = make_node (REAL_TYPE);
35578
35579 TYPE_PRECISION (float80_type_node) = 80;
35580 layout_type (float80_type_node);
35581 }
35582 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35583
35584 /* The __float128 type. */
35585 float128_type_node = make_node (REAL_TYPE);
35586 TYPE_PRECISION (float128_type_node) = 128;
35587 layout_type (float128_type_node);
35588 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35589
35590 /* This macro is built by i386-builtin-types.awk. */
35591 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35592 }
35593
35594 static void
35595 ix86_init_builtins (void)
35596 {
35597 tree t;
35598
35599 ix86_init_builtin_types ();
35600
35601 /* Builtins to get CPU type and features. */
35602 ix86_init_platform_type_builtins ();
35603
35604 /* TFmode support builtins. */
35605 def_builtin_const (0, "__builtin_infq",
35606 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35607 def_builtin_const (0, "__builtin_huge_valq",
35608 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35609
35610 /* We will expand them to a normal call if SSE isn't available since
35611 they are used by libgcc. */
35612 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35613 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35614 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35615 TREE_READONLY (t) = 1;
35616 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35617
35618 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35619 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35620 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35621 TREE_READONLY (t) = 1;
35622 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35623
35624 ix86_init_tm_builtins ();
35625 ix86_init_mmx_sse_builtins ();
35626 ix86_init_mpx_builtins ();
35627
35628 if (TARGET_LP64)
35629 ix86_init_builtins_va_builtins_abi ();
35630
35631 #ifdef SUBTARGET_INIT_BUILTINS
35632 SUBTARGET_INIT_BUILTINS;
35633 #endif
35634 }
35635
35636 /* Return the ix86 builtin for CODE. */
35637
35638 static tree
35639 ix86_builtin_decl (unsigned code, bool)
35640 {
35641 if (code >= IX86_BUILTIN_MAX)
35642 return error_mark_node;
35643
35644 return ix86_builtins[code];
35645 }
35646
35647 /* Errors in the source file can cause expand_expr to return const0_rtx
35648 where we expect a vector. To avoid crashing, use one of the vector
35649 clear instructions. */
35650 static rtx
35651 safe_vector_operand (rtx x, machine_mode mode)
35652 {
35653 if (x == const0_rtx)
35654 x = CONST0_RTX (mode);
35655 return x;
35656 }
35657
35658 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35659
35660 static rtx
35661 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35662 {
35663 rtx pat;
35664 tree arg0 = CALL_EXPR_ARG (exp, 0);
35665 tree arg1 = CALL_EXPR_ARG (exp, 1);
35666 rtx op0 = expand_normal (arg0);
35667 rtx op1 = expand_normal (arg1);
35668 machine_mode tmode = insn_data[icode].operand[0].mode;
35669 machine_mode mode0 = insn_data[icode].operand[1].mode;
35670 machine_mode mode1 = insn_data[icode].operand[2].mode;
35671
35672 if (VECTOR_MODE_P (mode0))
35673 op0 = safe_vector_operand (op0, mode0);
35674 if (VECTOR_MODE_P (mode1))
35675 op1 = safe_vector_operand (op1, mode1);
35676
35677 if (optimize || !target
35678 || GET_MODE (target) != tmode
35679 || !insn_data[icode].operand[0].predicate (target, tmode))
35680 target = gen_reg_rtx (tmode);
35681
35682 if (GET_MODE (op1) == SImode && mode1 == TImode)
35683 {
35684 rtx x = gen_reg_rtx (V4SImode);
35685 emit_insn (gen_sse2_loadd (x, op1));
35686 op1 = gen_lowpart (TImode, x);
35687 }
35688
35689 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35690 op0 = copy_to_mode_reg (mode0, op0);
35691 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35692 op1 = copy_to_mode_reg (mode1, op1);
35693
35694 pat = GEN_FCN (icode) (target, op0, op1);
35695 if (! pat)
35696 return 0;
35697
35698 emit_insn (pat);
35699
35700 return target;
35701 }
35702
35703 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35704
35705 static rtx
35706 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35707 enum ix86_builtin_func_type m_type,
35708 enum rtx_code sub_code)
35709 {
35710 rtx pat;
35711 int i;
35712 int nargs;
35713 bool comparison_p = false;
35714 bool tf_p = false;
35715 bool last_arg_constant = false;
35716 int num_memory = 0;
35717 struct {
35718 rtx op;
35719 machine_mode mode;
35720 } args[4];
35721
35722 machine_mode tmode = insn_data[icode].operand[0].mode;
35723
35724 switch (m_type)
35725 {
35726 case MULTI_ARG_4_DF2_DI_I:
35727 case MULTI_ARG_4_DF2_DI_I1:
35728 case MULTI_ARG_4_SF2_SI_I:
35729 case MULTI_ARG_4_SF2_SI_I1:
35730 nargs = 4;
35731 last_arg_constant = true;
35732 break;
35733
35734 case MULTI_ARG_3_SF:
35735 case MULTI_ARG_3_DF:
35736 case MULTI_ARG_3_SF2:
35737 case MULTI_ARG_3_DF2:
35738 case MULTI_ARG_3_DI:
35739 case MULTI_ARG_3_SI:
35740 case MULTI_ARG_3_SI_DI:
35741 case MULTI_ARG_3_HI:
35742 case MULTI_ARG_3_HI_SI:
35743 case MULTI_ARG_3_QI:
35744 case MULTI_ARG_3_DI2:
35745 case MULTI_ARG_3_SI2:
35746 case MULTI_ARG_3_HI2:
35747 case MULTI_ARG_3_QI2:
35748 nargs = 3;
35749 break;
35750
35751 case MULTI_ARG_2_SF:
35752 case MULTI_ARG_2_DF:
35753 case MULTI_ARG_2_DI:
35754 case MULTI_ARG_2_SI:
35755 case MULTI_ARG_2_HI:
35756 case MULTI_ARG_2_QI:
35757 nargs = 2;
35758 break;
35759
35760 case MULTI_ARG_2_DI_IMM:
35761 case MULTI_ARG_2_SI_IMM:
35762 case MULTI_ARG_2_HI_IMM:
35763 case MULTI_ARG_2_QI_IMM:
35764 nargs = 2;
35765 last_arg_constant = true;
35766 break;
35767
35768 case MULTI_ARG_1_SF:
35769 case MULTI_ARG_1_DF:
35770 case MULTI_ARG_1_SF2:
35771 case MULTI_ARG_1_DF2:
35772 case MULTI_ARG_1_DI:
35773 case MULTI_ARG_1_SI:
35774 case MULTI_ARG_1_HI:
35775 case MULTI_ARG_1_QI:
35776 case MULTI_ARG_1_SI_DI:
35777 case MULTI_ARG_1_HI_DI:
35778 case MULTI_ARG_1_HI_SI:
35779 case MULTI_ARG_1_QI_DI:
35780 case MULTI_ARG_1_QI_SI:
35781 case MULTI_ARG_1_QI_HI:
35782 nargs = 1;
35783 break;
35784
35785 case MULTI_ARG_2_DI_CMP:
35786 case MULTI_ARG_2_SI_CMP:
35787 case MULTI_ARG_2_HI_CMP:
35788 case MULTI_ARG_2_QI_CMP:
35789 nargs = 2;
35790 comparison_p = true;
35791 break;
35792
35793 case MULTI_ARG_2_SF_TF:
35794 case MULTI_ARG_2_DF_TF:
35795 case MULTI_ARG_2_DI_TF:
35796 case MULTI_ARG_2_SI_TF:
35797 case MULTI_ARG_2_HI_TF:
35798 case MULTI_ARG_2_QI_TF:
35799 nargs = 2;
35800 tf_p = true;
35801 break;
35802
35803 default:
35804 gcc_unreachable ();
35805 }
35806
35807 if (optimize || !target
35808 || GET_MODE (target) != tmode
35809 || !insn_data[icode].operand[0].predicate (target, tmode))
35810 target = gen_reg_rtx (tmode);
35811
35812 gcc_assert (nargs <= 4);
35813
35814 for (i = 0; i < nargs; i++)
35815 {
35816 tree arg = CALL_EXPR_ARG (exp, i);
35817 rtx op = expand_normal (arg);
35818 int adjust = (comparison_p) ? 1 : 0;
35819 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35820
35821 if (last_arg_constant && i == nargs - 1)
35822 {
35823 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35824 {
35825 enum insn_code new_icode = icode;
35826 switch (icode)
35827 {
35828 case CODE_FOR_xop_vpermil2v2df3:
35829 case CODE_FOR_xop_vpermil2v4sf3:
35830 case CODE_FOR_xop_vpermil2v4df3:
35831 case CODE_FOR_xop_vpermil2v8sf3:
35832 error ("the last argument must be a 2-bit immediate");
35833 return gen_reg_rtx (tmode);
35834 case CODE_FOR_xop_rotlv2di3:
35835 new_icode = CODE_FOR_rotlv2di3;
35836 goto xop_rotl;
35837 case CODE_FOR_xop_rotlv4si3:
35838 new_icode = CODE_FOR_rotlv4si3;
35839 goto xop_rotl;
35840 case CODE_FOR_xop_rotlv8hi3:
35841 new_icode = CODE_FOR_rotlv8hi3;
35842 goto xop_rotl;
35843 case CODE_FOR_xop_rotlv16qi3:
35844 new_icode = CODE_FOR_rotlv16qi3;
35845 xop_rotl:
35846 if (CONST_INT_P (op))
35847 {
35848 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35849 op = GEN_INT (INTVAL (op) & mask);
35850 gcc_checking_assert
35851 (insn_data[icode].operand[i + 1].predicate (op, mode));
35852 }
35853 else
35854 {
35855 gcc_checking_assert
35856 (nargs == 2
35857 && insn_data[new_icode].operand[0].mode == tmode
35858 && insn_data[new_icode].operand[1].mode == tmode
35859 && insn_data[new_icode].operand[2].mode == mode
35860 && insn_data[new_icode].operand[0].predicate
35861 == insn_data[icode].operand[0].predicate
35862 && insn_data[new_icode].operand[1].predicate
35863 == insn_data[icode].operand[1].predicate);
35864 icode = new_icode;
35865 goto non_constant;
35866 }
35867 break;
35868 default:
35869 gcc_unreachable ();
35870 }
35871 }
35872 }
35873 else
35874 {
35875 non_constant:
35876 if (VECTOR_MODE_P (mode))
35877 op = safe_vector_operand (op, mode);
35878
35879 /* If we aren't optimizing, only allow one memory operand to be
35880 generated. */
35881 if (memory_operand (op, mode))
35882 num_memory++;
35883
35884 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35885
35886 if (optimize
35887 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35888 || num_memory > 1)
35889 op = force_reg (mode, op);
35890 }
35891
35892 args[i].op = op;
35893 args[i].mode = mode;
35894 }
35895
35896 switch (nargs)
35897 {
35898 case 1:
35899 pat = GEN_FCN (icode) (target, args[0].op);
35900 break;
35901
35902 case 2:
35903 if (tf_p)
35904 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35905 GEN_INT ((int)sub_code));
35906 else if (! comparison_p)
35907 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35908 else
35909 {
35910 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35911 args[0].op,
35912 args[1].op);
35913
35914 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35915 }
35916 break;
35917
35918 case 3:
35919 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35920 break;
35921
35922 case 4:
35923 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35924 break;
35925
35926 default:
35927 gcc_unreachable ();
35928 }
35929
35930 if (! pat)
35931 return 0;
35932
35933 emit_insn (pat);
35934 return target;
35935 }
35936
35937 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35938 insns with vec_merge. */
35939
35940 static rtx
35941 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35942 rtx target)
35943 {
35944 rtx pat;
35945 tree arg0 = CALL_EXPR_ARG (exp, 0);
35946 rtx op1, op0 = expand_normal (arg0);
35947 machine_mode tmode = insn_data[icode].operand[0].mode;
35948 machine_mode mode0 = insn_data[icode].operand[1].mode;
35949
35950 if (optimize || !target
35951 || GET_MODE (target) != tmode
35952 || !insn_data[icode].operand[0].predicate (target, tmode))
35953 target = gen_reg_rtx (tmode);
35954
35955 if (VECTOR_MODE_P (mode0))
35956 op0 = safe_vector_operand (op0, mode0);
35957
35958 if ((optimize && !register_operand (op0, mode0))
35959 || !insn_data[icode].operand[1].predicate (op0, mode0))
35960 op0 = copy_to_mode_reg (mode0, op0);
35961
35962 op1 = op0;
35963 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35964 op1 = copy_to_mode_reg (mode0, op1);
35965
35966 pat = GEN_FCN (icode) (target, op0, op1);
35967 if (! pat)
35968 return 0;
35969 emit_insn (pat);
35970 return target;
35971 }
35972
35973 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35974
35975 static rtx
35976 ix86_expand_sse_compare (const struct builtin_description *d,
35977 tree exp, rtx target, bool swap)
35978 {
35979 rtx pat;
35980 tree arg0 = CALL_EXPR_ARG (exp, 0);
35981 tree arg1 = CALL_EXPR_ARG (exp, 1);
35982 rtx op0 = expand_normal (arg0);
35983 rtx op1 = expand_normal (arg1);
35984 rtx op2;
35985 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35986 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35987 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35988 enum rtx_code comparison = d->comparison;
35989
35990 if (VECTOR_MODE_P (mode0))
35991 op0 = safe_vector_operand (op0, mode0);
35992 if (VECTOR_MODE_P (mode1))
35993 op1 = safe_vector_operand (op1, mode1);
35994
35995 /* Swap operands if we have a comparison that isn't available in
35996 hardware. */
35997 if (swap)
35998 {
35999 rtx tmp = gen_reg_rtx (mode1);
36000 emit_move_insn (tmp, op1);
36001 op1 = op0;
36002 op0 = tmp;
36003 }
36004
36005 if (optimize || !target
36006 || GET_MODE (target) != tmode
36007 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36008 target = gen_reg_rtx (tmode);
36009
36010 if ((optimize && !register_operand (op0, mode0))
36011 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36012 op0 = copy_to_mode_reg (mode0, op0);
36013 if ((optimize && !register_operand (op1, mode1))
36014 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36015 op1 = copy_to_mode_reg (mode1, op1);
36016
36017 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36018 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36019 if (! pat)
36020 return 0;
36021 emit_insn (pat);
36022 return target;
36023 }
36024
36025 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36026
36027 static rtx
36028 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36029 rtx target)
36030 {
36031 rtx pat;
36032 tree arg0 = CALL_EXPR_ARG (exp, 0);
36033 tree arg1 = CALL_EXPR_ARG (exp, 1);
36034 rtx op0 = expand_normal (arg0);
36035 rtx op1 = expand_normal (arg1);
36036 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36037 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36038 enum rtx_code comparison = d->comparison;
36039
36040 if (VECTOR_MODE_P (mode0))
36041 op0 = safe_vector_operand (op0, mode0);
36042 if (VECTOR_MODE_P (mode1))
36043 op1 = safe_vector_operand (op1, mode1);
36044
36045 /* Swap operands if we have a comparison that isn't available in
36046 hardware. */
36047 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36048 std::swap (op1, op0);
36049
36050 target = gen_reg_rtx (SImode);
36051 emit_move_insn (target, const0_rtx);
36052 target = gen_rtx_SUBREG (QImode, target, 0);
36053
36054 if ((optimize && !register_operand (op0, mode0))
36055 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36056 op0 = copy_to_mode_reg (mode0, op0);
36057 if ((optimize && !register_operand (op1, mode1))
36058 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36059 op1 = copy_to_mode_reg (mode1, op1);
36060
36061 pat = GEN_FCN (d->icode) (op0, op1);
36062 if (! pat)
36063 return 0;
36064 emit_insn (pat);
36065 emit_insn (gen_rtx_SET (VOIDmode,
36066 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36067 gen_rtx_fmt_ee (comparison, QImode,
36068 SET_DEST (pat),
36069 const0_rtx)));
36070
36071 return SUBREG_REG (target);
36072 }
36073
36074 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36075
36076 static rtx
36077 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36078 rtx target)
36079 {
36080 rtx pat;
36081 tree arg0 = CALL_EXPR_ARG (exp, 0);
36082 rtx op1, op0 = expand_normal (arg0);
36083 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36084 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36085
36086 if (optimize || target == 0
36087 || GET_MODE (target) != tmode
36088 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36089 target = gen_reg_rtx (tmode);
36090
36091 if (VECTOR_MODE_P (mode0))
36092 op0 = safe_vector_operand (op0, mode0);
36093
36094 if ((optimize && !register_operand (op0, mode0))
36095 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36096 op0 = copy_to_mode_reg (mode0, op0);
36097
36098 op1 = GEN_INT (d->comparison);
36099
36100 pat = GEN_FCN (d->icode) (target, op0, op1);
36101 if (! pat)
36102 return 0;
36103 emit_insn (pat);
36104 return target;
36105 }
36106
36107 static rtx
36108 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36109 tree exp, rtx target)
36110 {
36111 rtx pat;
36112 tree arg0 = CALL_EXPR_ARG (exp, 0);
36113 tree arg1 = CALL_EXPR_ARG (exp, 1);
36114 rtx op0 = expand_normal (arg0);
36115 rtx op1 = expand_normal (arg1);
36116 rtx op2;
36117 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36118 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36119 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36120
36121 if (optimize || target == 0
36122 || GET_MODE (target) != tmode
36123 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36124 target = gen_reg_rtx (tmode);
36125
36126 op0 = safe_vector_operand (op0, mode0);
36127 op1 = safe_vector_operand (op1, mode1);
36128
36129 if ((optimize && !register_operand (op0, mode0))
36130 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36131 op0 = copy_to_mode_reg (mode0, op0);
36132 if ((optimize && !register_operand (op1, mode1))
36133 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36134 op1 = copy_to_mode_reg (mode1, op1);
36135
36136 op2 = GEN_INT (d->comparison);
36137
36138 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36139 if (! pat)
36140 return 0;
36141 emit_insn (pat);
36142 return target;
36143 }
36144
36145 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36146
36147 static rtx
36148 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36149 rtx target)
36150 {
36151 rtx pat;
36152 tree arg0 = CALL_EXPR_ARG (exp, 0);
36153 tree arg1 = CALL_EXPR_ARG (exp, 1);
36154 rtx op0 = expand_normal (arg0);
36155 rtx op1 = expand_normal (arg1);
36156 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36157 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36158 enum rtx_code comparison = d->comparison;
36159
36160 if (VECTOR_MODE_P (mode0))
36161 op0 = safe_vector_operand (op0, mode0);
36162 if (VECTOR_MODE_P (mode1))
36163 op1 = safe_vector_operand (op1, mode1);
36164
36165 target = gen_reg_rtx (SImode);
36166 emit_move_insn (target, const0_rtx);
36167 target = gen_rtx_SUBREG (QImode, target, 0);
36168
36169 if ((optimize && !register_operand (op0, mode0))
36170 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36171 op0 = copy_to_mode_reg (mode0, op0);
36172 if ((optimize && !register_operand (op1, mode1))
36173 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36174 op1 = copy_to_mode_reg (mode1, op1);
36175
36176 pat = GEN_FCN (d->icode) (op0, op1);
36177 if (! pat)
36178 return 0;
36179 emit_insn (pat);
36180 emit_insn (gen_rtx_SET (VOIDmode,
36181 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36182 gen_rtx_fmt_ee (comparison, QImode,
36183 SET_DEST (pat),
36184 const0_rtx)));
36185
36186 return SUBREG_REG (target);
36187 }
36188
36189 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36190
36191 static rtx
36192 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36193 tree exp, rtx target)
36194 {
36195 rtx pat;
36196 tree arg0 = CALL_EXPR_ARG (exp, 0);
36197 tree arg1 = CALL_EXPR_ARG (exp, 1);
36198 tree arg2 = CALL_EXPR_ARG (exp, 2);
36199 tree arg3 = CALL_EXPR_ARG (exp, 3);
36200 tree arg4 = CALL_EXPR_ARG (exp, 4);
36201 rtx scratch0, scratch1;
36202 rtx op0 = expand_normal (arg0);
36203 rtx op1 = expand_normal (arg1);
36204 rtx op2 = expand_normal (arg2);
36205 rtx op3 = expand_normal (arg3);
36206 rtx op4 = expand_normal (arg4);
36207 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36208
36209 tmode0 = insn_data[d->icode].operand[0].mode;
36210 tmode1 = insn_data[d->icode].operand[1].mode;
36211 modev2 = insn_data[d->icode].operand[2].mode;
36212 modei3 = insn_data[d->icode].operand[3].mode;
36213 modev4 = insn_data[d->icode].operand[4].mode;
36214 modei5 = insn_data[d->icode].operand[5].mode;
36215 modeimm = insn_data[d->icode].operand[6].mode;
36216
36217 if (VECTOR_MODE_P (modev2))
36218 op0 = safe_vector_operand (op0, modev2);
36219 if (VECTOR_MODE_P (modev4))
36220 op2 = safe_vector_operand (op2, modev4);
36221
36222 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36223 op0 = copy_to_mode_reg (modev2, op0);
36224 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36225 op1 = copy_to_mode_reg (modei3, op1);
36226 if ((optimize && !register_operand (op2, modev4))
36227 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36228 op2 = copy_to_mode_reg (modev4, op2);
36229 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36230 op3 = copy_to_mode_reg (modei5, op3);
36231
36232 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36233 {
36234 error ("the fifth argument must be an 8-bit immediate");
36235 return const0_rtx;
36236 }
36237
36238 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36239 {
36240 if (optimize || !target
36241 || GET_MODE (target) != tmode0
36242 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36243 target = gen_reg_rtx (tmode0);
36244
36245 scratch1 = gen_reg_rtx (tmode1);
36246
36247 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36248 }
36249 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36250 {
36251 if (optimize || !target
36252 || GET_MODE (target) != tmode1
36253 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36254 target = gen_reg_rtx (tmode1);
36255
36256 scratch0 = gen_reg_rtx (tmode0);
36257
36258 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36259 }
36260 else
36261 {
36262 gcc_assert (d->flag);
36263
36264 scratch0 = gen_reg_rtx (tmode0);
36265 scratch1 = gen_reg_rtx (tmode1);
36266
36267 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36268 }
36269
36270 if (! pat)
36271 return 0;
36272
36273 emit_insn (pat);
36274
36275 if (d->flag)
36276 {
36277 target = gen_reg_rtx (SImode);
36278 emit_move_insn (target, const0_rtx);
36279 target = gen_rtx_SUBREG (QImode, target, 0);
36280
36281 emit_insn
36282 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36283 gen_rtx_fmt_ee (EQ, QImode,
36284 gen_rtx_REG ((machine_mode) d->flag,
36285 FLAGS_REG),
36286 const0_rtx)));
36287 return SUBREG_REG (target);
36288 }
36289 else
36290 return target;
36291 }
36292
36293
36294 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36295
36296 static rtx
36297 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36298 tree exp, rtx target)
36299 {
36300 rtx pat;
36301 tree arg0 = CALL_EXPR_ARG (exp, 0);
36302 tree arg1 = CALL_EXPR_ARG (exp, 1);
36303 tree arg2 = CALL_EXPR_ARG (exp, 2);
36304 rtx scratch0, scratch1;
36305 rtx op0 = expand_normal (arg0);
36306 rtx op1 = expand_normal (arg1);
36307 rtx op2 = expand_normal (arg2);
36308 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36309
36310 tmode0 = insn_data[d->icode].operand[0].mode;
36311 tmode1 = insn_data[d->icode].operand[1].mode;
36312 modev2 = insn_data[d->icode].operand[2].mode;
36313 modev3 = insn_data[d->icode].operand[3].mode;
36314 modeimm = insn_data[d->icode].operand[4].mode;
36315
36316 if (VECTOR_MODE_P (modev2))
36317 op0 = safe_vector_operand (op0, modev2);
36318 if (VECTOR_MODE_P (modev3))
36319 op1 = safe_vector_operand (op1, modev3);
36320
36321 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36322 op0 = copy_to_mode_reg (modev2, op0);
36323 if ((optimize && !register_operand (op1, modev3))
36324 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36325 op1 = copy_to_mode_reg (modev3, op1);
36326
36327 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36328 {
36329 error ("the third argument must be an 8-bit immediate");
36330 return const0_rtx;
36331 }
36332
36333 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36334 {
36335 if (optimize || !target
36336 || GET_MODE (target) != tmode0
36337 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36338 target = gen_reg_rtx (tmode0);
36339
36340 scratch1 = gen_reg_rtx (tmode1);
36341
36342 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36343 }
36344 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36345 {
36346 if (optimize || !target
36347 || GET_MODE (target) != tmode1
36348 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36349 target = gen_reg_rtx (tmode1);
36350
36351 scratch0 = gen_reg_rtx (tmode0);
36352
36353 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36354 }
36355 else
36356 {
36357 gcc_assert (d->flag);
36358
36359 scratch0 = gen_reg_rtx (tmode0);
36360 scratch1 = gen_reg_rtx (tmode1);
36361
36362 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36363 }
36364
36365 if (! pat)
36366 return 0;
36367
36368 emit_insn (pat);
36369
36370 if (d->flag)
36371 {
36372 target = gen_reg_rtx (SImode);
36373 emit_move_insn (target, const0_rtx);
36374 target = gen_rtx_SUBREG (QImode, target, 0);
36375
36376 emit_insn
36377 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36378 gen_rtx_fmt_ee (EQ, QImode,
36379 gen_rtx_REG ((machine_mode) d->flag,
36380 FLAGS_REG),
36381 const0_rtx)));
36382 return SUBREG_REG (target);
36383 }
36384 else
36385 return target;
36386 }
36387
36388 /* Subroutine of ix86_expand_builtin to take care of insns with
36389 variable number of operands. */
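/* Rough guide to the bookkeeping below: NARGS is the number of builtin
   arguments, NARGS_CONSTANT is how many trailing arguments must be
   immediates, MASK_POS (when nonzero) offsets that check so the immediate
   can sit ahead of a trailing writemask operand, and LAST_ARG_COUNT marks
   shift builtins whose final operand may be either an immediate or a
   register count.  */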
36390
36391 static rtx
36392 ix86_expand_args_builtin (const struct builtin_description *d,
36393 tree exp, rtx target)
36394 {
36395 rtx pat, real_target;
36396 unsigned int i, nargs;
36397 unsigned int nargs_constant = 0;
36398 unsigned int mask_pos = 0;
36399 int num_memory = 0;
36400 struct
36401 {
36402 rtx op;
36403 machine_mode mode;
36404 } args[6];
36405 bool last_arg_count = false;
36406 enum insn_code icode = d->icode;
36407 const struct insn_data_d *insn_p = &insn_data[icode];
36408 machine_mode tmode = insn_p->operand[0].mode;
36409 machine_mode rmode = VOIDmode;
36410 bool swap = false;
36411 enum rtx_code comparison = d->comparison;
36412
36413 switch ((enum ix86_builtin_func_type) d->flag)
36414 {
36415 case V2DF_FTYPE_V2DF_ROUND:
36416 case V4DF_FTYPE_V4DF_ROUND:
36417 case V4SF_FTYPE_V4SF_ROUND:
36418 case V8SF_FTYPE_V8SF_ROUND:
36419 case V4SI_FTYPE_V4SF_ROUND:
36420 case V8SI_FTYPE_V8SF_ROUND:
36421 return ix86_expand_sse_round (d, exp, target);
36422 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36423 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36424 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36425 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36426 case INT_FTYPE_V8SF_V8SF_PTEST:
36427 case INT_FTYPE_V4DI_V4DI_PTEST:
36428 case INT_FTYPE_V4DF_V4DF_PTEST:
36429 case INT_FTYPE_V4SF_V4SF_PTEST:
36430 case INT_FTYPE_V2DI_V2DI_PTEST:
36431 case INT_FTYPE_V2DF_V2DF_PTEST:
36432 return ix86_expand_sse_ptest (d, exp, target);
36433 case FLOAT128_FTYPE_FLOAT128:
36434 case FLOAT_FTYPE_FLOAT:
36435 case INT_FTYPE_INT:
36436 case UINT64_FTYPE_INT:
36437 case UINT16_FTYPE_UINT16:
36438 case INT64_FTYPE_INT64:
36439 case INT64_FTYPE_V4SF:
36440 case INT64_FTYPE_V2DF:
36441 case INT_FTYPE_V16QI:
36442 case INT_FTYPE_V8QI:
36443 case INT_FTYPE_V8SF:
36444 case INT_FTYPE_V4DF:
36445 case INT_FTYPE_V4SF:
36446 case INT_FTYPE_V2DF:
36447 case INT_FTYPE_V32QI:
36448 case V16QI_FTYPE_V16QI:
36449 case V8SI_FTYPE_V8SF:
36450 case V8SI_FTYPE_V4SI:
36451 case V8HI_FTYPE_V8HI:
36452 case V8HI_FTYPE_V16QI:
36453 case V8QI_FTYPE_V8QI:
36454 case V8SF_FTYPE_V8SF:
36455 case V8SF_FTYPE_V8SI:
36456 case V8SF_FTYPE_V4SF:
36457 case V8SF_FTYPE_V8HI:
36458 case V4SI_FTYPE_V4SI:
36459 case V4SI_FTYPE_V16QI:
36460 case V4SI_FTYPE_V4SF:
36461 case V4SI_FTYPE_V8SI:
36462 case V4SI_FTYPE_V8HI:
36463 case V4SI_FTYPE_V4DF:
36464 case V4SI_FTYPE_V2DF:
36465 case V4HI_FTYPE_V4HI:
36466 case V4DF_FTYPE_V4DF:
36467 case V4DF_FTYPE_V4SI:
36468 case V4DF_FTYPE_V4SF:
36469 case V4DF_FTYPE_V2DF:
36470 case V4SF_FTYPE_V4SF:
36471 case V4SF_FTYPE_V4SI:
36472 case V4SF_FTYPE_V8SF:
36473 case V4SF_FTYPE_V4DF:
36474 case V4SF_FTYPE_V8HI:
36475 case V4SF_FTYPE_V2DF:
36476 case V2DI_FTYPE_V2DI:
36477 case V2DI_FTYPE_V16QI:
36478 case V2DI_FTYPE_V8HI:
36479 case V2DI_FTYPE_V4SI:
36480 case V2DF_FTYPE_V2DF:
36481 case V2DF_FTYPE_V4SI:
36482 case V2DF_FTYPE_V4DF:
36483 case V2DF_FTYPE_V4SF:
36484 case V2DF_FTYPE_V2SI:
36485 case V2SI_FTYPE_V2SI:
36486 case V2SI_FTYPE_V4SF:
36487 case V2SI_FTYPE_V2SF:
36488 case V2SI_FTYPE_V2DF:
36489 case V2SF_FTYPE_V2SF:
36490 case V2SF_FTYPE_V2SI:
36491 case V32QI_FTYPE_V32QI:
36492 case V32QI_FTYPE_V16QI:
36493 case V16HI_FTYPE_V16HI:
36494 case V16HI_FTYPE_V8HI:
36495 case V8SI_FTYPE_V8SI:
36496 case V16HI_FTYPE_V16QI:
36497 case V8SI_FTYPE_V16QI:
36498 case V4DI_FTYPE_V16QI:
36499 case V8SI_FTYPE_V8HI:
36500 case V4DI_FTYPE_V8HI:
36501 case V4DI_FTYPE_V4SI:
36502 case V4DI_FTYPE_V2DI:
36503 case HI_FTYPE_HI:
36504 case HI_FTYPE_V16QI:
36505 case SI_FTYPE_V32QI:
36506 case DI_FTYPE_V64QI:
36507 case V16QI_FTYPE_HI:
36508 case V32QI_FTYPE_SI:
36509 case V64QI_FTYPE_DI:
36510 case V8HI_FTYPE_QI:
36511 case V16HI_FTYPE_HI:
36512 case V32HI_FTYPE_SI:
36513 case V4SI_FTYPE_QI:
36514 case V8SI_FTYPE_QI:
36515 case V4SI_FTYPE_HI:
36516 case V8SI_FTYPE_HI:
36517 case QI_FTYPE_V8HI:
36518 case HI_FTYPE_V16HI:
36519 case SI_FTYPE_V32HI:
36520 case QI_FTYPE_V4SI:
36521 case QI_FTYPE_V8SI:
36522 case HI_FTYPE_V16SI:
36523 case QI_FTYPE_V2DI:
36524 case QI_FTYPE_V4DI:
36525 case QI_FTYPE_V8DI:
36526 case UINT_FTYPE_V2DF:
36527 case UINT_FTYPE_V4SF:
36528 case UINT64_FTYPE_V2DF:
36529 case UINT64_FTYPE_V4SF:
36530 case V16QI_FTYPE_V8DI:
36531 case V16HI_FTYPE_V16SI:
36532 case V16SI_FTYPE_HI:
36533 case V2DI_FTYPE_QI:
36534 case V4DI_FTYPE_QI:
36535 case V16SI_FTYPE_V16SI:
36536 case V16SI_FTYPE_INT:
36537 case V16SF_FTYPE_FLOAT:
36538 case V16SF_FTYPE_V8SF:
36539 case V16SI_FTYPE_V8SI:
36540 case V16SF_FTYPE_V4SF:
36541 case V16SI_FTYPE_V4SI:
36542 case V16SF_FTYPE_V16SF:
36543 case V8HI_FTYPE_V8DI:
36544 case V8UHI_FTYPE_V8UHI:
36545 case V8SI_FTYPE_V8DI:
36546 case V8SF_FTYPE_V8DF:
36547 case V8DI_FTYPE_QI:
36548 case V8DI_FTYPE_INT64:
36549 case V8DI_FTYPE_V4DI:
36550 case V8DI_FTYPE_V8DI:
36551 case V8DF_FTYPE_DOUBLE:
36552 case V8DF_FTYPE_V4DF:
36553 case V8DF_FTYPE_V2DF:
36554 case V8DF_FTYPE_V8DF:
36555 case V8DF_FTYPE_V8SI:
36556 nargs = 1;
36557 break;
36558 case V4SF_FTYPE_V4SF_VEC_MERGE:
36559 case V2DF_FTYPE_V2DF_VEC_MERGE:
36560 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36561 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36562 case V16QI_FTYPE_V16QI_V16QI:
36563 case V16QI_FTYPE_V8HI_V8HI:
36564 case V16SI_FTYPE_V16SI_V16SI:
36565 case V16SF_FTYPE_V16SF_V16SF:
36566 case V16SF_FTYPE_V16SF_V16SI:
36567 case V8QI_FTYPE_V8QI_V8QI:
36568 case V8QI_FTYPE_V4HI_V4HI:
36569 case V8HI_FTYPE_V8HI_V8HI:
36570 case V8HI_FTYPE_V16QI_V16QI:
36571 case V8HI_FTYPE_V4SI_V4SI:
36572 case V8SF_FTYPE_V8SF_V8SF:
36573 case V8SF_FTYPE_V8SF_V8SI:
36574 case V8DI_FTYPE_V8DI_V8DI:
36575 case V8DF_FTYPE_V8DF_V8DF:
36576 case V8DF_FTYPE_V8DF_V8DI:
36577 case V4SI_FTYPE_V4SI_V4SI:
36578 case V4SI_FTYPE_V8HI_V8HI:
36579 case V4SI_FTYPE_V4SF_V4SF:
36580 case V4SI_FTYPE_V2DF_V2DF:
36581 case V4HI_FTYPE_V4HI_V4HI:
36582 case V4HI_FTYPE_V8QI_V8QI:
36583 case V4HI_FTYPE_V2SI_V2SI:
36584 case V4DF_FTYPE_V4DF_V4DF:
36585 case V4DF_FTYPE_V4DF_V4DI:
36586 case V4SF_FTYPE_V4SF_V4SF:
36587 case V4SF_FTYPE_V4SF_V4SI:
36588 case V4SF_FTYPE_V4SF_V2SI:
36589 case V4SF_FTYPE_V4SF_V2DF:
36590 case V4SF_FTYPE_V4SF_UINT:
36591 case V4SF_FTYPE_V4SF_UINT64:
36592 case V4SF_FTYPE_V4SF_DI:
36593 case V4SF_FTYPE_V4SF_SI:
36594 case V2DI_FTYPE_V2DI_V2DI:
36595 case V2DI_FTYPE_V16QI_V16QI:
36596 case V2DI_FTYPE_V4SI_V4SI:
36597 case V2UDI_FTYPE_V4USI_V4USI:
36598 case V2DI_FTYPE_V2DI_V16QI:
36599 case V2DI_FTYPE_V2DF_V2DF:
36600 case V2SI_FTYPE_V2SI_V2SI:
36601 case V2SI_FTYPE_V4HI_V4HI:
36602 case V2SI_FTYPE_V2SF_V2SF:
36603 case V2DF_FTYPE_V2DF_V2DF:
36604 case V2DF_FTYPE_V2DF_V4SF:
36605 case V2DF_FTYPE_V2DF_V2DI:
36606 case V2DF_FTYPE_V2DF_DI:
36607 case V2DF_FTYPE_V2DF_SI:
36608 case V2DF_FTYPE_V2DF_UINT:
36609 case V2DF_FTYPE_V2DF_UINT64:
36610 case V2SF_FTYPE_V2SF_V2SF:
36611 case V1DI_FTYPE_V1DI_V1DI:
36612 case V1DI_FTYPE_V8QI_V8QI:
36613 case V1DI_FTYPE_V2SI_V2SI:
36614 case V32QI_FTYPE_V16HI_V16HI:
36615 case V16HI_FTYPE_V8SI_V8SI:
36616 case V32QI_FTYPE_V32QI_V32QI:
36617 case V16HI_FTYPE_V32QI_V32QI:
36618 case V16HI_FTYPE_V16HI_V16HI:
36619 case V8SI_FTYPE_V4DF_V4DF:
36620 case V8SI_FTYPE_V8SI_V8SI:
36621 case V8SI_FTYPE_V16HI_V16HI:
36622 case V4DI_FTYPE_V4DI_V4DI:
36623 case V4DI_FTYPE_V8SI_V8SI:
36624 case V4UDI_FTYPE_V8USI_V8USI:
36625 case QI_FTYPE_V8DI_V8DI:
36626 case V8DI_FTYPE_V64QI_V64QI:
36627 case HI_FTYPE_V16SI_V16SI:
36628 if (comparison == UNKNOWN)
36629 return ix86_expand_binop_builtin (icode, exp, target);
36630 nargs = 2;
36631 break;
36632 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36633 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36634 gcc_assert (comparison != UNKNOWN);
36635 nargs = 2;
36636 swap = true;
36637 break;
36638 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36639 case V16HI_FTYPE_V16HI_SI_COUNT:
36640 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36641 case V8SI_FTYPE_V8SI_SI_COUNT:
36642 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36643 case V4DI_FTYPE_V4DI_INT_COUNT:
36644 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36645 case V8HI_FTYPE_V8HI_SI_COUNT:
36646 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36647 case V4SI_FTYPE_V4SI_SI_COUNT:
36648 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36649 case V4HI_FTYPE_V4HI_SI_COUNT:
36650 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36651 case V2DI_FTYPE_V2DI_SI_COUNT:
36652 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36653 case V2SI_FTYPE_V2SI_SI_COUNT:
36654 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36655 case V1DI_FTYPE_V1DI_SI_COUNT:
36656 nargs = 2;
36657 last_arg_count = true;
36658 break;
36659 case UINT64_FTYPE_UINT64_UINT64:
36660 case UINT_FTYPE_UINT_UINT:
36661 case UINT_FTYPE_UINT_USHORT:
36662 case UINT_FTYPE_UINT_UCHAR:
36663 case UINT16_FTYPE_UINT16_INT:
36664 case UINT8_FTYPE_UINT8_INT:
36665 case HI_FTYPE_HI_HI:
36666 case SI_FTYPE_SI_SI:
36667 case DI_FTYPE_DI_DI:
36668 case V16SI_FTYPE_V8DF_V8DF:
36669 nargs = 2;
36670 break;
36671 case V2DI_FTYPE_V2DI_INT_CONVERT:
36672 nargs = 2;
36673 rmode = V1TImode;
36674 nargs_constant = 1;
36675 break;
36676 case V4DI_FTYPE_V4DI_INT_CONVERT:
36677 nargs = 2;
36678 rmode = V2TImode;
36679 nargs_constant = 1;
36680 break;
36681 case V8DI_FTYPE_V8DI_INT_CONVERT:
36682 nargs = 2;
36683 rmode = V4TImode;
36684 nargs_constant = 1;
36685 break;
36686 case V8HI_FTYPE_V8HI_INT:
36687 case V8HI_FTYPE_V8SF_INT:
36688 case V16HI_FTYPE_V16SF_INT:
36689 case V8HI_FTYPE_V4SF_INT:
36690 case V8SF_FTYPE_V8SF_INT:
36691 case V4SF_FTYPE_V16SF_INT:
36692 case V16SF_FTYPE_V16SF_INT:
36693 case V4SI_FTYPE_V4SI_INT:
36694 case V4SI_FTYPE_V8SI_INT:
36695 case V4HI_FTYPE_V4HI_INT:
36696 case V4DF_FTYPE_V4DF_INT:
36697 case V4DF_FTYPE_V8DF_INT:
36698 case V4SF_FTYPE_V4SF_INT:
36699 case V4SF_FTYPE_V8SF_INT:
36700 case V2DI_FTYPE_V2DI_INT:
36701 case V2DF_FTYPE_V2DF_INT:
36702 case V2DF_FTYPE_V4DF_INT:
36703 case V16HI_FTYPE_V16HI_INT:
36704 case V8SI_FTYPE_V8SI_INT:
36705 case V16SI_FTYPE_V16SI_INT:
36706 case V4SI_FTYPE_V16SI_INT:
36707 case V4DI_FTYPE_V4DI_INT:
36708 case V2DI_FTYPE_V4DI_INT:
36709 case V4DI_FTYPE_V8DI_INT:
36710 case HI_FTYPE_HI_INT:
36711 case QI_FTYPE_V4SF_INT:
36712 case QI_FTYPE_V2DF_INT:
36713 nargs = 2;
36714 nargs_constant = 1;
36715 break;
36716 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36717 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36718 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36719 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36720 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36721 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36722 case HI_FTYPE_V16SI_V16SI_HI:
36723 case QI_FTYPE_V8DI_V8DI_QI:
36724 case V16HI_FTYPE_V16SI_V16HI_HI:
36725 case V16QI_FTYPE_V16SI_V16QI_HI:
36726 case V16QI_FTYPE_V8DI_V16QI_QI:
36727 case V16SF_FTYPE_V16SF_V16SF_HI:
36728 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36729 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36730 case V16SF_FTYPE_V16SI_V16SF_HI:
36731 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36732 case V16SF_FTYPE_V4SF_V16SF_HI:
36733 case V16SI_FTYPE_SI_V16SI_HI:
36734 case V16SI_FTYPE_V16HI_V16SI_HI:
36735 case V16SI_FTYPE_V16QI_V16SI_HI:
36736 case V16SI_FTYPE_V16SF_V16SI_HI:
36737 case V8SF_FTYPE_V4SF_V8SF_QI:
36738 case V4DF_FTYPE_V2DF_V4DF_QI:
36739 case V8SI_FTYPE_V4SI_V8SI_QI:
36740 case V8SI_FTYPE_SI_V8SI_QI:
36741 case V4SI_FTYPE_V4SI_V4SI_QI:
36742 case V4SI_FTYPE_SI_V4SI_QI:
36743 case V4DI_FTYPE_V2DI_V4DI_QI:
36744 case V4DI_FTYPE_DI_V4DI_QI:
36745 case V2DI_FTYPE_V2DI_V2DI_QI:
36746 case V2DI_FTYPE_DI_V2DI_QI:
36747 case V64QI_FTYPE_V64QI_V64QI_DI:
36748 case V64QI_FTYPE_V16QI_V64QI_DI:
36749 case V64QI_FTYPE_QI_V64QI_DI:
36750 case V32QI_FTYPE_V32QI_V32QI_SI:
36751 case V32QI_FTYPE_V16QI_V32QI_SI:
36752 case V32QI_FTYPE_QI_V32QI_SI:
36753 case V16QI_FTYPE_V16QI_V16QI_HI:
36754 case V16QI_FTYPE_QI_V16QI_HI:
36755 case V32HI_FTYPE_V8HI_V32HI_SI:
36756 case V32HI_FTYPE_HI_V32HI_SI:
36757 case V16HI_FTYPE_V8HI_V16HI_HI:
36758 case V16HI_FTYPE_HI_V16HI_HI:
36759 case V8HI_FTYPE_V8HI_V8HI_QI:
36760 case V8HI_FTYPE_HI_V8HI_QI:
36761 case V8SF_FTYPE_V8HI_V8SF_QI:
36762 case V4SF_FTYPE_V8HI_V4SF_QI:
36763 case V8SI_FTYPE_V8SF_V8SI_QI:
36764 case V4SI_FTYPE_V4SF_V4SI_QI:
36765 case V8DI_FTYPE_V8SF_V8DI_QI:
36766 case V4DI_FTYPE_V4SF_V4DI_QI:
36767 case V2DI_FTYPE_V4SF_V2DI_QI:
36768 case V8SF_FTYPE_V8DI_V8SF_QI:
36769 case V4SF_FTYPE_V4DI_V4SF_QI:
36770 case V4SF_FTYPE_V2DI_V4SF_QI:
36771 case V8DF_FTYPE_V8DI_V8DF_QI:
36772 case V4DF_FTYPE_V4DI_V4DF_QI:
36773 case V2DF_FTYPE_V2DI_V2DF_QI:
36774 case V16QI_FTYPE_V8HI_V16QI_QI:
36775 case V16QI_FTYPE_V16HI_V16QI_HI:
36776 case V16QI_FTYPE_V4SI_V16QI_QI:
36777 case V16QI_FTYPE_V8SI_V16QI_QI:
36778 case V8HI_FTYPE_V4SI_V8HI_QI:
36779 case V8HI_FTYPE_V8SI_V8HI_QI:
36780 case V16QI_FTYPE_V2DI_V16QI_QI:
36781 case V16QI_FTYPE_V4DI_V16QI_QI:
36782 case V8HI_FTYPE_V2DI_V8HI_QI:
36783 case V8HI_FTYPE_V4DI_V8HI_QI:
36784 case V4SI_FTYPE_V2DI_V4SI_QI:
36785 case V4SI_FTYPE_V4DI_V4SI_QI:
36786 case V32QI_FTYPE_V32HI_V32QI_SI:
36787 case HI_FTYPE_V16QI_V16QI_HI:
36788 case SI_FTYPE_V32QI_V32QI_SI:
36789 case DI_FTYPE_V64QI_V64QI_DI:
36790 case QI_FTYPE_V8HI_V8HI_QI:
36791 case HI_FTYPE_V16HI_V16HI_HI:
36792 case SI_FTYPE_V32HI_V32HI_SI:
36793 case QI_FTYPE_V4SI_V4SI_QI:
36794 case QI_FTYPE_V8SI_V8SI_QI:
36795 case QI_FTYPE_V2DI_V2DI_QI:
36796 case QI_FTYPE_V4DI_V4DI_QI:
36797 case V4SF_FTYPE_V2DF_V4SF_QI:
36798 case V4SF_FTYPE_V4DF_V4SF_QI:
36799 case V16SI_FTYPE_V16SI_V16SI_HI:
36800 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36801 case V16SI_FTYPE_V4SI_V16SI_HI:
36802 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36803 case V2DI_FTYPE_V4SI_V2DI_QI:
36804 case V2DI_FTYPE_V8HI_V2DI_QI:
36805 case V2DI_FTYPE_V16QI_V2DI_QI:
36806 case V4DI_FTYPE_V4DI_V4DI_QI:
36807 case V4DI_FTYPE_V4SI_V4DI_QI:
36808 case V4DI_FTYPE_V8HI_V4DI_QI:
36809 case V4DI_FTYPE_V16QI_V4DI_QI:
36810 case V8DI_FTYPE_V8DF_V8DI_QI:
36811 case V4DI_FTYPE_V4DF_V4DI_QI:
36812 case V2DI_FTYPE_V2DF_V2DI_QI:
36813 case V4SI_FTYPE_V4DF_V4SI_QI:
36814 case V4SI_FTYPE_V2DF_V4SI_QI:
36815 case V4SI_FTYPE_V8HI_V4SI_QI:
36816 case V4SI_FTYPE_V16QI_V4SI_QI:
36817 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36818 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36819 case V8DF_FTYPE_V2DF_V8DF_QI:
36820 case V8DF_FTYPE_V4DF_V8DF_QI:
36821 case V8DF_FTYPE_V8DF_V8DF_QI:
36822 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36823 case V8SF_FTYPE_V8SF_V8SF_QI:
36824 case V8SF_FTYPE_V8SI_V8SF_QI:
36825 case V4DF_FTYPE_V4DF_V4DF_QI:
36826 case V4SF_FTYPE_V4SF_V4SF_QI:
36827 case V2DF_FTYPE_V2DF_V2DF_QI:
36828 case V2DF_FTYPE_V4SF_V2DF_QI:
36829 case V2DF_FTYPE_V4SI_V2DF_QI:
36830 case V4SF_FTYPE_V4SI_V4SF_QI:
36831 case V4DF_FTYPE_V4SF_V4DF_QI:
36832 case V4DF_FTYPE_V4SI_V4DF_QI:
36833 case V8SI_FTYPE_V8SI_V8SI_QI:
36834 case V8SI_FTYPE_V8HI_V8SI_QI:
36835 case V8SI_FTYPE_V16QI_V8SI_QI:
36836 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36837 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36838 case V8DF_FTYPE_V8SF_V8DF_QI:
36839 case V8DF_FTYPE_V8SI_V8DF_QI:
36840 case V8DI_FTYPE_DI_V8DI_QI:
36841 case V16SF_FTYPE_V8SF_V16SF_HI:
36842 case V16SI_FTYPE_V8SI_V16SI_HI:
36843 case V16HI_FTYPE_V16HI_V16HI_HI:
36844 case V8HI_FTYPE_V16QI_V8HI_QI:
36845 case V16HI_FTYPE_V16QI_V16HI_HI:
36846 case V32HI_FTYPE_V32HI_V32HI_SI:
36847 case V32HI_FTYPE_V32QI_V32HI_SI:
36848 case V8DI_FTYPE_V16QI_V8DI_QI:
36849 case V8DI_FTYPE_V2DI_V8DI_QI:
36850 case V8DI_FTYPE_V4DI_V8DI_QI:
36851 case V8DI_FTYPE_V8DI_V8DI_QI:
36852 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36853 case V8DI_FTYPE_V8HI_V8DI_QI:
36854 case V8DI_FTYPE_V8SI_V8DI_QI:
36855 case V8HI_FTYPE_V8DI_V8HI_QI:
36856 case V8SF_FTYPE_V8DF_V8SF_QI:
36857 case V8SI_FTYPE_V8DF_V8SI_QI:
36858 case V8SI_FTYPE_V8DI_V8SI_QI:
36859 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36860 nargs = 3;
36861 break;
36862 case V32QI_FTYPE_V32QI_V32QI_INT:
36863 case V16HI_FTYPE_V16HI_V16HI_INT:
36864 case V16QI_FTYPE_V16QI_V16QI_INT:
36865 case V4DI_FTYPE_V4DI_V4DI_INT:
36866 case V8HI_FTYPE_V8HI_V8HI_INT:
36867 case V8SI_FTYPE_V8SI_V8SI_INT:
36868 case V8SI_FTYPE_V8SI_V4SI_INT:
36869 case V8SF_FTYPE_V8SF_V8SF_INT:
36870 case V8SF_FTYPE_V8SF_V4SF_INT:
36871 case V4SI_FTYPE_V4SI_V4SI_INT:
36872 case V4DF_FTYPE_V4DF_V4DF_INT:
36873 case V16SF_FTYPE_V16SF_V16SF_INT:
36874 case V16SF_FTYPE_V16SF_V4SF_INT:
36875 case V16SI_FTYPE_V16SI_V4SI_INT:
36876 case V4DF_FTYPE_V4DF_V2DF_INT:
36877 case V4SF_FTYPE_V4SF_V4SF_INT:
36878 case V2DI_FTYPE_V2DI_V2DI_INT:
36879 case V4DI_FTYPE_V4DI_V2DI_INT:
36880 case V2DF_FTYPE_V2DF_V2DF_INT:
36881 case QI_FTYPE_V8DI_V8DI_INT:
36882 case QI_FTYPE_V8DF_V8DF_INT:
36883 case QI_FTYPE_V2DF_V2DF_INT:
36884 case QI_FTYPE_V4SF_V4SF_INT:
36885 case HI_FTYPE_V16SI_V16SI_INT:
36886 case HI_FTYPE_V16SF_V16SF_INT:
36887 nargs = 3;
36888 nargs_constant = 1;
36889 break;
36890 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36891 nargs = 3;
36892 rmode = V4DImode;
36893 nargs_constant = 1;
36894 break;
36895 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36896 nargs = 3;
36897 rmode = V2DImode;
36898 nargs_constant = 1;
36899 break;
36900 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36901 nargs = 3;
36902 rmode = DImode;
36903 nargs_constant = 1;
36904 break;
36905 case V2DI_FTYPE_V2DI_UINT_UINT:
36906 nargs = 3;
36907 nargs_constant = 2;
36908 break;
36909 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36910 nargs = 3;
36911 rmode = V8DImode;
36912 nargs_constant = 1;
36913 break;
36914 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36915 nargs = 5;
36916 rmode = V8DImode;
36917 mask_pos = 2;
36918 nargs_constant = 1;
36919 break;
36920 case QI_FTYPE_V8DF_INT_QI:
36921 case QI_FTYPE_V4DF_INT_QI:
36922 case QI_FTYPE_V2DF_INT_QI:
36923 case HI_FTYPE_V16SF_INT_HI:
36924 case QI_FTYPE_V8SF_INT_QI:
36925 case QI_FTYPE_V4SF_INT_QI:
36926 nargs = 3;
36927 mask_pos = 1;
36928 nargs_constant = 1;
36929 break;
36930 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36931 nargs = 5;
36932 rmode = V4DImode;
36933 mask_pos = 2;
36934 nargs_constant = 1;
36935 break;
36936 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36937 nargs = 5;
36938 rmode = V2DImode;
36939 mask_pos = 2;
36940 nargs_constant = 1;
36941 break;
36942 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36943 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36944 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36945 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36946 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36947 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36948 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36949 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36950 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36951 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36952 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36953 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36954 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36955 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36956 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36957 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36958 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36959 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36960 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36961 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36962 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36963 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36964 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36965 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36966 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36967 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36968 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36969 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36970 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36971 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36972 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36973 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36974 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36975 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36976 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36977 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36978 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36979 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36980 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36981 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36982 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36983 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36984 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36985 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36986 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36987 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36988 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36989 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36990 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36991 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36992 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36993 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36994 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36995 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36996 nargs = 4;
36997 break;
36998 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36999 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37000 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37001 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37002 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37003 nargs = 4;
37004 nargs_constant = 1;
37005 break;
37006 case QI_FTYPE_V4DI_V4DI_INT_QI:
37007 case QI_FTYPE_V8SI_V8SI_INT_QI:
37008 case QI_FTYPE_V4DF_V4DF_INT_QI:
37009 case QI_FTYPE_V8SF_V8SF_INT_QI:
37010 case QI_FTYPE_V2DI_V2DI_INT_QI:
37011 case QI_FTYPE_V4SI_V4SI_INT_QI:
37012 case QI_FTYPE_V2DF_V2DF_INT_QI:
37013 case QI_FTYPE_V4SF_V4SF_INT_QI:
37014 case DI_FTYPE_V64QI_V64QI_INT_DI:
37015 case SI_FTYPE_V32QI_V32QI_INT_SI:
37016 case HI_FTYPE_V16QI_V16QI_INT_HI:
37017 case SI_FTYPE_V32HI_V32HI_INT_SI:
37018 case HI_FTYPE_V16HI_V16HI_INT_HI:
37019 case QI_FTYPE_V8HI_V8HI_INT_QI:
37020 nargs = 4;
37021 mask_pos = 1;
37022 nargs_constant = 1;
37023 break;
37024 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37025 nargs = 4;
37026 nargs_constant = 2;
37027 break;
37028 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37029 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37030 nargs = 4;
37031 break;
37032 case QI_FTYPE_V8DI_V8DI_INT_QI:
37033 case HI_FTYPE_V16SI_V16SI_INT_HI:
37034 case QI_FTYPE_V8DF_V8DF_INT_QI:
37035 case HI_FTYPE_V16SF_V16SF_INT_HI:
37036 mask_pos = 1;
37037 nargs = 4;
37038 nargs_constant = 1;
37039 break;
37040 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37041 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37042 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37043 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37044 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37045 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37046 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37047 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37048 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37049 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37050 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37051 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37052 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37053 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37054 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37055 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37056 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37057 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37058 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37059 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37060 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37061 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37062 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37063 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37064 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37065 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37066 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37067 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37068 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37069 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37070 nargs = 4;
37071 mask_pos = 2;
37072 nargs_constant = 1;
37073 break;
37074 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37075 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37076 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37077 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37078 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37079 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37080 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37081 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37082 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37083 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37084 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37085 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37086 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37087 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37088 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37089 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37090 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37091 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37092 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37093 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37094 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37095 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37096 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37097 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37098 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37099 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37100 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37101 nargs = 5;
37102 mask_pos = 2;
37103 nargs_constant = 1;
37104 break;
37105 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37106 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37107 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37108 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37109 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37110 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37111 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37112 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37113 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37114 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37115 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37116 nargs = 5;
37118 mask_pos = 1;
37119 nargs_constant = 1;
37120 break;
37121
37122 default:
37123 gcc_unreachable ();
37124 }
37125
37126 gcc_assert (nargs <= ARRAY_SIZE (args));
37127
37128 if (comparison != UNKNOWN)
37129 {
37130 gcc_assert (nargs == 2);
37131 return ix86_expand_sse_compare (d, exp, target, swap);
37132 }
37133
37134 if (rmode == VOIDmode || rmode == tmode)
37135 {
37136 if (optimize
37137 || target == 0
37138 || GET_MODE (target) != tmode
37139 || !insn_p->operand[0].predicate (target, tmode))
37140 target = gen_reg_rtx (tmode);
37141 real_target = target;
37142 }
37143 else
37144 {
37145 real_target = gen_reg_rtx (tmode);
37146 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37147 }
37148
37149 for (i = 0; i < nargs; i++)
37150 {
37151 tree arg = CALL_EXPR_ARG (exp, i);
37152 rtx op = expand_normal (arg);
37153 machine_mode mode = insn_p->operand[i + 1].mode;
37154 bool match = insn_p->operand[i + 1].predicate (op, mode);
37155
37156 if (last_arg_count && (i + 1) == nargs)
37157 {
37158 /* SIMD shift insns take either an 8-bit immediate or a
37159 register as the count. But the builtin functions take an int
37160 as the count. If the count doesn't match, we put it in a register. */
37161 if (!match)
37162 {
37163 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37164 if (!insn_p->operand[i + 1].predicate (op, mode))
37165 op = copy_to_reg (op);
37166 }
37167 }
37168 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37169 || (!mask_pos && (nargs - i) <= nargs_constant))
37170 {
37171 if (!match)
37172 switch (icode)
37173 {
37174 case CODE_FOR_avx_vinsertf128v4di:
37175 case CODE_FOR_avx_vextractf128v4di:
37176 error ("the last argument must be a 1-bit immediate");
37177 return const0_rtx;
37178
37179 case CODE_FOR_avx512f_cmpv8di3_mask:
37180 case CODE_FOR_avx512f_cmpv16si3_mask:
37181 case CODE_FOR_avx512f_ucmpv8di3_mask:
37182 case CODE_FOR_avx512f_ucmpv16si3_mask:
37183 case CODE_FOR_avx512vl_cmpv4di3_mask:
37184 case CODE_FOR_avx512vl_cmpv8si3_mask:
37185 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37186 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37187 case CODE_FOR_avx512vl_cmpv2di3_mask:
37188 case CODE_FOR_avx512vl_cmpv4si3_mask:
37189 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37190 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37191 error ("the last argument must be a 3-bit immediate");
37192 return const0_rtx;
37193
37194 case CODE_FOR_sse4_1_roundsd:
37195 case CODE_FOR_sse4_1_roundss:
37196
37197 case CODE_FOR_sse4_1_roundpd:
37198 case CODE_FOR_sse4_1_roundps:
37199 case CODE_FOR_avx_roundpd256:
37200 case CODE_FOR_avx_roundps256:
37201
37202 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37203 case CODE_FOR_sse4_1_roundps_sfix:
37204 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37205 case CODE_FOR_avx_roundps_sfix256:
37206
37207 case CODE_FOR_sse4_1_blendps:
37208 case CODE_FOR_avx_blendpd256:
37209 case CODE_FOR_avx_vpermilv4df:
37210 case CODE_FOR_avx_vpermilv4df_mask:
37211 case CODE_FOR_avx512f_getmantv8df_mask:
37212 case CODE_FOR_avx512f_getmantv16sf_mask:
37213 case CODE_FOR_avx512vl_getmantv8sf_mask:
37214 case CODE_FOR_avx512vl_getmantv4df_mask:
37215 case CODE_FOR_avx512vl_getmantv4sf_mask:
37216 case CODE_FOR_avx512vl_getmantv2df_mask:
37217 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37218 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37219 case CODE_FOR_avx512dq_rangepv4df_mask:
37220 case CODE_FOR_avx512dq_rangepv8sf_mask:
37221 case CODE_FOR_avx512dq_rangepv2df_mask:
37222 case CODE_FOR_avx512dq_rangepv4sf_mask:
37223 case CODE_FOR_avx_shufpd256_mask:
37224 error ("the last argument must be a 4-bit immediate");
37225 return const0_rtx;
37226
37227 case CODE_FOR_sha1rnds4:
37228 case CODE_FOR_sse4_1_blendpd:
37229 case CODE_FOR_avx_vpermilv2df:
37230 case CODE_FOR_avx_vpermilv2df_mask:
37231 case CODE_FOR_xop_vpermil2v2df3:
37232 case CODE_FOR_xop_vpermil2v4sf3:
37233 case CODE_FOR_xop_vpermil2v4df3:
37234 case CODE_FOR_xop_vpermil2v8sf3:
37235 case CODE_FOR_avx512f_vinsertf32x4_mask:
37236 case CODE_FOR_avx512f_vinserti32x4_mask:
37237 case CODE_FOR_avx512f_vextractf32x4_mask:
37238 case CODE_FOR_avx512f_vextracti32x4_mask:
37239 case CODE_FOR_sse2_shufpd:
37240 case CODE_FOR_sse2_shufpd_mask:
37241 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37242 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37243 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37244 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37245 error ("the last argument must be a 2-bit immediate");
37246 return const0_rtx;
37247
37248 case CODE_FOR_avx_vextractf128v4df:
37249 case CODE_FOR_avx_vextractf128v8sf:
37250 case CODE_FOR_avx_vextractf128v8si:
37251 case CODE_FOR_avx_vinsertf128v4df:
37252 case CODE_FOR_avx_vinsertf128v8sf:
37253 case CODE_FOR_avx_vinsertf128v8si:
37254 case CODE_FOR_avx512f_vinsertf64x4_mask:
37255 case CODE_FOR_avx512f_vinserti64x4_mask:
37256 case CODE_FOR_avx512f_vextractf64x4_mask:
37257 case CODE_FOR_avx512f_vextracti64x4_mask:
37258 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37259 case CODE_FOR_avx512dq_vinserti32x8_mask:
37260 case CODE_FOR_avx512vl_vinsertv4df:
37261 case CODE_FOR_avx512vl_vinsertv4di:
37262 case CODE_FOR_avx512vl_vinsertv8sf:
37263 case CODE_FOR_avx512vl_vinsertv8si:
37264 error ("the last argument must be a 1-bit immediate");
37265 return const0_rtx;
37266
37267 case CODE_FOR_avx_vmcmpv2df3:
37268 case CODE_FOR_avx_vmcmpv4sf3:
37269 case CODE_FOR_avx_cmpv2df3:
37270 case CODE_FOR_avx_cmpv4sf3:
37271 case CODE_FOR_avx_cmpv4df3:
37272 case CODE_FOR_avx_cmpv8sf3:
37273 case CODE_FOR_avx512f_cmpv8df3_mask:
37274 case CODE_FOR_avx512f_cmpv16sf3_mask:
37275 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37276 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37277 error ("the last argument must be a 5-bit immediate");
37278 return const0_rtx;
37279
37280 default:
37281 switch (nargs_constant)
37282 {
37283 case 2:
37284 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37285 || (!mask_pos && (nargs - i) == nargs_constant))
37286 {
37287 error ("the next to last argument must be an 8-bit immediate");
37288 break;
37289 }
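/* FALLTHRU */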
37290 case 1:
37291 error ("the last argument must be an 8-bit immediate");
37292 break;
37293 default:
37294 gcc_unreachable ();
37295 }
37296 return const0_rtx;
37297 }
37298 }
37299 else
37300 {
37301 if (VECTOR_MODE_P (mode))
37302 op = safe_vector_operand (op, mode);
37303
37304 /* If we aren't optimizing, only allow one memory operand to
37305 be generated. */
37306 if (memory_operand (op, mode))
37307 num_memory++;
37308
37309 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37310 {
37311 if (optimize || !match || num_memory > 1)
37312 op = copy_to_mode_reg (mode, op);
37313 }
37314 else
37315 {
37316 op = copy_to_reg (op);
37317 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37318 }
37319 }
37320
37321 args[i].op = op;
37322 args[i].mode = mode;
37323 }
37324
37325 switch (nargs)
37326 {
37327 case 1:
37328 pat = GEN_FCN (icode) (real_target, args[0].op);
37329 break;
37330 case 2:
37331 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37332 break;
37333 case 3:
37334 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37335 args[2].op);
37336 break;
37337 case 4:
37338 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37339 args[2].op, args[3].op);
37340 break;
37341 case 5:
37342 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37343 args[2].op, args[3].op, args[4].op);
break;
37344 case 6:
37345 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37346 args[2].op, args[3].op, args[4].op,
37347 args[5].op);
37348 break;
37349 default:
37350 gcc_unreachable ();
37351 }
37352
37353 if (! pat)
37354 return 0;
37355
37356 emit_insn (pat);
37357 return target;
37358 }
37359
37360 /* Transform pattern of following layout:
37361 (parallel [
37362 set (A B)
37363 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37364 ])
37365 into:
37366 (set (A B))
37367
37368 Or:
37369 (parallel [ A B
37370 ...
37371 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37372 ...
37373 ])
37374 into:
37375 (parallel [ A B ... ]) */
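/* This is used when the rounding immediate turns out to be NO_ROUND, so the
   plain (non-embedded-rounding) form of the insn can be emitted instead.  */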
37376
37377 static rtx
37378 ix86_erase_embedded_rounding (rtx pat)
37379 {
37380 if (GET_CODE (pat) == INSN)
37381 pat = PATTERN (pat);
37382
37383 gcc_assert (GET_CODE (pat) == PARALLEL);
37384
37385 if (XVECLEN (pat, 0) == 2)
37386 {
37387 rtx p0 = XVECEXP (pat, 0, 0);
37388 rtx p1 = XVECEXP (pat, 0, 1);
37389
37390 gcc_assert (GET_CODE (p0) == SET
37391 && GET_CODE (p1) == UNSPEC
37392 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37393
37394 return p0;
37395 }
37396 else
37397 {
37398 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37399 int i = 0;
37400 int j = 0;
37401
37402 for (; i < XVECLEN (pat, 0); ++i)
37403 {
37404 rtx elem = XVECEXP (pat, 0, i);
37405 if (GET_CODE (elem) != UNSPEC
37406 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37407 res [j++] = elem;
37408 }
37409
37410 /* No more than 1 occurrence was removed. */
37411 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37412
37413 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37414 }
37415 }
37416
37417 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37418 with rounding. */
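/* As reached from the INT_FTYPE_V4SF_V4SF_INT_INT and
   INT_FTYPE_V2DF_V2DF_INT_INT cases, the call shape is
   (x, y, predicate immediate in [0, 32), rounding immediate); the predicate
   is mapped through comi_comparisons[] and need_ucomi_values[] below.  */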
37419 static rtx
37420 ix86_expand_sse_comi_round (const struct builtin_description *d,
37421 tree exp, rtx target)
37422 {
37423 rtx pat, set_dst;
37424 tree arg0 = CALL_EXPR_ARG (exp, 0);
37425 tree arg1 = CALL_EXPR_ARG (exp, 1);
37426 tree arg2 = CALL_EXPR_ARG (exp, 2);
37427 tree arg3 = CALL_EXPR_ARG (exp, 3);
37428 rtx op0 = expand_normal (arg0);
37429 rtx op1 = expand_normal (arg1);
37430 rtx op2 = expand_normal (arg2);
37431 rtx op3 = expand_normal (arg3);
37432 enum insn_code icode = d->icode;
37433 const struct insn_data_d *insn_p = &insn_data[icode];
37434 machine_mode mode0 = insn_p->operand[0].mode;
37435 machine_mode mode1 = insn_p->operand[1].mode;
37436 enum rtx_code comparison = UNEQ;
37437 bool need_ucomi = false;
37438
37439 /* See avxintrin.h for values. */
37440 enum rtx_code comi_comparisons[32] =
37441 {
37442 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37443 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37444 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37445 };
37446 bool need_ucomi_values[32] =
37447 {
37448 true, false, false, true, true, false, false, true,
37449 true, false, false, true, true, false, false, true,
37450 false, true, true, false, false, true, true, false,
37451 false, true, true, false, false, true, true, false
37452 };
37453
37454 if (!CONST_INT_P (op2))
37455 {
37456 error ("the third argument must be a comparison constant");
37457 return const0_rtx;
37458 }
37459 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37460 {
37461 error ("incorrect comparison mode");
37462 return const0_rtx;
37463 }
37464
37465 if (!insn_p->operand[2].predicate (op3, SImode))
37466 {
37467 error ("incorrect rounding operand");
37468 return const0_rtx;
37469 }
37470
37471 comparison = comi_comparisons[INTVAL (op2)];
37472 need_ucomi = need_ucomi_values[INTVAL (op2)];
37473
37474 if (VECTOR_MODE_P (mode0))
37475 op0 = safe_vector_operand (op0, mode0);
37476 if (VECTOR_MODE_P (mode1))
37477 op1 = safe_vector_operand (op1, mode1);
37478
37479 target = gen_reg_rtx (SImode);
37480 emit_move_insn (target, const0_rtx);
37481 target = gen_rtx_SUBREG (QImode, target, 0);
37482
37483 if ((optimize && !register_operand (op0, mode0))
37484 || !insn_p->operand[0].predicate (op0, mode0))
37485 op0 = copy_to_mode_reg (mode0, op0);
37486 if ((optimize && !register_operand (op1, mode1))
37487 || !insn_p->operand[1].predicate (op1, mode1))
37488 op1 = copy_to_mode_reg (mode1, op1);
37489
37490 if (need_ucomi)
37491 icode = icode == CODE_FOR_sse_comi_round
37492 ? CODE_FOR_sse_ucomi_round
37493 : CODE_FOR_sse2_ucomi_round;
37494
37495 pat = GEN_FCN (icode) (op0, op1, op3);
37496 if (! pat)
37497 return 0;
37498
37499 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37500 if (INTVAL (op3) == NO_ROUND)
37501 {
37502 pat = ix86_erase_embedded_rounding (pat);
37503 if (! pat)
37504 return 0;
37505
37506 set_dst = SET_DEST (pat);
37507 }
37508 else
37509 {
37510 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37511 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37512 }
37513
37514 emit_insn (pat);
37515 emit_insn (gen_rtx_SET (VOIDmode,
37516 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37517 gen_rtx_fmt_ee (comparison, QImode,
37518 set_dst,
37519 const0_rtx)));
37520
37521 return SUBREG_REG (target);
37522 }
37523
37524 static rtx
37525 ix86_expand_round_builtin (const struct builtin_description *d,
37526 tree exp, rtx target)
37527 {
37528 rtx pat;
37529 unsigned int i, nargs;
37530 struct
37531 {
37532 rtx op;
37533 machine_mode mode;
37534 } args[6];
37535 enum insn_code icode = d->icode;
37536 const struct insn_data_d *insn_p = &insn_data[icode];
37537 machine_mode tmode = insn_p->operand[0].mode;
37538 unsigned int nargs_constant = 0;
37539 unsigned int redundant_embed_rnd = 0;
37540
37541 switch ((enum ix86_builtin_func_type) d->flag)
37542 {
37543 case UINT64_FTYPE_V2DF_INT:
37544 case UINT64_FTYPE_V4SF_INT:
37545 case UINT_FTYPE_V2DF_INT:
37546 case UINT_FTYPE_V4SF_INT:
37547 case INT64_FTYPE_V2DF_INT:
37548 case INT64_FTYPE_V4SF_INT:
37549 case INT_FTYPE_V2DF_INT:
37550 case INT_FTYPE_V4SF_INT:
37551 nargs = 2;
37552 break;
37553 case V4SF_FTYPE_V4SF_UINT_INT:
37554 case V4SF_FTYPE_V4SF_UINT64_INT:
37555 case V2DF_FTYPE_V2DF_UINT64_INT:
37556 case V4SF_FTYPE_V4SF_INT_INT:
37557 case V4SF_FTYPE_V4SF_INT64_INT:
37558 case V2DF_FTYPE_V2DF_INT64_INT:
37559 case V4SF_FTYPE_V4SF_V4SF_INT:
37560 case V2DF_FTYPE_V2DF_V2DF_INT:
37561 case V4SF_FTYPE_V4SF_V2DF_INT:
37562 case V2DF_FTYPE_V2DF_V4SF_INT:
37563 nargs = 3;
37564 break;
37565 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37566 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37567 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37568 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37569 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37570 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37571 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37572 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37573 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37574 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37575 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37576 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37577 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37578 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37579 nargs = 4;
37580 break;
37581 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37582 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37583 nargs_constant = 2;
37584 nargs = 4;
37585 break;
37586 case INT_FTYPE_V4SF_V4SF_INT_INT:
37587 case INT_FTYPE_V2DF_V2DF_INT_INT:
37588 return ix86_expand_sse_comi_round (d, exp, target);
37589 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37590 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37591 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37592 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37593 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37594 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37595 nargs = 5;
37596 break;
37597 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37598 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37599 nargs_constant = 4;
37600 nargs = 5;
37601 break;
37602 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37603 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37604 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37605 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37606 nargs_constant = 3;
37607 nargs = 5;
37608 break;
37609 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37610 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37611 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37612 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37613 nargs = 6;
37614 nargs_constant = 4;
37615 break;
37616 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37617 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37618 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37619 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37620 nargs = 6;
37621 nargs_constant = 3;
37622 break;
37623 default:
37624 gcc_unreachable ();
37625 }
37626 gcc_assert (nargs <= ARRAY_SIZE (args));
37627
37628 if (optimize
37629 || target == 0
37630 || GET_MODE (target) != tmode
37631 || !insn_p->operand[0].predicate (target, tmode))
37632 target = gen_reg_rtx (tmode);
37633
37634 for (i = 0; i < nargs; i++)
37635 {
37636 tree arg = CALL_EXPR_ARG (exp, i);
37637 rtx op = expand_normal (arg);
37638 machine_mode mode = insn_p->operand[i + 1].mode;
37639 bool match = insn_p->operand[i + 1].predicate (op, mode);
37640
37641 if (i == nargs - nargs_constant)
37642 {
37643 if (!match)
37644 {
37645 switch (icode)
37646 {
37647 case CODE_FOR_avx512f_getmantv8df_mask_round:
37648 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37649 case CODE_FOR_avx512f_vgetmantv2df_round:
37650 case CODE_FOR_avx512f_vgetmantv4sf_round:
37651 error ("the immediate argument must be a 4-bit immediate");
37652 return const0_rtx;
37653 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37654 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37655 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37656 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37657 error ("the immediate argument must be a 5-bit immediate");
37658 return const0_rtx;
37659 default:
37660 error ("the immediate argument must be an 8-bit immediate");
37661 return const0_rtx;
37662 }
37663 }
37664 }
37665 else if (i == nargs-1)
37666 {
37667 if (!insn_p->operand[nargs].predicate (op, SImode))
37668 {
37669 error ("incorrect rounding operand");
37670 return const0_rtx;
37671 }
37672
37673 /* If there is no rounding, use the normal version of the pattern. */
37674 if (INTVAL (op) == NO_ROUND)
37675 redundant_embed_rnd = 1;
37676 }
37677 else
37678 {
37679 if (VECTOR_MODE_P (mode))
37680 op = safe_vector_operand (op, mode);
37681
37682 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37683 {
37684 if (optimize || !match)
37685 op = copy_to_mode_reg (mode, op);
37686 }
37687 else
37688 {
37689 op = copy_to_reg (op);
37690 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37691 }
37692 }
37693
37694 args[i].op = op;
37695 args[i].mode = mode;
37696 }
37697
37698 switch (nargs)
37699 {
37700 case 1:
37701 pat = GEN_FCN (icode) (target, args[0].op);
37702 break;
37703 case 2:
37704 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37705 break;
37706 case 3:
37707 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37708 args[2].op);
37709 break;
37710 case 4:
37711 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37712 args[2].op, args[3].op);
37713 break;
37714 case 5:
37715 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37716 args[2].op, args[3].op, args[4].op);
break;
37717 case 6:
37718 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37719 args[2].op, args[3].op, args[4].op,
37720 args[5].op);
37721 break;
37722 default:
37723 gcc_unreachable ();
37724 }
37725
37726 if (!pat)
37727 return 0;
37728
37729 if (redundant_embed_rnd)
37730 pat = ix86_erase_embedded_rounding (pat);
37731
37732 emit_insn (pat);
37733 return target;
37734 }
37735
37736 /* Subroutine of ix86_expand_builtin to take care of special insns
37737 with variable number of operands. */
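/* Broadly: KLASS says whether the builtin loads (result returned in TARGET)
   or stores (first argument is the destination memory, nothing returned);
   MEMORY is the index of the memory operand, and ALIGNED_MEM marks builtins
   whose memory operand must carry strict GET_MODE_ALIGNMENT alignment.  */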
37738
37739 static rtx
37740 ix86_expand_special_args_builtin (const struct builtin_description *d,
37741 tree exp, rtx target)
37742 {
37743 tree arg;
37744 rtx pat, op;
37745 unsigned int i, nargs, arg_adjust, memory;
37746 bool aligned_mem = false;
37747 struct
37748 {
37749 rtx op;
37750 machine_mode mode;
37751 } args[3];
37752 enum insn_code icode = d->icode;
37753 bool last_arg_constant = false;
37754 const struct insn_data_d *insn_p = &insn_data[icode];
37755 machine_mode tmode = insn_p->operand[0].mode;
37756 enum { load, store } klass;
37757
37758 switch ((enum ix86_builtin_func_type) d->flag)
37759 {
37760 case VOID_FTYPE_VOID:
37761 emit_insn (GEN_FCN (icode) (target));
37762 return 0;
37763 case VOID_FTYPE_UINT64:
37764 case VOID_FTYPE_UNSIGNED:
37765 nargs = 0;
37766 klass = store;
37767 memory = 0;
37768 break;
37769
37770 case INT_FTYPE_VOID:
37771 case USHORT_FTYPE_VOID:
37772 case UINT64_FTYPE_VOID:
37773 case UNSIGNED_FTYPE_VOID:
37774 nargs = 0;
37775 klass = load;
37776 memory = 0;
37777 break;
37778 case UINT64_FTYPE_PUNSIGNED:
37779 case V2DI_FTYPE_PV2DI:
37780 case V4DI_FTYPE_PV4DI:
37781 case V32QI_FTYPE_PCCHAR:
37782 case V16QI_FTYPE_PCCHAR:
37783 case V8SF_FTYPE_PCV4SF:
37784 case V8SF_FTYPE_PCFLOAT:
37785 case V4SF_FTYPE_PCFLOAT:
37786 case V4DF_FTYPE_PCV2DF:
37787 case V4DF_FTYPE_PCDOUBLE:
37788 case V2DF_FTYPE_PCDOUBLE:
37789 case VOID_FTYPE_PVOID:
37790 case V16SI_FTYPE_PV4SI:
37791 case V16SF_FTYPE_PV4SF:
37792 case V8DI_FTYPE_PV4DI:
37793 case V8DI_FTYPE_PV8DI:
37794 case V8DF_FTYPE_PV4DF:
37795 nargs = 1;
37796 klass = load;
37797 memory = 0;
37798 switch (icode)
37799 {
37800 case CODE_FOR_sse4_1_movntdqa:
37801 case CODE_FOR_avx2_movntdqa:
37802 case CODE_FOR_avx512f_movntdqa:
37803 aligned_mem = true;
37804 break;
37805 default:
37806 break;
37807 }
37808 break;
37809 case VOID_FTYPE_PV2SF_V4SF:
37810 case VOID_FTYPE_PV8DI_V8DI:
37811 case VOID_FTYPE_PV4DI_V4DI:
37812 case VOID_FTYPE_PV2DI_V2DI:
37813 case VOID_FTYPE_PCHAR_V32QI:
37814 case VOID_FTYPE_PCHAR_V16QI:
37815 case VOID_FTYPE_PFLOAT_V16SF:
37816 case VOID_FTYPE_PFLOAT_V8SF:
37817 case VOID_FTYPE_PFLOAT_V4SF:
37818 case VOID_FTYPE_PDOUBLE_V8DF:
37819 case VOID_FTYPE_PDOUBLE_V4DF:
37820 case VOID_FTYPE_PDOUBLE_V2DF:
37821 case VOID_FTYPE_PLONGLONG_LONGLONG:
37822 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37823 case VOID_FTYPE_PINT_INT:
37824 nargs = 1;
37825 klass = store;
37826 /* Reserve memory operand for target. */
37827 memory = ARRAY_SIZE (args);
37828 switch (icode)
37829 {
37830 /* These builtins and instructions require the memory
37831 to be properly aligned. */
37832 case CODE_FOR_avx_movntv4di:
37833 case CODE_FOR_sse2_movntv2di:
37834 case CODE_FOR_avx_movntv8sf:
37835 case CODE_FOR_sse_movntv4sf:
37836 case CODE_FOR_sse4a_vmmovntv4sf:
37837 case CODE_FOR_avx_movntv4df:
37838 case CODE_FOR_sse2_movntv2df:
37839 case CODE_FOR_sse4a_vmmovntv2df:
37840 case CODE_FOR_sse2_movntidi:
37841 case CODE_FOR_sse_movntq:
37842 case CODE_FOR_sse2_movntisi:
37843 case CODE_FOR_avx512f_movntv16sf:
37844 case CODE_FOR_avx512f_movntv8df:
37845 case CODE_FOR_avx512f_movntv8di:
37846 aligned_mem = true;
37847 break;
37848 default:
37849 break;
37850 }
37851 break;
37852 case V4SF_FTYPE_V4SF_PCV2SF:
37853 case V2DF_FTYPE_V2DF_PCDOUBLE:
37854 nargs = 2;
37855 klass = load;
37856 memory = 1;
37857 break;
37858 case V8SF_FTYPE_PCV8SF_V8SI:
37859 case V4DF_FTYPE_PCV4DF_V4DI:
37860 case V4SF_FTYPE_PCV4SF_V4SI:
37861 case V2DF_FTYPE_PCV2DF_V2DI:
37862 case V8SI_FTYPE_PCV8SI_V8SI:
37863 case V4DI_FTYPE_PCV4DI_V4DI:
37864 case V4SI_FTYPE_PCV4SI_V4SI:
37865 case V2DI_FTYPE_PCV2DI_V2DI:
37866 nargs = 2;
37867 klass = load;
37868 memory = 0;
37869 break;
37870 case VOID_FTYPE_PV8DF_V8DF_QI:
37871 case VOID_FTYPE_PV16SF_V16SF_HI:
37872 case VOID_FTYPE_PV8DI_V8DI_QI:
37873 case VOID_FTYPE_PV4DI_V4DI_QI:
37874 case VOID_FTYPE_PV2DI_V2DI_QI:
37875 case VOID_FTYPE_PV16SI_V16SI_HI:
37876 case VOID_FTYPE_PV8SI_V8SI_QI:
37877 case VOID_FTYPE_PV4SI_V4SI_QI:
37878 switch (icode)
37879 {
37880 /* These builtins and instructions require the memory
37881 to be properly aligned. */
37882 case CODE_FOR_avx512f_storev16sf_mask:
37883 case CODE_FOR_avx512f_storev16si_mask:
37884 case CODE_FOR_avx512f_storev8df_mask:
37885 case CODE_FOR_avx512f_storev8di_mask:
37886 case CODE_FOR_avx512vl_storev8sf_mask:
37887 case CODE_FOR_avx512vl_storev8si_mask:
37888 case CODE_FOR_avx512vl_storev4df_mask:
37889 case CODE_FOR_avx512vl_storev4di_mask:
37890 case CODE_FOR_avx512vl_storev4sf_mask:
37891 case CODE_FOR_avx512vl_storev4si_mask:
37892 case CODE_FOR_avx512vl_storev2df_mask:
37893 case CODE_FOR_avx512vl_storev2di_mask:
37894 aligned_mem = true;
37895 break;
37896 default:
37897 break;
37898 }
37899 /* FALLTHRU */
37900 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37901 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37902 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37903 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37904 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37905 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37906 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37907 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37908 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37909 case VOID_FTYPE_PFLOAT_V4SF_QI:
37910 case VOID_FTYPE_PV8SI_V8DI_QI:
37911 case VOID_FTYPE_PV8HI_V8DI_QI:
37912 case VOID_FTYPE_PV16HI_V16SI_HI:
37913 case VOID_FTYPE_PV16QI_V8DI_QI:
37914 case VOID_FTYPE_PV16QI_V16SI_HI:
37915 case VOID_FTYPE_PV4SI_V4DI_QI:
37916 case VOID_FTYPE_PV4SI_V2DI_QI:
37917 case VOID_FTYPE_PV8HI_V4DI_QI:
37918 case VOID_FTYPE_PV8HI_V2DI_QI:
37919 case VOID_FTYPE_PV8HI_V8SI_QI:
37920 case VOID_FTYPE_PV8HI_V4SI_QI:
37921 case VOID_FTYPE_PV16QI_V4DI_QI:
37922 case VOID_FTYPE_PV16QI_V2DI_QI:
37923 case VOID_FTYPE_PV16QI_V8SI_QI:
37924 case VOID_FTYPE_PV16QI_V4SI_QI:
37925 case VOID_FTYPE_PV8HI_V8HI_QI:
37926 case VOID_FTYPE_PV16HI_V16HI_HI:
37927 case VOID_FTYPE_PV32HI_V32HI_SI:
37928 case VOID_FTYPE_PV16QI_V16QI_HI:
37929 case VOID_FTYPE_PV32QI_V32QI_SI:
37930 case VOID_FTYPE_PV64QI_V64QI_DI:
37931 case VOID_FTYPE_PV4DF_V4DF_QI:
37932 case VOID_FTYPE_PV2DF_V2DF_QI:
37933 case VOID_FTYPE_PV8SF_V8SF_QI:
37934 case VOID_FTYPE_PV4SF_V4SF_QI:
37935 nargs = 2;
37936 klass = store;
37937 /* Reserve memory operand for target. */
37938 memory = ARRAY_SIZE (args);
37939 break;
37940 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37941 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37942 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37943 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37944 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37945 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37946 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37947 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37948 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37949 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37950 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37951 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37952 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37953 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37954 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37955 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37956 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37957 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37958 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37959 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37960 nargs = 3;
37961 klass = load;
37962 memory = 0;
37963 switch (icode)
37964 {
37965 /* These builtins and instructions require the memory
37966 to be properly aligned. */
37967 case CODE_FOR_avx512f_loadv16sf_mask:
37968 case CODE_FOR_avx512f_loadv16si_mask:
37969 case CODE_FOR_avx512f_loadv8df_mask:
37970 case CODE_FOR_avx512f_loadv8di_mask:
37971 case CODE_FOR_avx512vl_loadv8sf_mask:
37972 case CODE_FOR_avx512vl_loadv8si_mask:
37973 case CODE_FOR_avx512vl_loadv4df_mask:
37974 case CODE_FOR_avx512vl_loadv4di_mask:
37975 case CODE_FOR_avx512vl_loadv4sf_mask:
37976 case CODE_FOR_avx512vl_loadv4si_mask:
37977 case CODE_FOR_avx512vl_loadv2df_mask:
37978 case CODE_FOR_avx512vl_loadv2di_mask:
37979 case CODE_FOR_avx512bw_loadv64qi_mask:
37980 case CODE_FOR_avx512vl_loadv32qi_mask:
37981 case CODE_FOR_avx512vl_loadv16qi_mask:
37982 case CODE_FOR_avx512bw_loadv32hi_mask:
37983 case CODE_FOR_avx512vl_loadv16hi_mask:
37984 case CODE_FOR_avx512vl_loadv8hi_mask:
37985 aligned_mem = true;
37986 break;
37987 default:
37988 break;
37989 }
37990 break;
37991 case VOID_FTYPE_UINT_UINT_UINT:
37992 case VOID_FTYPE_UINT64_UINT_UINT:
37993 case UCHAR_FTYPE_UINT_UINT_UINT:
37994 case UCHAR_FTYPE_UINT64_UINT_UINT:
37995 nargs = 3;
37996 klass = load;
37997 memory = ARRAY_SIZE (args);
37998 last_arg_constant = true;
37999 break;
38000 default:
38001 gcc_unreachable ();
38002 }
38003
38004 gcc_assert (nargs <= ARRAY_SIZE (args));
38005
38006 if (klass == store)
38007 {
38008 arg = CALL_EXPR_ARG (exp, 0);
38009 op = expand_normal (arg);
38010 gcc_assert (target == 0);
38011 if (memory)
38012 {
38013 op = ix86_zero_extend_to_Pmode (op);
38014 target = gen_rtx_MEM (tmode, op);
38015 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38016 on it. Try to improve it using get_pointer_alignment,
38017 and if the special builtin is one that requires strict
38018 mode alignment, also from its GET_MODE_ALIGNMENT.
38019 Failure to do so could lead to ix86_legitimate_combined_insn
38020 rejecting all changes to such insns. */
38021 unsigned int align = get_pointer_alignment (arg);
38022 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38023 align = GET_MODE_ALIGNMENT (tmode);
38024 if (MEM_ALIGN (target) < align)
38025 set_mem_align (target, align);
38026 }
38027 else
38028 target = force_reg (tmode, op);
38029 arg_adjust = 1;
38030 }
38031 else
38032 {
38033 arg_adjust = 0;
38034 if (optimize
38035 || target == 0
38036 || !register_operand (target, tmode)
38037 || GET_MODE (target) != tmode)
38038 target = gen_reg_rtx (tmode);
38039 }
38040
38041 for (i = 0; i < nargs; i++)
38042 {
38043 machine_mode mode = insn_p->operand[i + 1].mode;
38044 bool match;
38045
38046 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38047 op = expand_normal (arg);
38048 match = insn_p->operand[i + 1].predicate (op, mode);
38049
38050 if (last_arg_constant && (i + 1) == nargs)
38051 {
38052 if (!match)
38053 {
38054 if (icode == CODE_FOR_lwp_lwpvalsi3
38055 || icode == CODE_FOR_lwp_lwpinssi3
38056 || icode == CODE_FOR_lwp_lwpvaldi3
38057 || icode == CODE_FOR_lwp_lwpinsdi3)
38058 error ("the last argument must be a 32-bit immediate");
38059 else
38060 error ("the last argument must be an 8-bit immediate");
38061 return const0_rtx;
38062 }
38063 }
38064 else
38065 {
38066 if (i == memory)
38067 {
38068 /* This must be the memory operand. */
38069 op = ix86_zero_extend_to_Pmode (op);
38070 op = gen_rtx_MEM (mode, op);
38071 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38072 on it. Try to improve it using get_pointer_alignment,
38073 and if the special builtin is one that requires strict
38074 mode alignment, also from its GET_MODE_ALIGNMENT.
38075 Failure to do so could lead to ix86_legitimate_combined_insn
38076 rejecting all changes to such insns. */
38077 unsigned int align = get_pointer_alignment (arg);
38078 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38079 align = GET_MODE_ALIGNMENT (mode);
38080 if (MEM_ALIGN (op) < align)
38081 set_mem_align (op, align);
38082 }
38083 else
38084 {
38085 /* This must be a register. */
38086 if (VECTOR_MODE_P (mode))
38087 op = safe_vector_operand (op, mode);
38088
38089 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38090 op = copy_to_mode_reg (mode, op);
38091 else
38092 {
38093 op = copy_to_reg (op);
38094 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38095 }
38096 }
38097 }
38098
38099 args[i].op = op;
38100 args[i].mode = mode;
38101 }
38102
38103 switch (nargs)
38104 {
38105 case 0:
38106 pat = GEN_FCN (icode) (target);
38107 break;
38108 case 1:
38109 pat = GEN_FCN (icode) (target, args[0].op);
38110 break;
38111 case 2:
38112 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38113 break;
38114 case 3:
38115 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38116 break;
38117 default:
38118 gcc_unreachable ();
38119 }
38120
38121 if (! pat)
38122 return 0;
38123 emit_insn (pat);
38124 return klass == store ? 0 : target;
38125 }
38126
38127 /* Return the integer constant in ARG. Constrain it to be in the range
38128 of the subparts of VEC_TYPE; issue an error if not. */
38129
38130 static int
38131 get_element_number (tree vec_type, tree arg)
38132 {
38133 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38134
38135 if (!tree_fits_uhwi_p (arg)
38136 || (elt = tree_to_uhwi (arg), elt > max))
38137 {
38138 error ("selector must be an integer constant in the range 0..%wi", max);
38139 return 0;
38140 }
38141
38142 return elt;
38143 }
38144
38145 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38146 ix86_expand_vector_init. We DO have language-level syntax for this, in
38147 the form of (type){ init-list }. Except that since we can't place emms
38148 instructions from inside the compiler, we can't allow the use of MMX
38149 registers unless the user explicitly asks for it. So we do *not* define
38150 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38151 we have builtins invoked by mmintrin.h that give us license to emit
38152 these sorts of instructions. */
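/* Editorial illustration (not part of the original comment): mmintrin.h
   wraps these builtins, so a call such as

     __m64 v = _mm_set_pi32 (hi, lo);

   is expected to reach ix86_expand_vec_init_builtin below through an
   IX86_BUILTIN_VEC_INIT_* code rather than through a generic vec_init
   pattern.  */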
38153
38154 static rtx
38155 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38156 {
38157 machine_mode tmode = TYPE_MODE (type);
38158 machine_mode inner_mode = GET_MODE_INNER (tmode);
38159 int i, n_elt = GET_MODE_NUNITS (tmode);
38160 rtvec v = rtvec_alloc (n_elt);
38161
38162 gcc_assert (VECTOR_MODE_P (tmode));
38163 gcc_assert (call_expr_nargs (exp) == n_elt);
38164
38165 for (i = 0; i < n_elt; ++i)
38166 {
38167 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38168 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38169 }
38170
38171 if (!target || !register_operand (target, tmode))
38172 target = gen_reg_rtx (tmode);
38173
38174 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38175 return target;
38176 }
38177
38178 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38179 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38180 had a language-level syntax for referencing vector elements. */
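/* Editorial illustration: header wrappers such as _mm_extract_pi16 are
   assumed here to funnel into this helper via one of the
   IX86_BUILTIN_VEC_EXT_* codes handled in ix86_expand_builtin below.  */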
38181
38182 static rtx
38183 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38184 {
38185 machine_mode tmode, mode0;
38186 tree arg0, arg1;
38187 int elt;
38188 rtx op0;
38189
38190 arg0 = CALL_EXPR_ARG (exp, 0);
38191 arg1 = CALL_EXPR_ARG (exp, 1);
38192
38193 op0 = expand_normal (arg0);
38194 elt = get_element_number (TREE_TYPE (arg0), arg1);
38195
38196 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38197 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38198 gcc_assert (VECTOR_MODE_P (mode0));
38199
38200 op0 = force_reg (mode0, op0);
38201
38202 if (optimize || !target || !register_operand (target, tmode))
38203 target = gen_reg_rtx (tmode);
38204
38205 ix86_expand_vector_extract (true, target, op0, elt);
38206
38207 return target;
38208 }
38209
38210 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38211 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38212 a language-level syntax for referencing vector elements. */
38213
38214 static rtx
38215 ix86_expand_vec_set_builtin (tree exp)
38216 {
38217 machine_mode tmode, mode1;
38218 tree arg0, arg1, arg2;
38219 int elt;
38220 rtx op0, op1, target;
38221
38222 arg0 = CALL_EXPR_ARG (exp, 0);
38223 arg1 = CALL_EXPR_ARG (exp, 1);
38224 arg2 = CALL_EXPR_ARG (exp, 2);
38225
38226 tmode = TYPE_MODE (TREE_TYPE (arg0));
38227 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38228 gcc_assert (VECTOR_MODE_P (tmode));
38229
38230 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38231 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38232 elt = get_element_number (TREE_TYPE (arg0), arg2);
38233
38234 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38235 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38236
38237 op0 = force_reg (tmode, op0);
38238 op1 = force_reg (mode1, op1);
38239
38240 /* OP0 is the source of these builtin functions and shouldn't be
38241 modified. Create a copy, use it and return it as target. */
38242 target = gen_reg_rtx (tmode);
38243 emit_move_insn (target, op0);
38244 ix86_expand_vector_set (true, target, op1, elt);
38245
38246 return target;
38247 }
38248
38249 /* Emit conditional move of SRC to DST with condition
38250 OP1 CODE OP2. */
38251 static void
38252 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38253 {
38254 rtx t;
38255
38256 if (TARGET_CMOVE)
38257 {
38258 t = ix86_expand_compare (code, op1, op2);
38259 emit_insn (gen_rtx_SET (VOIDmode, dst,
38260 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38261 src, dst)));
38262 }
38263 else
38264 {
38265 rtx nomove = gen_label_rtx ();
38266 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38267 const0_rtx, GET_MODE (op1), 1, nomove);
38268 emit_move_insn (dst, src);
38269 emit_label (nomove);
38270 }
38271 }
38272
38273 /* Choose max of DST and SRC and put it to DST. */
38274 static void
38275 ix86_emit_move_max (rtx dst, rtx src)
38276 {
38277 ix86_emit_cmove (dst, src, LTU, dst, src);
38278 }
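/* Editorial note: MPX keeps the upper bound in one's complement form,
   so bound intersection can use the same unsigned-max helper for both
   halves: max (lb1, lb2) picks the larger lower bound, while
   max (~ub1, ~ub2) == ~min (ub1, ub2) picks the smaller upper bound.
   E.g. for ub1 = 0x10 and ub2 = 0x20, ~0x10 > ~0x20, so the max keeps
   the tighter bound 0x10.  */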
38279
38280 /* Expand an expression EXP that calls a built-in function,
38281 with result going to TARGET if that's convenient
38282 (and in mode MODE if that's convenient).
38283 SUBTARGET may be used as the target for computing one of EXP's operands.
38284 IGNORE is nonzero if the value is to be ignored. */
38285
38286 static rtx
38287 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38288 machine_mode mode, int ignore)
38289 {
38290 const struct builtin_description *d;
38291 size_t i;
38292 enum insn_code icode;
38293 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38294 tree arg0, arg1, arg2, arg3, arg4;
38295 rtx op0, op1, op2, op3, op4, pat, insn;
38296 machine_mode mode0, mode1, mode2, mode3, mode4;
38297 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38298
38299 /* For CPU builtins that can be folded, fold first and expand the fold. */
38300 switch (fcode)
38301 {
38302 case IX86_BUILTIN_CPU_INIT:
38303 {
38304 /* Make it call __cpu_indicator_init in libgcc. */
38305 tree call_expr, fndecl, type;
38306 type = build_function_type_list (integer_type_node, NULL_TREE);
38307 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38308 call_expr = build_call_expr (fndecl, 0);
38309 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38310 }
38311 case IX86_BUILTIN_CPU_IS:
38312 case IX86_BUILTIN_CPU_SUPPORTS:
38313 {
38314 tree arg0 = CALL_EXPR_ARG (exp, 0);
38315 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38316 gcc_assert (fold_expr != NULL_TREE);
38317 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38318 }
38319 }
38320
38321 /* Determine whether the builtin function is available under the current ISA.
38322 Originally the builtin was not created if it wasn't applicable to the
38323 current ISA based on the command line switches. With function specific
38324 options, we need to check in the context of the function making the call
38325 whether it is supported. */
38326 if (ix86_builtins_isa[fcode].isa
38327 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38328 {
38329 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38330 NULL, (enum fpmath_unit) 0, false);
38331
38332 if (!opts)
38333 error ("%qE needs unknown isa option", fndecl);
38334 else
38335 {
38336 gcc_assert (opts != NULL);
38337 error ("%qE needs isa option %s", fndecl, opts);
38338 free (opts);
38339 }
38340 return const0_rtx;
38341 }
38342
38343 switch (fcode)
38344 {
38345 case IX86_BUILTIN_BNDMK:
38346 if (!target
38347 || GET_MODE (target) != BNDmode
38348 || !register_operand (target, BNDmode))
38349 target = gen_reg_rtx (BNDmode);
38350
38351 arg0 = CALL_EXPR_ARG (exp, 0);
38352 arg1 = CALL_EXPR_ARG (exp, 1);
38353
38354 op0 = expand_normal (arg0);
38355 op1 = expand_normal (arg1);
38356
38357 if (!register_operand (op0, Pmode))
38358 op0 = ix86_zero_extend_to_Pmode (op0);
38359 if (!register_operand (op1, Pmode))
38360 op1 = ix86_zero_extend_to_Pmode (op1);
38361
38362 /* Builtin arg1 is the size of the block, but instruction op1
38363 should be (size - 1). */
38364 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38365 NULL_RTX, 1, OPTAB_DIRECT);
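/* Editorial example: for a 10-byte object at P this leaves op1 == 9,
   so the created bounds describe [P, P + 9] with an inclusive upper
   bound, which is what the bndmk patterns expect.  */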
38366
38367 emit_insn (BNDmode == BND64mode
38368 ? gen_bnd64_mk (target, op0, op1)
38369 : gen_bnd32_mk (target, op0, op1));
38370 return target;
38371
38372 case IX86_BUILTIN_BNDSTX:
38373 arg0 = CALL_EXPR_ARG (exp, 0);
38374 arg1 = CALL_EXPR_ARG (exp, 1);
38375 arg2 = CALL_EXPR_ARG (exp, 2);
38376
38377 op0 = expand_normal (arg0);
38378 op1 = expand_normal (arg1);
38379 op2 = expand_normal (arg2);
38380
38381 if (!register_operand (op0, Pmode))
38382 op0 = ix86_zero_extend_to_Pmode (op0);
38383 if (!register_operand (op1, BNDmode))
38384 op1 = copy_to_mode_reg (BNDmode, op1);
38385 if (!register_operand (op2, Pmode))
38386 op2 = ix86_zero_extend_to_Pmode (op2);
38387
38388 emit_insn (BNDmode == BND64mode
38389 ? gen_bnd64_stx (op2, op0, op1)
38390 : gen_bnd32_stx (op2, op0, op1));
38391 return 0;
38392
38393 case IX86_BUILTIN_BNDLDX:
38394 if (!target
38395 || GET_MODE (target) != BNDmode
38396 || !register_operand (target, BNDmode))
38397 target = gen_reg_rtx (BNDmode);
38398
38399 arg0 = CALL_EXPR_ARG (exp, 0);
38400 arg1 = CALL_EXPR_ARG (exp, 1);
38401
38402 op0 = expand_normal (arg0);
38403 op1 = expand_normal (arg1);
38404
38405 if (!register_operand (op0, Pmode))
38406 op0 = ix86_zero_extend_to_Pmode (op0);
38407 if (!register_operand (op1, Pmode))
38408 op1 = ix86_zero_extend_to_Pmode (op1);
38409
38410 emit_insn (BNDmode == BND64mode
38411 ? gen_bnd64_ldx (target, op0, op1)
38412 : gen_bnd32_ldx (target, op0, op1));
38413 return target;
38414
38415 case IX86_BUILTIN_BNDCL:
38416 arg0 = CALL_EXPR_ARG (exp, 0);
38417 arg1 = CALL_EXPR_ARG (exp, 1);
38418
38419 op0 = expand_normal (arg0);
38420 op1 = expand_normal (arg1);
38421
38422 if (!register_operand (op0, Pmode))
38423 op0 = ix86_zero_extend_to_Pmode (op0);
38424 if (!register_operand (op1, BNDmode))
38425 op1 = copy_to_mode_reg (BNDmode, op1);
38426
38427 emit_insn (BNDmode == BND64mode
38428 ? gen_bnd64_cl (op1, op0)
38429 : gen_bnd32_cl (op1, op0));
38430 return 0;
38431
38432 case IX86_BUILTIN_BNDCU:
38433 arg0 = CALL_EXPR_ARG (exp, 0);
38434 arg1 = CALL_EXPR_ARG (exp, 1);
38435
38436 op0 = expand_normal (arg0);
38437 op1 = expand_normal (arg1);
38438
38439 if (!register_operand (op0, Pmode))
38440 op0 = ix86_zero_extend_to_Pmode (op0);
38441 if (!register_operand (op1, BNDmode))
38442 op1 = copy_to_mode_reg (BNDmode, op1);
38443
38444 emit_insn (BNDmode == BND64mode
38445 ? gen_bnd64_cu (op1, op0)
38446 : gen_bnd32_cu (op1, op0));
38447 return 0;
38448
38449 case IX86_BUILTIN_BNDRET:
38450 arg0 = CALL_EXPR_ARG (exp, 0);
38451 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38452 target = chkp_get_rtl_bounds (arg0);
38453
38454 /* If no bounds were specified for the returned value,
38455 then use INIT bounds. This usually happens when
38456 some built-in function is expanded. */
38457 if (!target)
38458 {
38459 rtx t1 = gen_reg_rtx (Pmode);
38460 rtx t2 = gen_reg_rtx (Pmode);
38461 target = gen_reg_rtx (BNDmode);
38462 emit_move_insn (t1, const0_rtx);
38463 emit_move_insn (t2, constm1_rtx);
38464 emit_insn (BNDmode == BND64mode
38465 ? gen_bnd64_mk (target, t1, t2)
38466 : gen_bnd32_mk (target, t1, t2));
38467 }
38468
38469 gcc_assert (target && REG_P (target));
38470 return target;
38471
38472 case IX86_BUILTIN_BNDNARROW:
38473 {
38474 rtx m1, m1h1, m1h2, lb, ub, t1;
38475
38476 /* Return value and lb. */
38477 arg0 = CALL_EXPR_ARG (exp, 0);
38478 /* Bounds. */
38479 arg1 = CALL_EXPR_ARG (exp, 1);
38480 /* Size. */
38481 arg2 = CALL_EXPR_ARG (exp, 2);
38482
38483 lb = expand_normal (arg0);
38484 op1 = expand_normal (arg1);
38485 op2 = expand_normal (arg2);
38486
38487 /* Size was passed but we need to use (size - 1) as for bndmk. */
38488 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38489 NULL_RTX, 1, OPTAB_DIRECT);
38490
38491 /* Add LB to size and invert to get UB. */
38492 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38493 op2, 1, OPTAB_DIRECT);
38494 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
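/* Editorial note: UB now holds ~(LB + size - 1), i.e. the one's
   complement form in which upper bounds are kept, so the max-based
   intersection below applies to it directly.  */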
38495
38496 if (!register_operand (lb, Pmode))
38497 lb = ix86_zero_extend_to_Pmode (lb);
38498 if (!register_operand (ub, Pmode))
38499 ub = ix86_zero_extend_to_Pmode (ub);
38500
38501 /* We need to move bounds to memory before any computations. */
38502 if (MEM_P (op1))
38503 m1 = op1;
38504 else
38505 {
38506 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38507 emit_move_insn (m1, op1);
38508 }
38509
38510 /* Generate mem expression to be used for access to LB and UB. */
38511 m1h1 = adjust_address (m1, Pmode, 0);
38512 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38513
38514 t1 = gen_reg_rtx (Pmode);
38515
38516 /* Compute LB. */
38517 emit_move_insn (t1, m1h1);
38518 ix86_emit_move_max (t1, lb);
38519 emit_move_insn (m1h1, t1);
38520
38521 /* Compute UB. UB is stored in 1's complement form. Therefore
38522 we also use max here. */
38523 emit_move_insn (t1, m1h2);
38524 ix86_emit_move_max (t1, ub);
38525 emit_move_insn (m1h2, t1);
38526
38527 op2 = gen_reg_rtx (BNDmode);
38528 emit_move_insn (op2, m1);
38529
38530 return chkp_join_splitted_slot (lb, op2);
38531 }
38532
38533 case IX86_BUILTIN_BNDINT:
38534 {
38535 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38536
38537 if (!target
38538 || GET_MODE (target) != BNDmode
38539 || !register_operand (target, BNDmode))
38540 target = gen_reg_rtx (BNDmode);
38541
38542 arg0 = CALL_EXPR_ARG (exp, 0);
38543 arg1 = CALL_EXPR_ARG (exp, 1);
38544
38545 op0 = expand_normal (arg0);
38546 op1 = expand_normal (arg1);
38547
38548 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38549 rh1 = adjust_address (res, Pmode, 0);
38550 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38551
38552 /* Put first bounds to temporaries. */
38553 lb1 = gen_reg_rtx (Pmode);
38554 ub1 = gen_reg_rtx (Pmode);
38555 if (MEM_P (op0))
38556 {
38557 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38558 emit_move_insn (ub1, adjust_address (op0, Pmode,
38559 GET_MODE_SIZE (Pmode)));
38560 }
38561 else
38562 {
38563 emit_move_insn (res, op0);
38564 emit_move_insn (lb1, rh1);
38565 emit_move_insn (ub1, rh2);
38566 }
38567
38568 /* Put second bounds to temporaries. */
38569 lb2 = gen_reg_rtx (Pmode);
38570 ub2 = gen_reg_rtx (Pmode);
38571 if (MEM_P (op1))
38572 {
38573 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38574 emit_move_insn (ub2, adjust_address (op1, Pmode,
38575 GET_MODE_SIZE (Pmode)));
38576 }
38577 else
38578 {
38579 emit_move_insn (res, op1);
38580 emit_move_insn (lb2, rh1);
38581 emit_move_insn (ub2, rh2);
38582 }
38583
38584 /* Compute LB. */
38585 ix86_emit_move_max (lb1, lb2);
38586 emit_move_insn (rh1, lb1);
38587
38588 /* Compute UB. UB is stored in 1's complement form. Therefore
38589 we also use max here. */
38590 ix86_emit_move_max (ub1, ub2);
38591 emit_move_insn (rh2, ub1);
38592
38593 emit_move_insn (target, res);
38594
38595 return target;
38596 }
38597
38598 case IX86_BUILTIN_SIZEOF:
38599 {
38600 tree name;
38601 rtx symbol;
38602
38603 if (!target
38604 || GET_MODE (target) != Pmode
38605 || !register_operand (target, Pmode))
38606 target = gen_reg_rtx (Pmode);
38607
38608 arg0 = CALL_EXPR_ARG (exp, 0);
38609 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38610
38611 name = DECL_ASSEMBLER_NAME (arg0);
38612 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38613
38614 emit_insn (Pmode == SImode
38615 ? gen_move_size_reloc_si (target, symbol)
38616 : gen_move_size_reloc_di (target, symbol));
38617
38618 return target;
38619 }
38620
38621 case IX86_BUILTIN_BNDLOWER:
38622 {
38623 rtx mem, hmem;
38624
38625 if (!target
38626 || GET_MODE (target) != Pmode
38627 || !register_operand (target, Pmode))
38628 target = gen_reg_rtx (Pmode);
38629
38630 arg0 = CALL_EXPR_ARG (exp, 0);
38631 op0 = expand_normal (arg0);
38632
38633 /* We need to move bounds to memory first. */
38634 if (MEM_P (op0))
38635 mem = op0;
38636 else
38637 {
38638 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38639 emit_move_insn (mem, op0);
38640 }
38641
38642 /* Generate mem expression to access LB and load it. */
38643 hmem = adjust_address (mem, Pmode, 0);
38644 emit_move_insn (target, hmem);
38645
38646 return target;
38647 }
38648
38649 case IX86_BUILTIN_BNDUPPER:
38650 {
38651 rtx mem, hmem, res;
38652
38653 if (!target
38654 || GET_MODE (target) != Pmode
38655 || !register_operand (target, Pmode))
38656 target = gen_reg_rtx (Pmode);
38657
38658 arg0 = CALL_EXPR_ARG (exp, 0);
38659 op0 = expand_normal (arg0);
38660
38661 /* We need to move bounds to memory first. */
38662 if (MEM_P (op0))
38663 mem = op0;
38664 else
38665 {
38666 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38667 emit_move_insn (mem, op0);
38668 }
38669
38670 /* Generate mem expression to access UB. */
38671 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38672
38673 /* We need to invert all bits of UB. */
38674 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38675
38676 if (res != target)
38677 emit_move_insn (target, res);
38678
38679 return target;
38680 }
38681
38682 case IX86_BUILTIN_MASKMOVQ:
38683 case IX86_BUILTIN_MASKMOVDQU:
38684 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38685 ? CODE_FOR_mmx_maskmovq
38686 : CODE_FOR_sse2_maskmovdqu);
38687 /* Note the arg order is different from the operand order. */
38688 arg1 = CALL_EXPR_ARG (exp, 0);
38689 arg2 = CALL_EXPR_ARG (exp, 1);
38690 arg0 = CALL_EXPR_ARG (exp, 2);
38691 op0 = expand_normal (arg0);
38692 op1 = expand_normal (arg1);
38693 op2 = expand_normal (arg2);
38694 mode0 = insn_data[icode].operand[0].mode;
38695 mode1 = insn_data[icode].operand[1].mode;
38696 mode2 = insn_data[icode].operand[2].mode;
38697
38698 op0 = ix86_zero_extend_to_Pmode (op0);
38699 op0 = gen_rtx_MEM (mode1, op0);
38700
38701 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38702 op0 = copy_to_mode_reg (mode0, op0);
38703 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38704 op1 = copy_to_mode_reg (mode1, op1);
38705 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38706 op2 = copy_to_mode_reg (mode2, op2);
38707 pat = GEN_FCN (icode) (op0, op1, op2);
38708 if (! pat)
38709 return 0;
38710 emit_insn (pat);
38711 return 0;
38712
38713 case IX86_BUILTIN_LDMXCSR:
38714 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38715 target = assign_386_stack_local (SImode, SLOT_TEMP);
38716 emit_move_insn (target, op0);
38717 emit_insn (gen_sse_ldmxcsr (target));
38718 return 0;
38719
38720 case IX86_BUILTIN_STMXCSR:
38721 target = assign_386_stack_local (SImode, SLOT_TEMP);
38722 emit_insn (gen_sse_stmxcsr (target));
38723 return copy_to_mode_reg (SImode, target);
38724
38725 case IX86_BUILTIN_CLFLUSH:
38726 arg0 = CALL_EXPR_ARG (exp, 0);
38727 op0 = expand_normal (arg0);
38728 icode = CODE_FOR_sse2_clflush;
38729 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38730 op0 = ix86_zero_extend_to_Pmode (op0);
38731
38732 emit_insn (gen_sse2_clflush (op0));
38733 return 0;
38734
38735 case IX86_BUILTIN_CLWB:
38736 arg0 = CALL_EXPR_ARG (exp, 0);
38737 op0 = expand_normal (arg0);
38738 icode = CODE_FOR_clwb;
38739 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38740 op0 = ix86_zero_extend_to_Pmode (op0);
38741
38742 emit_insn (gen_clwb (op0));
38743 return 0;
38744
38745 case IX86_BUILTIN_CLFLUSHOPT:
38746 arg0 = CALL_EXPR_ARG (exp, 0);
38747 op0 = expand_normal (arg0);
38748 icode = CODE_FOR_clflushopt;
38749 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38750 op0 = ix86_zero_extend_to_Pmode (op0);
38751
38752 emit_insn (gen_clflushopt (op0));
38753 return 0;
38754
38755 case IX86_BUILTIN_MONITOR:
38756 arg0 = CALL_EXPR_ARG (exp, 0);
38757 arg1 = CALL_EXPR_ARG (exp, 1);
38758 arg2 = CALL_EXPR_ARG (exp, 2);
38759 op0 = expand_normal (arg0);
38760 op1 = expand_normal (arg1);
38761 op2 = expand_normal (arg2);
38762 if (!REG_P (op0))
38763 op0 = ix86_zero_extend_to_Pmode (op0);
38764 if (!REG_P (op1))
38765 op1 = copy_to_mode_reg (SImode, op1);
38766 if (!REG_P (op2))
38767 op2 = copy_to_mode_reg (SImode, op2);
38768 emit_insn (ix86_gen_monitor (op0, op1, op2));
38769 return 0;
38770
38771 case IX86_BUILTIN_MWAIT:
38772 arg0 = CALL_EXPR_ARG (exp, 0);
38773 arg1 = CALL_EXPR_ARG (exp, 1);
38774 op0 = expand_normal (arg0);
38775 op1 = expand_normal (arg1);
38776 if (!REG_P (op0))
38777 op0 = copy_to_mode_reg (SImode, op0);
38778 if (!REG_P (op1))
38779 op1 = copy_to_mode_reg (SImode, op1);
38780 emit_insn (gen_sse3_mwait (op0, op1));
38781 return 0;
38782
38783 case IX86_BUILTIN_VEC_INIT_V2SI:
38784 case IX86_BUILTIN_VEC_INIT_V4HI:
38785 case IX86_BUILTIN_VEC_INIT_V8QI:
38786 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38787
38788 case IX86_BUILTIN_VEC_EXT_V2DF:
38789 case IX86_BUILTIN_VEC_EXT_V2DI:
38790 case IX86_BUILTIN_VEC_EXT_V4SF:
38791 case IX86_BUILTIN_VEC_EXT_V4SI:
38792 case IX86_BUILTIN_VEC_EXT_V8HI:
38793 case IX86_BUILTIN_VEC_EXT_V2SI:
38794 case IX86_BUILTIN_VEC_EXT_V4HI:
38795 case IX86_BUILTIN_VEC_EXT_V16QI:
38796 return ix86_expand_vec_ext_builtin (exp, target);
38797
38798 case IX86_BUILTIN_VEC_SET_V2DI:
38799 case IX86_BUILTIN_VEC_SET_V4SF:
38800 case IX86_BUILTIN_VEC_SET_V4SI:
38801 case IX86_BUILTIN_VEC_SET_V8HI:
38802 case IX86_BUILTIN_VEC_SET_V4HI:
38803 case IX86_BUILTIN_VEC_SET_V16QI:
38804 return ix86_expand_vec_set_builtin (exp);
38805
38806 case IX86_BUILTIN_INFQ:
38807 case IX86_BUILTIN_HUGE_VALQ:
38808 {
38809 REAL_VALUE_TYPE inf;
38810 rtx tmp;
38811
38812 real_inf (&inf);
38813 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38814
38815 tmp = validize_mem (force_const_mem (mode, tmp));
38816
38817 if (target == 0)
38818 target = gen_reg_rtx (mode);
38819
38820 emit_move_insn (target, tmp);
38821 return target;
38822 }
38823
38824 case IX86_BUILTIN_RDPMC:
38825 case IX86_BUILTIN_RDTSC:
38826 case IX86_BUILTIN_RDTSCP:
38827
38828 op0 = gen_reg_rtx (DImode);
38829 op1 = gen_reg_rtx (DImode);
38830
38831 if (fcode == IX86_BUILTIN_RDPMC)
38832 {
38833 arg0 = CALL_EXPR_ARG (exp, 0);
38834 op2 = expand_normal (arg0);
38835 if (!register_operand (op2, SImode))
38836 op2 = copy_to_mode_reg (SImode, op2);
38837
38838 insn = (TARGET_64BIT
38839 ? gen_rdpmc_rex64 (op0, op1, op2)
38840 : gen_rdpmc (op0, op2));
38841 emit_insn (insn);
38842 }
38843 else if (fcode == IX86_BUILTIN_RDTSC)
38844 {
38845 insn = (TARGET_64BIT
38846 ? gen_rdtsc_rex64 (op0, op1)
38847 : gen_rdtsc (op0));
38848 emit_insn (insn);
38849 }
38850 else
38851 {
38852 op2 = gen_reg_rtx (SImode);
38853
38854 insn = (TARGET_64BIT
38855 ? gen_rdtscp_rex64 (op0, op1, op2)
38856 : gen_rdtscp (op0, op2));
38857 emit_insn (insn);
38858
38859 arg0 = CALL_EXPR_ARG (exp, 0);
38860 op4 = expand_normal (arg0);
38861 if (!address_operand (op4, VOIDmode))
38862 {
38863 op4 = convert_memory_address (Pmode, op4);
38864 op4 = copy_addr_to_reg (op4);
38865 }
38866 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38867 }
38868
38869 if (target == 0)
38870 {
38871 /* mode is VOIDmode if __builtin_rd* has been called
38872 without lhs. */
38873 if (mode == VOIDmode)
38874 return target;
38875 target = gen_reg_rtx (mode);
38876 }
38877
38878 if (TARGET_64BIT)
38879 {
38880 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38881 op1, 1, OPTAB_DIRECT);
38882 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38883 op0, 1, OPTAB_DIRECT);
38884 }
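/* Editorial note: on 64-bit targets the two halves delivered in
   EAX/EDX are combined above as op0 | (op1 << 32) so that op0 holds
   the full 64-bit result before it is moved into TARGET.  */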
38885
38886 emit_move_insn (target, op0);
38887 return target;
38888
38889 case IX86_BUILTIN_FXSAVE:
38890 case IX86_BUILTIN_FXRSTOR:
38891 case IX86_BUILTIN_FXSAVE64:
38892 case IX86_BUILTIN_FXRSTOR64:
38893 case IX86_BUILTIN_FNSTENV:
38894 case IX86_BUILTIN_FLDENV:
38895 mode0 = BLKmode;
38896 switch (fcode)
38897 {
38898 case IX86_BUILTIN_FXSAVE:
38899 icode = CODE_FOR_fxsave;
38900 break;
38901 case IX86_BUILTIN_FXRSTOR:
38902 icode = CODE_FOR_fxrstor;
38903 break;
38904 case IX86_BUILTIN_FXSAVE64:
38905 icode = CODE_FOR_fxsave64;
38906 break;
38907 case IX86_BUILTIN_FXRSTOR64:
38908 icode = CODE_FOR_fxrstor64;
38909 break;
38910 case IX86_BUILTIN_FNSTENV:
38911 icode = CODE_FOR_fnstenv;
38912 break;
38913 case IX86_BUILTIN_FLDENV:
38914 icode = CODE_FOR_fldenv;
38915 break;
38916 default:
38917 gcc_unreachable ();
38918 }
38919
38920 arg0 = CALL_EXPR_ARG (exp, 0);
38921 op0 = expand_normal (arg0);
38922
38923 if (!address_operand (op0, VOIDmode))
38924 {
38925 op0 = convert_memory_address (Pmode, op0);
38926 op0 = copy_addr_to_reg (op0);
38927 }
38928 op0 = gen_rtx_MEM (mode0, op0);
38929
38930 pat = GEN_FCN (icode) (op0);
38931 if (pat)
38932 emit_insn (pat);
38933 return 0;
38934
38935 case IX86_BUILTIN_XSAVE:
38936 case IX86_BUILTIN_XRSTOR:
38937 case IX86_BUILTIN_XSAVE64:
38938 case IX86_BUILTIN_XRSTOR64:
38939 case IX86_BUILTIN_XSAVEOPT:
38940 case IX86_BUILTIN_XSAVEOPT64:
38941 case IX86_BUILTIN_XSAVES:
38942 case IX86_BUILTIN_XRSTORS:
38943 case IX86_BUILTIN_XSAVES64:
38944 case IX86_BUILTIN_XRSTORS64:
38945 case IX86_BUILTIN_XSAVEC:
38946 case IX86_BUILTIN_XSAVEC64:
38947 arg0 = CALL_EXPR_ARG (exp, 0);
38948 arg1 = CALL_EXPR_ARG (exp, 1);
38949 op0 = expand_normal (arg0);
38950 op1 = expand_normal (arg1);
38951
38952 if (!address_operand (op0, VOIDmode))
38953 {
38954 op0 = convert_memory_address (Pmode, op0);
38955 op0 = copy_addr_to_reg (op0);
38956 }
38957 op0 = gen_rtx_MEM (BLKmode, op0);
38958
38959 op1 = force_reg (DImode, op1);
38960
38961 if (TARGET_64BIT)
38962 {
38963 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38964 NULL, 1, OPTAB_DIRECT);
38965 switch (fcode)
38966 {
38967 case IX86_BUILTIN_XSAVE:
38968 icode = CODE_FOR_xsave_rex64;
38969 break;
38970 case IX86_BUILTIN_XRSTOR:
38971 icode = CODE_FOR_xrstor_rex64;
38972 break;
38973 case IX86_BUILTIN_XSAVE64:
38974 icode = CODE_FOR_xsave64;
38975 break;
38976 case IX86_BUILTIN_XRSTOR64:
38977 icode = CODE_FOR_xrstor64;
38978 break;
38979 case IX86_BUILTIN_XSAVEOPT:
38980 icode = CODE_FOR_xsaveopt_rex64;
38981 break;
38982 case IX86_BUILTIN_XSAVEOPT64:
38983 icode = CODE_FOR_xsaveopt64;
38984 break;
38985 case IX86_BUILTIN_XSAVES:
38986 icode = CODE_FOR_xsaves_rex64;
38987 break;
38988 case IX86_BUILTIN_XRSTORS:
38989 icode = CODE_FOR_xrstors_rex64;
38990 break;
38991 case IX86_BUILTIN_XSAVES64:
38992 icode = CODE_FOR_xsaves64;
38993 break;
38994 case IX86_BUILTIN_XRSTORS64:
38995 icode = CODE_FOR_xrstors64;
38996 break;
38997 case IX86_BUILTIN_XSAVEC:
38998 icode = CODE_FOR_xsavec_rex64;
38999 break;
39000 case IX86_BUILTIN_XSAVEC64:
39001 icode = CODE_FOR_xsavec64;
39002 break;
39003 default:
39004 gcc_unreachable ();
39005 }
39006
39007 op2 = gen_lowpart (SImode, op2);
39008 op1 = gen_lowpart (SImode, op1);
39009 pat = GEN_FCN (icode) (op0, op1, op2);
39010 }
39011 else
39012 {
39013 switch (fcode)
39014 {
39015 case IX86_BUILTIN_XSAVE:
39016 icode = CODE_FOR_xsave;
39017 break;
39018 case IX86_BUILTIN_XRSTOR:
39019 icode = CODE_FOR_xrstor;
39020 break;
39021 case IX86_BUILTIN_XSAVEOPT:
39022 icode = CODE_FOR_xsaveopt;
39023 break;
39024 case IX86_BUILTIN_XSAVES:
39025 icode = CODE_FOR_xsaves;
39026 break;
39027 case IX86_BUILTIN_XRSTORS:
39028 icode = CODE_FOR_xrstors;
39029 break;
39030 case IX86_BUILTIN_XSAVEC:
39031 icode = CODE_FOR_xsavec;
39032 break;
39033 default:
39034 gcc_unreachable ();
39035 }
39036 pat = GEN_FCN (icode) (op0, op1);
39037 }
39038
39039 if (pat)
39040 emit_insn (pat);
39041 return 0;
39042
39043 case IX86_BUILTIN_LLWPCB:
39044 arg0 = CALL_EXPR_ARG (exp, 0);
39045 op0 = expand_normal (arg0);
39046 icode = CODE_FOR_lwp_llwpcb;
39047 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39048 op0 = ix86_zero_extend_to_Pmode (op0);
39049 emit_insn (gen_lwp_llwpcb (op0));
39050 return 0;
39051
39052 case IX86_BUILTIN_SLWPCB:
39053 icode = CODE_FOR_lwp_slwpcb;
39054 if (!target
39055 || !insn_data[icode].operand[0].predicate (target, Pmode))
39056 target = gen_reg_rtx (Pmode);
39057 emit_insn (gen_lwp_slwpcb (target));
39058 return target;
39059
39060 case IX86_BUILTIN_BEXTRI32:
39061 case IX86_BUILTIN_BEXTRI64:
39062 arg0 = CALL_EXPR_ARG (exp, 0);
39063 arg1 = CALL_EXPR_ARG (exp, 1);
39064 op0 = expand_normal (arg0);
39065 op1 = expand_normal (arg1);
39066 icode = (fcode == IX86_BUILTIN_BEXTRI32
39067 ? CODE_FOR_tbm_bextri_si
39068 : CODE_FOR_tbm_bextri_di);
39069 if (!CONST_INT_P (op1))
39070 {
39071 error ("last argument must be an immediate");
39072 return const0_rtx;
39073 }
39074 else
39075 {
39076 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39077 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39078 op1 = GEN_INT (length);
39079 op2 = GEN_INT (lsb_index);
39080 pat = GEN_FCN (icode) (target, op0, op1, op2);
39081 if (pat)
39082 emit_insn (pat);
39083 return target;
39084 }
39085
39086 case IX86_BUILTIN_RDRAND16_STEP:
39087 icode = CODE_FOR_rdrandhi_1;
39088 mode0 = HImode;
39089 goto rdrand_step;
39090
39091 case IX86_BUILTIN_RDRAND32_STEP:
39092 icode = CODE_FOR_rdrandsi_1;
39093 mode0 = SImode;
39094 goto rdrand_step;
39095
39096 case IX86_BUILTIN_RDRAND64_STEP:
39097 icode = CODE_FOR_rdranddi_1;
39098 mode0 = DImode;
39099
39100 rdrand_step:
39101 op0 = gen_reg_rtx (mode0);
39102 emit_insn (GEN_FCN (icode) (op0));
39103
39104 arg0 = CALL_EXPR_ARG (exp, 0);
39105 op1 = expand_normal (arg0);
39106 if (!address_operand (op1, VOIDmode))
39107 {
39108 op1 = convert_memory_address (Pmode, op1);
39109 op1 = copy_addr_to_reg (op1);
39110 }
39111 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39112
39113 op1 = gen_reg_rtx (SImode);
39114 emit_move_insn (op1, CONST1_RTX (SImode));
39115
39116 /* Emit SImode conditional move. */
39117 if (mode0 == HImode)
39118 {
39119 op2 = gen_reg_rtx (SImode);
39120 emit_insn (gen_zero_extendhisi2 (op2, op0));
39121 }
39122 else if (mode0 == SImode)
39123 op2 = op0;
39124 else
39125 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39126
39127 if (target == 0
39128 || !register_operand (target, SImode))
39129 target = gen_reg_rtx (SImode);
39130
39131 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39132 const0_rtx);
39133 emit_insn (gen_rtx_SET (VOIDmode, target,
39134 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39135 return target;
39136
39137 case IX86_BUILTIN_RDSEED16_STEP:
39138 icode = CODE_FOR_rdseedhi_1;
39139 mode0 = HImode;
39140 goto rdseed_step;
39141
39142 case IX86_BUILTIN_RDSEED32_STEP:
39143 icode = CODE_FOR_rdseedsi_1;
39144 mode0 = SImode;
39145 goto rdseed_step;
39146
39147 case IX86_BUILTIN_RDSEED64_STEP:
39148 icode = CODE_FOR_rdseeddi_1;
39149 mode0 = DImode;
39150
39151 rdseed_step:
39152 op0 = gen_reg_rtx (mode0);
39153 emit_insn (GEN_FCN (icode) (op0));
39154
39155 arg0 = CALL_EXPR_ARG (exp, 0);
39156 op1 = expand_normal (arg0);
39157 if (!address_operand (op1, VOIDmode))
39158 {
39159 op1 = convert_memory_address (Pmode, op1);
39160 op1 = copy_addr_to_reg (op1);
39161 }
39162 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39163
39164 op2 = gen_reg_rtx (QImode);
39165
39166 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39167 const0_rtx);
39168 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
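/* Editorial note: rdseed reports success in CF, so the LTU test on the
   CCCmode flags register above captures that bit; the zero-extend below
   turns it into the int status returned by the *_step builtins.  */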
39169
39170 if (target == 0
39171 || !register_operand (target, SImode))
39172 target = gen_reg_rtx (SImode);
39173
39174 emit_insn (gen_zero_extendqisi2 (target, op2));
39175 return target;
39176
39177 case IX86_BUILTIN_SBB32:
39178 icode = CODE_FOR_subsi3_carry;
39179 mode0 = SImode;
39180 goto addcarryx;
39181
39182 case IX86_BUILTIN_SBB64:
39183 icode = CODE_FOR_subdi3_carry;
39184 mode0 = DImode;
39185 goto addcarryx;
39186
39187 case IX86_BUILTIN_ADDCARRYX32:
39188 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39189 mode0 = SImode;
39190 goto addcarryx;
39191
39192 case IX86_BUILTIN_ADDCARRYX64:
39193 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39194 mode0 = DImode;
39195
39196 addcarryx:
39197 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39198 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39199 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39200 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39201
39202 op0 = gen_reg_rtx (QImode);
39203
39204 /* Generate CF from input operand. */
39205 op1 = expand_normal (arg0);
39206 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39207 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
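/* Editorial note: adding 0xff (constm1 in QImode) to the carry-in byte
   overflows exactly when that byte is nonzero, so this leaves CF set
   iff c_in != 0 for the carry-consuming pattern emitted below.  */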
39208
39209 /* Gen ADCX instruction to compute X+Y+CF. */
39210 op2 = expand_normal (arg1);
39211 op3 = expand_normal (arg2);
39212
39213 if (!REG_P (op2))
39214 op2 = copy_to_mode_reg (mode0, op2);
39215 if (!REG_P (op3))
39216 op3 = copy_to_mode_reg (mode0, op3);
39217
39218 op0 = gen_reg_rtx (mode0);
39219
39220 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39221 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39222 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39223
39224 /* Store the result. */
39225 op4 = expand_normal (arg3);
39226 if (!address_operand (op4, VOIDmode))
39227 {
39228 op4 = convert_memory_address (Pmode, op4);
39229 op4 = copy_addr_to_reg (op4);
39230 }
39231 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39232
39233 /* Return current CF value. */
39234 if (target == 0)
39235 target = gen_reg_rtx (QImode);
39236
39237 PUT_MODE (pat, QImode);
39238 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39239 return target;
39240
39241 case IX86_BUILTIN_READ_FLAGS:
39242 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39243
39244 if (optimize
39245 || target == NULL_RTX
39246 || !nonimmediate_operand (target, word_mode)
39247 || GET_MODE (target) != word_mode)
39248 target = gen_reg_rtx (word_mode);
39249
39250 emit_insn (gen_pop (target));
39251 return target;
39252
39253 case IX86_BUILTIN_WRITE_FLAGS:
39254
39255 arg0 = CALL_EXPR_ARG (exp, 0);
39256 op0 = expand_normal (arg0);
39257 if (!general_no_elim_operand (op0, word_mode))
39258 op0 = copy_to_mode_reg (word_mode, op0);
39259
39260 emit_insn (gen_push (op0));
39261 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39262 return 0;
39263
39264 case IX86_BUILTIN_KORTESTC16:
39265 icode = CODE_FOR_kortestchi;
39266 mode0 = HImode;
39267 mode1 = CCCmode;
39268 goto kortest;
39269
39270 case IX86_BUILTIN_KORTESTZ16:
39271 icode = CODE_FOR_kortestzhi;
39272 mode0 = HImode;
39273 mode1 = CCZmode;
39274
39275 kortest:
39276 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39277 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39278 op0 = expand_normal (arg0);
39279 op1 = expand_normal (arg1);
39280
39281 op0 = copy_to_reg (op0);
39282 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39283 op1 = copy_to_reg (op1);
39284 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39285
39286 target = gen_reg_rtx (QImode);
39287 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39288
39289 /* Emit kortest. */
39290 emit_insn (GEN_FCN (icode) (op0, op1));
39291 /* And use setcc to return result from flags. */
39292 ix86_expand_setcc (target, EQ,
39293 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39294 return target;
39295
39296 case IX86_BUILTIN_GATHERSIV2DF:
39297 icode = CODE_FOR_avx2_gathersiv2df;
39298 goto gather_gen;
39299 case IX86_BUILTIN_GATHERSIV4DF:
39300 icode = CODE_FOR_avx2_gathersiv4df;
39301 goto gather_gen;
39302 case IX86_BUILTIN_GATHERDIV2DF:
39303 icode = CODE_FOR_avx2_gatherdiv2df;
39304 goto gather_gen;
39305 case IX86_BUILTIN_GATHERDIV4DF:
39306 icode = CODE_FOR_avx2_gatherdiv4df;
39307 goto gather_gen;
39308 case IX86_BUILTIN_GATHERSIV4SF:
39309 icode = CODE_FOR_avx2_gathersiv4sf;
39310 goto gather_gen;
39311 case IX86_BUILTIN_GATHERSIV8SF:
39312 icode = CODE_FOR_avx2_gathersiv8sf;
39313 goto gather_gen;
39314 case IX86_BUILTIN_GATHERDIV4SF:
39315 icode = CODE_FOR_avx2_gatherdiv4sf;
39316 goto gather_gen;
39317 case IX86_BUILTIN_GATHERDIV8SF:
39318 icode = CODE_FOR_avx2_gatherdiv8sf;
39319 goto gather_gen;
39320 case IX86_BUILTIN_GATHERSIV2DI:
39321 icode = CODE_FOR_avx2_gathersiv2di;
39322 goto gather_gen;
39323 case IX86_BUILTIN_GATHERSIV4DI:
39324 icode = CODE_FOR_avx2_gathersiv4di;
39325 goto gather_gen;
39326 case IX86_BUILTIN_GATHERDIV2DI:
39327 icode = CODE_FOR_avx2_gatherdiv2di;
39328 goto gather_gen;
39329 case IX86_BUILTIN_GATHERDIV4DI:
39330 icode = CODE_FOR_avx2_gatherdiv4di;
39331 goto gather_gen;
39332 case IX86_BUILTIN_GATHERSIV4SI:
39333 icode = CODE_FOR_avx2_gathersiv4si;
39334 goto gather_gen;
39335 case IX86_BUILTIN_GATHERSIV8SI:
39336 icode = CODE_FOR_avx2_gathersiv8si;
39337 goto gather_gen;
39338 case IX86_BUILTIN_GATHERDIV4SI:
39339 icode = CODE_FOR_avx2_gatherdiv4si;
39340 goto gather_gen;
39341 case IX86_BUILTIN_GATHERDIV8SI:
39342 icode = CODE_FOR_avx2_gatherdiv8si;
39343 goto gather_gen;
39344 case IX86_BUILTIN_GATHERALTSIV4DF:
39345 icode = CODE_FOR_avx2_gathersiv4df;
39346 goto gather_gen;
39347 case IX86_BUILTIN_GATHERALTDIV8SF:
39348 icode = CODE_FOR_avx2_gatherdiv8sf;
39349 goto gather_gen;
39350 case IX86_BUILTIN_GATHERALTSIV4DI:
39351 icode = CODE_FOR_avx2_gathersiv4di;
39352 goto gather_gen;
39353 case IX86_BUILTIN_GATHERALTDIV8SI:
39354 icode = CODE_FOR_avx2_gatherdiv8si;
39355 goto gather_gen;
39356 case IX86_BUILTIN_GATHER3SIV16SF:
39357 icode = CODE_FOR_avx512f_gathersiv16sf;
39358 goto gather_gen;
39359 case IX86_BUILTIN_GATHER3SIV8DF:
39360 icode = CODE_FOR_avx512f_gathersiv8df;
39361 goto gather_gen;
39362 case IX86_BUILTIN_GATHER3DIV16SF:
39363 icode = CODE_FOR_avx512f_gatherdiv16sf;
39364 goto gather_gen;
39365 case IX86_BUILTIN_GATHER3DIV8DF:
39366 icode = CODE_FOR_avx512f_gatherdiv8df;
39367 goto gather_gen;
39368 case IX86_BUILTIN_GATHER3SIV16SI:
39369 icode = CODE_FOR_avx512f_gathersiv16si;
39370 goto gather_gen;
39371 case IX86_BUILTIN_GATHER3SIV8DI:
39372 icode = CODE_FOR_avx512f_gathersiv8di;
39373 goto gather_gen;
39374 case IX86_BUILTIN_GATHER3DIV16SI:
39375 icode = CODE_FOR_avx512f_gatherdiv16si;
39376 goto gather_gen;
39377 case IX86_BUILTIN_GATHER3DIV8DI:
39378 icode = CODE_FOR_avx512f_gatherdiv8di;
39379 goto gather_gen;
39380 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39381 icode = CODE_FOR_avx512f_gathersiv8df;
39382 goto gather_gen;
39383 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39384 icode = CODE_FOR_avx512f_gatherdiv16sf;
39385 goto gather_gen;
39386 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39387 icode = CODE_FOR_avx512f_gathersiv8di;
39388 goto gather_gen;
39389 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39390 icode = CODE_FOR_avx512f_gatherdiv16si;
39391 goto gather_gen;
39392 case IX86_BUILTIN_GATHER3SIV2DF:
39393 icode = CODE_FOR_avx512vl_gathersiv2df;
39394 goto gather_gen;
39395 case IX86_BUILTIN_GATHER3SIV4DF:
39396 icode = CODE_FOR_avx512vl_gathersiv4df;
39397 goto gather_gen;
39398 case IX86_BUILTIN_GATHER3DIV2DF:
39399 icode = CODE_FOR_avx512vl_gatherdiv2df;
39400 goto gather_gen;
39401 case IX86_BUILTIN_GATHER3DIV4DF:
39402 icode = CODE_FOR_avx512vl_gatherdiv4df;
39403 goto gather_gen;
39404 case IX86_BUILTIN_GATHER3SIV4SF:
39405 icode = CODE_FOR_avx512vl_gathersiv4sf;
39406 goto gather_gen;
39407 case IX86_BUILTIN_GATHER3SIV8SF:
39408 icode = CODE_FOR_avx512vl_gathersiv8sf;
39409 goto gather_gen;
39410 case IX86_BUILTIN_GATHER3DIV4SF:
39411 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39412 goto gather_gen;
39413 case IX86_BUILTIN_GATHER3DIV8SF:
39414 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39415 goto gather_gen;
39416 case IX86_BUILTIN_GATHER3SIV2DI:
39417 icode = CODE_FOR_avx512vl_gathersiv2di;
39418 goto gather_gen;
39419 case IX86_BUILTIN_GATHER3SIV4DI:
39420 icode = CODE_FOR_avx512vl_gathersiv4di;
39421 goto gather_gen;
39422 case IX86_BUILTIN_GATHER3DIV2DI:
39423 icode = CODE_FOR_avx512vl_gatherdiv2di;
39424 goto gather_gen;
39425 case IX86_BUILTIN_GATHER3DIV4DI:
39426 icode = CODE_FOR_avx512vl_gatherdiv4di;
39427 goto gather_gen;
39428 case IX86_BUILTIN_GATHER3SIV4SI:
39429 icode = CODE_FOR_avx512vl_gathersiv4si;
39430 goto gather_gen;
39431 case IX86_BUILTIN_GATHER3SIV8SI:
39432 icode = CODE_FOR_avx512vl_gathersiv8si;
39433 goto gather_gen;
39434 case IX86_BUILTIN_GATHER3DIV4SI:
39435 icode = CODE_FOR_avx512vl_gatherdiv4si;
39436 goto gather_gen;
39437 case IX86_BUILTIN_GATHER3DIV8SI:
39438 icode = CODE_FOR_avx512vl_gatherdiv8si;
39439 goto gather_gen;
39440 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39441 icode = CODE_FOR_avx512vl_gathersiv4df;
39442 goto gather_gen;
39443 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39444 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39445 goto gather_gen;
39446 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39447 icode = CODE_FOR_avx512vl_gathersiv4di;
39448 goto gather_gen;
39449 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39450 icode = CODE_FOR_avx512vl_gatherdiv8si;
39451 goto gather_gen;
39452 case IX86_BUILTIN_SCATTERSIV16SF:
39453 icode = CODE_FOR_avx512f_scattersiv16sf;
39454 goto scatter_gen;
39455 case IX86_BUILTIN_SCATTERSIV8DF:
39456 icode = CODE_FOR_avx512f_scattersiv8df;
39457 goto scatter_gen;
39458 case IX86_BUILTIN_SCATTERDIV16SF:
39459 icode = CODE_FOR_avx512f_scatterdiv16sf;
39460 goto scatter_gen;
39461 case IX86_BUILTIN_SCATTERDIV8DF:
39462 icode = CODE_FOR_avx512f_scatterdiv8df;
39463 goto scatter_gen;
39464 case IX86_BUILTIN_SCATTERSIV16SI:
39465 icode = CODE_FOR_avx512f_scattersiv16si;
39466 goto scatter_gen;
39467 case IX86_BUILTIN_SCATTERSIV8DI:
39468 icode = CODE_FOR_avx512f_scattersiv8di;
39469 goto scatter_gen;
39470 case IX86_BUILTIN_SCATTERDIV16SI:
39471 icode = CODE_FOR_avx512f_scatterdiv16si;
39472 goto scatter_gen;
39473 case IX86_BUILTIN_SCATTERDIV8DI:
39474 icode = CODE_FOR_avx512f_scatterdiv8di;
39475 goto scatter_gen;
39476 case IX86_BUILTIN_SCATTERSIV8SF:
39477 icode = CODE_FOR_avx512vl_scattersiv8sf;
39478 goto scatter_gen;
39479 case IX86_BUILTIN_SCATTERSIV4SF:
39480 icode = CODE_FOR_avx512vl_scattersiv4sf;
39481 goto scatter_gen;
39482 case IX86_BUILTIN_SCATTERSIV4DF:
39483 icode = CODE_FOR_avx512vl_scattersiv4df;
39484 goto scatter_gen;
39485 case IX86_BUILTIN_SCATTERSIV2DF:
39486 icode = CODE_FOR_avx512vl_scattersiv2df;
39487 goto scatter_gen;
39488 case IX86_BUILTIN_SCATTERDIV8SF:
39489 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39490 goto scatter_gen;
39491 case IX86_BUILTIN_SCATTERDIV4SF:
39492 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39493 goto scatter_gen;
39494 case IX86_BUILTIN_SCATTERDIV4DF:
39495 icode = CODE_FOR_avx512vl_scatterdiv4df;
39496 goto scatter_gen;
39497 case IX86_BUILTIN_SCATTERDIV2DF:
39498 icode = CODE_FOR_avx512vl_scatterdiv2df;
39499 goto scatter_gen;
39500 case IX86_BUILTIN_SCATTERSIV8SI:
39501 icode = CODE_FOR_avx512vl_scattersiv8si;
39502 goto scatter_gen;
39503 case IX86_BUILTIN_SCATTERSIV4SI:
39504 icode = CODE_FOR_avx512vl_scattersiv4si;
39505 goto scatter_gen;
39506 case IX86_BUILTIN_SCATTERSIV4DI:
39507 icode = CODE_FOR_avx512vl_scattersiv4di;
39508 goto scatter_gen;
39509 case IX86_BUILTIN_SCATTERSIV2DI:
39510 icode = CODE_FOR_avx512vl_scattersiv2di;
39511 goto scatter_gen;
39512 case IX86_BUILTIN_SCATTERDIV8SI:
39513 icode = CODE_FOR_avx512vl_scatterdiv8si;
39514 goto scatter_gen;
39515 case IX86_BUILTIN_SCATTERDIV4SI:
39516 icode = CODE_FOR_avx512vl_scatterdiv4si;
39517 goto scatter_gen;
39518 case IX86_BUILTIN_SCATTERDIV4DI:
39519 icode = CODE_FOR_avx512vl_scatterdiv4di;
39520 goto scatter_gen;
39521 case IX86_BUILTIN_SCATTERDIV2DI:
39522 icode = CODE_FOR_avx512vl_scatterdiv2di;
39523 goto scatter_gen;
39524 case IX86_BUILTIN_GATHERPFDPD:
39525 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39526 goto vec_prefetch_gen;
39527 case IX86_BUILTIN_GATHERPFDPS:
39528 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39529 goto vec_prefetch_gen;
39530 case IX86_BUILTIN_GATHERPFQPD:
39531 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39532 goto vec_prefetch_gen;
39533 case IX86_BUILTIN_GATHERPFQPS:
39534 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39535 goto vec_prefetch_gen;
39536 case IX86_BUILTIN_SCATTERPFDPD:
39537 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39538 goto vec_prefetch_gen;
39539 case IX86_BUILTIN_SCATTERPFDPS:
39540 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39541 goto vec_prefetch_gen;
39542 case IX86_BUILTIN_SCATTERPFQPD:
39543 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39544 goto vec_prefetch_gen;
39545 case IX86_BUILTIN_SCATTERPFQPS:
39546 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39547 goto vec_prefetch_gen;
39548
39549 gather_gen:
39550 rtx half;
39551 rtx (*gen) (rtx, rtx);
39552
39553 arg0 = CALL_EXPR_ARG (exp, 0);
39554 arg1 = CALL_EXPR_ARG (exp, 1);
39555 arg2 = CALL_EXPR_ARG (exp, 2);
39556 arg3 = CALL_EXPR_ARG (exp, 3);
39557 arg4 = CALL_EXPR_ARG (exp, 4);
39558 op0 = expand_normal (arg0);
39559 op1 = expand_normal (arg1);
39560 op2 = expand_normal (arg2);
39561 op3 = expand_normal (arg3);
39562 op4 = expand_normal (arg4);
39563 /* Note the arg order is different from the operand order. */
39564 mode0 = insn_data[icode].operand[1].mode;
39565 mode2 = insn_data[icode].operand[3].mode;
39566 mode3 = insn_data[icode].operand[4].mode;
39567 mode4 = insn_data[icode].operand[5].mode;
39568
39569 if (target == NULL_RTX
39570 || GET_MODE (target) != insn_data[icode].operand[0].mode
39571 || !insn_data[icode].operand[0].predicate (target,
39572 GET_MODE (target)))
39573 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39574 else
39575 subtarget = target;
39576
39577 switch (fcode)
39578 {
39579 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39580 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39581 half = gen_reg_rtx (V8SImode);
39582 if (!nonimmediate_operand (op2, V16SImode))
39583 op2 = copy_to_mode_reg (V16SImode, op2);
39584 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39585 op2 = half;
39586 break;
39587 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39588 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39589 case IX86_BUILTIN_GATHERALTSIV4DF:
39590 case IX86_BUILTIN_GATHERALTSIV4DI:
39591 half = gen_reg_rtx (V4SImode);
39592 if (!nonimmediate_operand (op2, V8SImode))
39593 op2 = copy_to_mode_reg (V8SImode, op2);
39594 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39595 op2 = half;
39596 break;
39597 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39598 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39599 half = gen_reg_rtx (mode0);
39600 if (mode0 == V8SFmode)
39601 gen = gen_vec_extract_lo_v16sf;
39602 else
39603 gen = gen_vec_extract_lo_v16si;
39604 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39605 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39606 emit_insn (gen (half, op0));
39607 op0 = half;
39608 if (GET_MODE (op3) != VOIDmode)
39609 {
39610 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39611 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39612 emit_insn (gen (half, op3));
39613 op3 = half;
39614 }
39615 break;
39616 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39617 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39618 case IX86_BUILTIN_GATHERALTDIV8SF:
39619 case IX86_BUILTIN_GATHERALTDIV8SI:
39620 half = gen_reg_rtx (mode0);
39621 if (mode0 == V4SFmode)
39622 gen = gen_vec_extract_lo_v8sf;
39623 else
39624 gen = gen_vec_extract_lo_v8si;
39625 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39626 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39627 emit_insn (gen (half, op0));
39628 op0 = half;
39629 if (GET_MODE (op3) != VOIDmode)
39630 {
39631 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39632 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39633 emit_insn (gen (half, op3));
39634 op3 = half;
39635 }
39636 break;
39637 default:
39638 break;
39639 }
39640
39641 /* Force the memory operand to use just a base register here. We
39642 don't want to do this for memory operands of other builtin
39643 functions. */
39644 op1 = ix86_zero_extend_to_Pmode (op1);
39645
39646 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39647 op0 = copy_to_mode_reg (mode0, op0);
39648 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39649 op1 = copy_to_mode_reg (Pmode, op1);
39650 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39651 op2 = copy_to_mode_reg (mode2, op2);
39652 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39653 {
39654 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39655 op3 = copy_to_mode_reg (mode3, op3);
39656 }
39657 else
39658 {
39659 op3 = copy_to_reg (op3);
39660 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39661 }
39662 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39663 {
39664 error ("the last argument must be scale 1, 2, 4, 8");
39665 return const0_rtx;
39666 }
39667
39668 /* Optimize. If mask is known to have all high bits set,
39669 replace op0 with pc_rtx to signal that the instruction
39670 overwrites the whole destination and doesn't use its
39671 previous contents. */
39672 if (optimize)
39673 {
39674 if (TREE_CODE (arg3) == INTEGER_CST)
39675 {
39676 if (integer_all_onesp (arg3))
39677 op0 = pc_rtx;
39678 }
39679 else if (TREE_CODE (arg3) == VECTOR_CST)
39680 {
39681 unsigned int negative = 0;
39682 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39683 {
39684 tree cst = VECTOR_CST_ELT (arg3, i);
39685 if (TREE_CODE (cst) == INTEGER_CST
39686 && tree_int_cst_sign_bit (cst))
39687 negative++;
39688 else if (TREE_CODE (cst) == REAL_CST
39689 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39690 negative++;
39691 }
39692 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39693 op0 = pc_rtx;
39694 }
39695 else if (TREE_CODE (arg3) == SSA_NAME
39696 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39697 {
39698 /* Recognize also when mask is like:
39699 __v2df src = _mm_setzero_pd ();
39700 __v2df mask = _mm_cmpeq_pd (src, src);
39701 or
39702 __v8sf src = _mm256_setzero_ps ();
39703 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39704 as that is a cheaper way to load all ones into
39705 a register than having to load a constant from
39706 memory. */
39707 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39708 if (is_gimple_call (def_stmt))
39709 {
39710 tree fndecl = gimple_call_fndecl (def_stmt);
39711 if (fndecl
39712 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39713 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39714 {
39715 case IX86_BUILTIN_CMPPD:
39716 case IX86_BUILTIN_CMPPS:
39717 case IX86_BUILTIN_CMPPD256:
39718 case IX86_BUILTIN_CMPPS256:
39719 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39720 break;
39721 /* FALLTHRU */
39722 case IX86_BUILTIN_CMPEQPD:
39723 case IX86_BUILTIN_CMPEQPS:
39724 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39725 && initializer_zerop (gimple_call_arg (def_stmt,
39726 1)))
39727 op0 = pc_rtx;
39728 break;
39729 default:
39730 break;
39731 }
39732 }
39733 }
39734 }
39735
39736 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39737 if (! pat)
39738 return const0_rtx;
39739 emit_insn (pat);
39740
39741 switch (fcode)
39742 {
39743 case IX86_BUILTIN_GATHER3DIV16SF:
39744 if (target == NULL_RTX)
39745 target = gen_reg_rtx (V8SFmode);
39746 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39747 break;
39748 case IX86_BUILTIN_GATHER3DIV16SI:
39749 if (target == NULL_RTX)
39750 target = gen_reg_rtx (V8SImode);
39751 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39752 break;
39753 case IX86_BUILTIN_GATHER3DIV8SF:
39754 case IX86_BUILTIN_GATHERDIV8SF:
39755 if (target == NULL_RTX)
39756 target = gen_reg_rtx (V4SFmode);
39757 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39758 break;
39759 case IX86_BUILTIN_GATHER3DIV8SI:
39760 case IX86_BUILTIN_GATHERDIV8SI:
39761 if (target == NULL_RTX)
39762 target = gen_reg_rtx (V4SImode);
39763 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39764 break;
39765 default:
39766 target = subtarget;
39767 break;
39768 }
39769 return target;
39770
39771 scatter_gen:
39772 arg0 = CALL_EXPR_ARG (exp, 0);
39773 arg1 = CALL_EXPR_ARG (exp, 1);
39774 arg2 = CALL_EXPR_ARG (exp, 2);
39775 arg3 = CALL_EXPR_ARG (exp, 3);
39776 arg4 = CALL_EXPR_ARG (exp, 4);
39777 op0 = expand_normal (arg0);
39778 op1 = expand_normal (arg1);
39779 op2 = expand_normal (arg2);
39780 op3 = expand_normal (arg3);
39781 op4 = expand_normal (arg4);
39782 mode1 = insn_data[icode].operand[1].mode;
39783 mode2 = insn_data[icode].operand[2].mode;
39784 mode3 = insn_data[icode].operand[3].mode;
39785 mode4 = insn_data[icode].operand[4].mode;
39786
39787 /* Force the memory operand to use just a base register here. We
39788 don't want to do this for memory operands of other builtin
39789 functions. */
39790 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39791
39792 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39793 op0 = copy_to_mode_reg (Pmode, op0);
39794
39795 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39796 {
39797 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39798 op1 = copy_to_mode_reg (mode1, op1);
39799 }
39800 else
39801 {
39802 op1 = copy_to_reg (op1);
39803 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39804 }
39805
39806 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39807 op2 = copy_to_mode_reg (mode2, op2);
39808
39809 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39810 op3 = copy_to_mode_reg (mode3, op3);
39811
39812 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39813 {
39814 error ("the last argument must be scale 1, 2, 4, 8");
39815 return const0_rtx;
39816 }
39817
39818 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39819 if (! pat)
39820 return const0_rtx;
39821
39822 emit_insn (pat);
39823 return 0;
39824
39825 vec_prefetch_gen:
39826 arg0 = CALL_EXPR_ARG (exp, 0);
39827 arg1 = CALL_EXPR_ARG (exp, 1);
39828 arg2 = CALL_EXPR_ARG (exp, 2);
39829 arg3 = CALL_EXPR_ARG (exp, 3);
39830 arg4 = CALL_EXPR_ARG (exp, 4);
39831 op0 = expand_normal (arg0);
39832 op1 = expand_normal (arg1);
39833 op2 = expand_normal (arg2);
39834 op3 = expand_normal (arg3);
39835 op4 = expand_normal (arg4);
39836 mode0 = insn_data[icode].operand[0].mode;
39837 mode1 = insn_data[icode].operand[1].mode;
39838 mode3 = insn_data[icode].operand[3].mode;
39839 mode4 = insn_data[icode].operand[4].mode;
39840
39841 if (GET_MODE (op0) == mode0
39842 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39843 {
39844 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39845 op0 = copy_to_mode_reg (mode0, op0);
39846 }
39847 else if (op0 != constm1_rtx)
39848 {
39849 op0 = copy_to_reg (op0);
39850 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39851 }
39852
39853 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39854 op1 = copy_to_mode_reg (mode1, op1);
39855
39856 /* Force the memory operand to use just a base register here. We
39857 don't want to do this for memory operands of other builtin
39858 functions. */
39859 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39860
39861 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39862 op2 = copy_to_mode_reg (Pmode, op2);
39863
39864 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39865 {
39866 error ("the forth argument must be scale 1, 2, 4, 8");
39867 return const0_rtx;
39868 }
39869
39870 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39871 {
39872 error ("incorrect hint operand");
39873 return const0_rtx;
39874 }
39875
39876 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39877 if (! pat)
39878 return const0_rtx;
39879
39880 emit_insn (pat);
39881
39882 return 0;
39883
39884 case IX86_BUILTIN_XABORT:
39885 icode = CODE_FOR_xabort;
39886 arg0 = CALL_EXPR_ARG (exp, 0);
39887 op0 = expand_normal (arg0);
39888 mode0 = insn_data[icode].operand[0].mode;
39889 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39890 {
39891 error ("the xabort's argument must be an 8-bit immediate");
39892 return const0_rtx;
39893 }
39894 emit_insn (gen_xabort (op0));
39895 return 0;
39896
39897 default:
39898 break;
39899 }
39900
39901 for (i = 0, d = bdesc_special_args;
39902 i < ARRAY_SIZE (bdesc_special_args);
39903 i++, d++)
39904 if (d->code == fcode)
39905 return ix86_expand_special_args_builtin (d, exp, target);
39906
39907 for (i = 0, d = bdesc_args;
39908 i < ARRAY_SIZE (bdesc_args);
39909 i++, d++)
39910 if (d->code == fcode)
39911 switch (fcode)
39912 {
39913 case IX86_BUILTIN_FABSQ:
39914 case IX86_BUILTIN_COPYSIGNQ:
39915 if (!TARGET_SSE)
39916 /* Emit a normal call if SSE isn't available. */
39917 return expand_call (exp, target, ignore);
39918 default:
39919 return ix86_expand_args_builtin (d, exp, target);
39920 }
39921
39922 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39923 if (d->code == fcode)
39924 return ix86_expand_sse_comi (d, exp, target);
39925
39926 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39927 if (d->code == fcode)
39928 return ix86_expand_round_builtin (d, exp, target);
39929
39930 for (i = 0, d = bdesc_pcmpestr;
39931 i < ARRAY_SIZE (bdesc_pcmpestr);
39932 i++, d++)
39933 if (d->code == fcode)
39934 return ix86_expand_sse_pcmpestr (d, exp, target);
39935
39936 for (i = 0, d = bdesc_pcmpistr;
39937 i < ARRAY_SIZE (bdesc_pcmpistr);
39938 i++, d++)
39939 if (d->code == fcode)
39940 return ix86_expand_sse_pcmpistr (d, exp, target);
39941
39942 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39943 if (d->code == fcode)
39944 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39945 (enum ix86_builtin_func_type)
39946 d->flag, d->comparison);
39947
39948 gcc_unreachable ();
39949 }
39950
39951 /* This returns the target-specific builtin with code CODE if
39952 current_function_decl has visibility on this builtin, which is checked
39953 using isa flags. Returns NULL_TREE otherwise. */
39954
39955 static tree ix86_get_builtin (enum ix86_builtins code)
39956 {
39957 struct cl_target_option *opts;
39958 tree target_tree = NULL_TREE;
39959
39960 /* Determine the isa flags of current_function_decl. */
39961
39962 if (current_function_decl)
39963 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39964
39965 if (target_tree == NULL)
39966 target_tree = target_option_default_node;
39967
39968 opts = TREE_TARGET_OPTION (target_tree);
39969
39970 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39971 return ix86_builtin_decl (code, true);
39972 else
39973 return NULL_TREE;
39974 }
39975
39976 /* Return the function decl of the target-specific builtin
39977 corresponding to the MPX builtin passed in FCODE. */
39978 static tree
39979 ix86_builtin_mpx_function (unsigned fcode)
39980 {
39981 switch (fcode)
39982 {
39983 case BUILT_IN_CHKP_BNDMK:
39984 return ix86_builtins[IX86_BUILTIN_BNDMK];
39985
39986 case BUILT_IN_CHKP_BNDSTX:
39987 return ix86_builtins[IX86_BUILTIN_BNDSTX];
39988
39989 case BUILT_IN_CHKP_BNDLDX:
39990 return ix86_builtins[IX86_BUILTIN_BNDLDX];
39991
39992 case BUILT_IN_CHKP_BNDCL:
39993 return ix86_builtins[IX86_BUILTIN_BNDCL];
39994
39995 case BUILT_IN_CHKP_BNDCU:
39996 return ix86_builtins[IX86_BUILTIN_BNDCU];
39997
39998 case BUILT_IN_CHKP_BNDRET:
39999 return ix86_builtins[IX86_BUILTIN_BNDRET];
40000
40001 case BUILT_IN_CHKP_INTERSECT:
40002 return ix86_builtins[IX86_BUILTIN_BNDINT];
40003
40004 case BUILT_IN_CHKP_NARROW:
40005 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40006
40007 case BUILT_IN_CHKP_SIZEOF:
40008 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40009
40010 case BUILT_IN_CHKP_EXTRACT_LOWER:
40011 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40012
40013 case BUILT_IN_CHKP_EXTRACT_UPPER:
40014 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40015
40016 default:
40017 return NULL_TREE;
40018 }
40019
40020 gcc_unreachable ();
40021 }
40022
40023 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40024
40025 Return an address to be used to load/store bounds for the pointer
40026 passed in SLOT.
40027 
40028 SLOT_NO is an integer constant holding the number of a
40029 target-dependent special slot to be used in case SLOT is not a memory.
40030 
40031 SPECIAL_BASE is a pointer to be used as the base of fake addresses
40032 to access special slots in the Bounds Table. SPECIAL_BASE[-1],
40033 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40034
40035 static rtx
40036 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40037 {
40038 rtx addr = NULL;
40039
40040 /* A NULL slot means we pass bounds for a pointer not passed to the
40041 function at all. A register slot means we pass the pointer in a
40042 register. In both cases bounds are passed via the Bounds
40043 Table. Since we do not have the actual pointer stored in memory,
40044 we have to use fake addresses to access the Bounds Table. We
40045 start with (special_base - sizeof (void *)) and decrease this
40046 address by the pointer size to get addresses for other slots. */
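/* For example, assuming 64-bit Pmode, slot number 0 maps to
special_base - 8, slot number 1 to special_base - 16, and so on. */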
40047 if (!slot || REG_P (slot))
40048 {
40049 gcc_assert (CONST_INT_P (slot_no));
40050 addr = plus_constant (Pmode, special_base,
40051 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40052 }
40053 /* If pointer is passed in a memory then its address is used to
40054 access Bounds Table. */
40055 else if (MEM_P (slot))
40056 {
40057 addr = XEXP (slot, 0);
40058 if (!register_operand (addr, Pmode))
40059 addr = copy_addr_to_reg (addr);
40060 }
40061 else
40062 gcc_unreachable ();
40063
40064 return addr;
40065 }
40066
40067 /* The expand pass uses this hook to load bounds for function parameter
40068 PTR passed in SLOT in case its bounds are not passed in a register.
40069 
40070 If SLOT is a memory, then bounds are loaded as for a regular pointer
40071 loaded from memory. PTR may be NULL in case SLOT is a memory.
40072 In that case the value of PTR (if required) may be loaded from SLOT.
40073 
40074 If SLOT is NULL or a register, then SLOT_NO is an integer constant
40075 holding the number of the target-dependent special slot which should be
40076 used to obtain bounds.
40077 
40078 Return the loaded bounds. */
40079
40080 static rtx
40081 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40082 {
40083 rtx reg = gen_reg_rtx (BNDmode);
40084 rtx addr;
40085
40086 /* Get the address used to access the Bounds Table. Special slots start
40087 at the location of the return address of the current function. */
40088 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40089
40090 /* Load pointer value from a memory if we don't have it. */
40091 if (!ptr)
40092 {
40093 gcc_assert (MEM_P (slot));
40094 ptr = copy_addr_to_reg (slot);
40095 }
40096
40097 emit_insn (BNDmode == BND64mode
40098 ? gen_bnd64_ldx (reg, addr, ptr)
40099 : gen_bnd32_ldx (reg, addr, ptr));
40100
40101 return reg;
40102 }
40103
40104 /* The expand pass uses this hook to store BOUNDS for call argument PTR
40105 passed in SLOT in case BOUNDS are not passed in a register.
40106 
40107 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40108 stored in memory. PTR may be NULL in case SLOT is a memory.
40109 In that case the value of PTR (if required) may be loaded from SLOT.
40110 
40111 If SLOT is NULL or a register, then SLOT_NO is an integer constant
40112 holding the number of the target-dependent special slot which should be
40113 used to store BOUNDS. */
40114
40115 static void
40116 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40117 {
40118 rtx addr;
40119
40120 /* Get the address used to access the Bounds Table. Special slots start
40121 at the location of the return address of the called function. */
40122 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40123
40124 /* Load pointer value from a memory if we don't have it. */
40125 if (!ptr)
40126 {
40127 gcc_assert (MEM_P (slot));
40128 ptr = copy_addr_to_reg (slot);
40129 }
40130
40131 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40132 if (!register_operand (bounds, BNDmode))
40133 bounds = copy_to_mode_reg (BNDmode, bounds);
40134
40135 emit_insn (BNDmode == BND64mode
40136 ? gen_bnd64_stx (addr, ptr, bounds)
40137 : gen_bnd32_stx (addr, ptr, bounds));
40138 }
40139
40140 /* Load and return bounds returned by function in SLOT. */
40141
40142 static rtx
40143 ix86_load_returned_bounds (rtx slot)
40144 {
40145 rtx res;
40146
40147 gcc_assert (REG_P (slot));
40148 res = gen_reg_rtx (BNDmode);
40149 emit_move_insn (res, slot);
40150
40151 return res;
40152 }
40153
40154 /* Store BOUNDS returned by function into SLOT. */
40155
40156 static void
40157 ix86_store_returned_bounds (rtx slot, rtx bounds)
40158 {
40159 gcc_assert (REG_P (slot));
40160 emit_move_insn (slot, bounds);
40161 }
40162
40163 /* Returns a function decl for a vectorized version of the builtin function
40164 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40165 if it is not available. */
40166
40167 static tree
40168 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40169 tree type_in)
40170 {
40171 machine_mode in_mode, out_mode;
40172 int in_n, out_n;
40173 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40174
40175 if (TREE_CODE (type_out) != VECTOR_TYPE
40176 || TREE_CODE (type_in) != VECTOR_TYPE
40177 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40178 return NULL_TREE;
40179
40180 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40181 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40182 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40183 in_n = TYPE_VECTOR_SUBPARTS (type_in);
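/* For example, a V2DF result type gives out_mode == DFmode and
out_n == 2, while a V4SF input type gives in_mode == SFmode and
in_n == 4. */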
40184
40185 switch (fn)
40186 {
40187 case BUILT_IN_SQRT:
40188 if (out_mode == DFmode && in_mode == DFmode)
40189 {
40190 if (out_n == 2 && in_n == 2)
40191 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40192 else if (out_n == 4 && in_n == 4)
40193 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40194 else if (out_n == 8 && in_n == 8)
40195 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40196 }
40197 break;
40198
40199 case BUILT_IN_EXP2F:
40200 if (out_mode == SFmode && in_mode == SFmode)
40201 {
40202 if (out_n == 16 && in_n == 16)
40203 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40204 }
40205 break;
40206
40207 case BUILT_IN_SQRTF:
40208 if (out_mode == SFmode && in_mode == SFmode)
40209 {
40210 if (out_n == 4 && in_n == 4)
40211 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40212 else if (out_n == 8 && in_n == 8)
40213 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40214 else if (out_n == 16 && in_n == 16)
40215 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40216 }
40217 break;
40218
40219 case BUILT_IN_IFLOOR:
40220 case BUILT_IN_LFLOOR:
40221 case BUILT_IN_LLFLOOR:
40222 /* The round insn does not trap on denormals. */
40223 if (flag_trapping_math || !TARGET_ROUND)
40224 break;
40225
40226 if (out_mode == SImode && in_mode == DFmode)
40227 {
40228 if (out_n == 4 && in_n == 2)
40229 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40230 else if (out_n == 8 && in_n == 4)
40231 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40232 else if (out_n == 16 && in_n == 8)
40233 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40234 }
40235 break;
40236
40237 case BUILT_IN_IFLOORF:
40238 case BUILT_IN_LFLOORF:
40239 case BUILT_IN_LLFLOORF:
40240 /* The round insn does not trap on denormals. */
40241 if (flag_trapping_math || !TARGET_ROUND)
40242 break;
40243
40244 if (out_mode == SImode && in_mode == SFmode)
40245 {
40246 if (out_n == 4 && in_n == 4)
40247 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40248 else if (out_n == 8 && in_n == 8)
40249 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40250 }
40251 break;
40252
40253 case BUILT_IN_ICEIL:
40254 case BUILT_IN_LCEIL:
40255 case BUILT_IN_LLCEIL:
40256 /* The round insn does not trap on denormals. */
40257 if (flag_trapping_math || !TARGET_ROUND)
40258 break;
40259
40260 if (out_mode == SImode && in_mode == DFmode)
40261 {
40262 if (out_n == 4 && in_n == 2)
40263 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40264 else if (out_n == 8 && in_n == 4)
40265 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40266 else if (out_n == 16 && in_n == 8)
40267 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40268 }
40269 break;
40270
40271 case BUILT_IN_ICEILF:
40272 case BUILT_IN_LCEILF:
40273 case BUILT_IN_LLCEILF:
40274 /* The round insn does not trap on denormals. */
40275 if (flag_trapping_math || !TARGET_ROUND)
40276 break;
40277
40278 if (out_mode == SImode && in_mode == SFmode)
40279 {
40280 if (out_n == 4 && in_n == 4)
40281 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40282 else if (out_n == 8 && in_n == 8)
40283 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40284 }
40285 break;
40286
40287 case BUILT_IN_IRINT:
40288 case BUILT_IN_LRINT:
40289 case BUILT_IN_LLRINT:
40290 if (out_mode == SImode && in_mode == DFmode)
40291 {
40292 if (out_n == 4 && in_n == 2)
40293 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40294 else if (out_n == 8 && in_n == 4)
40295 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40296 }
40297 break;
40298
40299 case BUILT_IN_IRINTF:
40300 case BUILT_IN_LRINTF:
40301 case BUILT_IN_LLRINTF:
40302 if (out_mode == SImode && in_mode == SFmode)
40303 {
40304 if (out_n == 4 && in_n == 4)
40305 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40306 else if (out_n == 8 && in_n == 8)
40307 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40308 }
40309 break;
40310
40311 case BUILT_IN_IROUND:
40312 case BUILT_IN_LROUND:
40313 case BUILT_IN_LLROUND:
40314 /* The round insn does not trap on denormals. */
40315 if (flag_trapping_math || !TARGET_ROUND)
40316 break;
40317
40318 if (out_mode == SImode && in_mode == DFmode)
40319 {
40320 if (out_n == 4 && in_n == 2)
40321 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40322 else if (out_n == 8 && in_n == 4)
40323 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40324 else if (out_n == 16 && in_n == 8)
40325 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40326 }
40327 break;
40328
40329 case BUILT_IN_IROUNDF:
40330 case BUILT_IN_LROUNDF:
40331 case BUILT_IN_LLROUNDF:
40332 /* The round insn does not trap on denormals. */
40333 if (flag_trapping_math || !TARGET_ROUND)
40334 break;
40335
40336 if (out_mode == SImode && in_mode == SFmode)
40337 {
40338 if (out_n == 4 && in_n == 4)
40339 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40340 else if (out_n == 8 && in_n == 8)
40341 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40342 }
40343 break;
40344
40345 case BUILT_IN_COPYSIGN:
40346 if (out_mode == DFmode && in_mode == DFmode)
40347 {
40348 if (out_n == 2 && in_n == 2)
40349 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40350 else if (out_n == 4 && in_n == 4)
40351 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40352 else if (out_n == 8 && in_n == 8)
40353 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40354 }
40355 break;
40356
40357 case BUILT_IN_COPYSIGNF:
40358 if (out_mode == SFmode && in_mode == SFmode)
40359 {
40360 if (out_n == 4 && in_n == 4)
40361 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40362 else if (out_n == 8 && in_n == 8)
40363 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40364 else if (out_n == 16 && in_n == 16)
40365 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40366 }
40367 break;
40368
40369 case BUILT_IN_FLOOR:
40370 /* The round insn does not trap on denormals. */
40371 if (flag_trapping_math || !TARGET_ROUND)
40372 break;
40373
40374 if (out_mode == DFmode && in_mode == DFmode)
40375 {
40376 if (out_n == 2 && in_n == 2)
40377 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40378 else if (out_n == 4 && in_n == 4)
40379 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40380 }
40381 break;
40382
40383 case BUILT_IN_FLOORF:
40384 /* The round insn does not trap on denormals. */
40385 if (flag_trapping_math || !TARGET_ROUND)
40386 break;
40387
40388 if (out_mode == SFmode && in_mode == SFmode)
40389 {
40390 if (out_n == 4 && in_n == 4)
40391 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40392 else if (out_n == 8 && in_n == 8)
40393 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40394 }
40395 break;
40396
40397 case BUILT_IN_CEIL:
40398 /* The round insn does not trap on denormals. */
40399 if (flag_trapping_math || !TARGET_ROUND)
40400 break;
40401
40402 if (out_mode == DFmode && in_mode == DFmode)
40403 {
40404 if (out_n == 2 && in_n == 2)
40405 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40406 else if (out_n == 4 && in_n == 4)
40407 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40408 }
40409 break;
40410
40411 case BUILT_IN_CEILF:
40412 /* The round insn does not trap on denormals. */
40413 if (flag_trapping_math || !TARGET_ROUND)
40414 break;
40415
40416 if (out_mode == SFmode && in_mode == SFmode)
40417 {
40418 if (out_n == 4 && in_n == 4)
40419 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40420 else if (out_n == 8 && in_n == 8)
40421 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40422 }
40423 break;
40424
40425 case BUILT_IN_TRUNC:
40426 /* The round insn does not trap on denormals. */
40427 if (flag_trapping_math || !TARGET_ROUND)
40428 break;
40429
40430 if (out_mode == DFmode && in_mode == DFmode)
40431 {
40432 if (out_n == 2 && in_n == 2)
40433 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40434 else if (out_n == 4 && in_n == 4)
40435 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40436 }
40437 break;
40438
40439 case BUILT_IN_TRUNCF:
40440 /* The round insn does not trap on denormals. */
40441 if (flag_trapping_math || !TARGET_ROUND)
40442 break;
40443
40444 if (out_mode == SFmode && in_mode == SFmode)
40445 {
40446 if (out_n == 4 && in_n == 4)
40447 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40448 else if (out_n == 8 && in_n == 8)
40449 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40450 }
40451 break;
40452
40453 case BUILT_IN_RINT:
40454 /* The round insn does not trap on denormals. */
40455 if (flag_trapping_math || !TARGET_ROUND)
40456 break;
40457
40458 if (out_mode == DFmode && in_mode == DFmode)
40459 {
40460 if (out_n == 2 && in_n == 2)
40461 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40462 else if (out_n == 4 && in_n == 4)
40463 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40464 }
40465 break;
40466
40467 case BUILT_IN_RINTF:
40468 /* The round insn does not trap on denormals. */
40469 if (flag_trapping_math || !TARGET_ROUND)
40470 break;
40471
40472 if (out_mode == SFmode && in_mode == SFmode)
40473 {
40474 if (out_n == 4 && in_n == 4)
40475 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40476 else if (out_n == 8 && in_n == 8)
40477 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40478 }
40479 break;
40480
40481 case BUILT_IN_ROUND:
40482 /* The round insn does not trap on denormals. */
40483 if (flag_trapping_math || !TARGET_ROUND)
40484 break;
40485
40486 if (out_mode == DFmode && in_mode == DFmode)
40487 {
40488 if (out_n == 2 && in_n == 2)
40489 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40490 else if (out_n == 4 && in_n == 4)
40491 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40492 }
40493 break;
40494
40495 case BUILT_IN_ROUNDF:
40496 /* The round insn does not trap on denormals. */
40497 if (flag_trapping_math || !TARGET_ROUND)
40498 break;
40499
40500 if (out_mode == SFmode && in_mode == SFmode)
40501 {
40502 if (out_n == 4 && in_n == 4)
40503 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40504 else if (out_n == 8 && in_n == 8)
40505 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40506 }
40507 break;
40508
40509 case BUILT_IN_FMA:
40510 if (out_mode == DFmode && in_mode == DFmode)
40511 {
40512 if (out_n == 2 && in_n == 2)
40513 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40514 if (out_n == 4 && in_n == 4)
40515 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40516 }
40517 break;
40518
40519 case BUILT_IN_FMAF:
40520 if (out_mode == SFmode && in_mode == SFmode)
40521 {
40522 if (out_n == 4 && in_n == 4)
40523 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40524 if (out_n == 8 && in_n == 8)
40525 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40526 }
40527 break;
40528
40529 default:
40530 break;
40531 }
40532
40533 /* Dispatch to a handler for a vectorization library. */
40534 if (ix86_veclib_handler)
40535 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40536 type_in);
40537
40538 return NULL_TREE;
40539 }
40540
40541 /* Handler for an SVML-style interface to
40542 a library with vectorized intrinsics. */
40543
40544 static tree
40545 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40546 {
40547 char name[20];
40548 tree fntype, new_fndecl, args;
40549 unsigned arity;
40550 const char *bname;
40551 machine_mode el_mode, in_mode;
40552 int n, in_n;
40553
40554 /* The SVML is suitable for unsafe math only. */
40555 if (!flag_unsafe_math_optimizations)
40556 return NULL_TREE;
40557
40558 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40559 n = TYPE_VECTOR_SUBPARTS (type_out);
40560 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40561 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40562 if (el_mode != in_mode
40563 || n != in_n)
40564 return NULL_TREE;
40565
40566 switch (fn)
40567 {
40568 case BUILT_IN_EXP:
40569 case BUILT_IN_LOG:
40570 case BUILT_IN_LOG10:
40571 case BUILT_IN_POW:
40572 case BUILT_IN_TANH:
40573 case BUILT_IN_TAN:
40574 case BUILT_IN_ATAN:
40575 case BUILT_IN_ATAN2:
40576 case BUILT_IN_ATANH:
40577 case BUILT_IN_CBRT:
40578 case BUILT_IN_SINH:
40579 case BUILT_IN_SIN:
40580 case BUILT_IN_ASINH:
40581 case BUILT_IN_ASIN:
40582 case BUILT_IN_COSH:
40583 case BUILT_IN_COS:
40584 case BUILT_IN_ACOSH:
40585 case BUILT_IN_ACOS:
40586 if (el_mode != DFmode || n != 2)
40587 return NULL_TREE;
40588 break;
40589
40590 case BUILT_IN_EXPF:
40591 case BUILT_IN_LOGF:
40592 case BUILT_IN_LOG10F:
40593 case BUILT_IN_POWF:
40594 case BUILT_IN_TANHF:
40595 case BUILT_IN_TANF:
40596 case BUILT_IN_ATANF:
40597 case BUILT_IN_ATAN2F:
40598 case BUILT_IN_ATANHF:
40599 case BUILT_IN_CBRTF:
40600 case BUILT_IN_SINHF:
40601 case BUILT_IN_SINF:
40602 case BUILT_IN_ASINHF:
40603 case BUILT_IN_ASINF:
40604 case BUILT_IN_COSHF:
40605 case BUILT_IN_COSF:
40606 case BUILT_IN_ACOSHF:
40607 case BUILT_IN_ACOSF:
40608 if (el_mode != SFmode || n != 4)
40609 return NULL_TREE;
40610 break;
40611
40612 default:
40613 return NULL_TREE;
40614 }
40615
40616 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40617
40618 if (fn == BUILT_IN_LOGF)
40619 strcpy (name, "vmlsLn4");
40620 else if (fn == BUILT_IN_LOG)
40621 strcpy (name, "vmldLn2");
40622 else if (n == 4)
40623 {
40624 sprintf (name, "vmls%s", bname+10);
40625 name[strlen (name)-1] = '4';
40626 }
40627 else
40628 sprintf (name, "vmld%s2", bname+10);
40629
40630 /* Convert to uppercase. */
40631 name[4] &= ~0x20;
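/* For example, BUILT_IN_SINF ("__builtin_sinf") is mangled to
"vmlsSin4" and BUILT_IN_SIN ("__builtin_sin") to "vmldSin2". */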
40632
40633 arity = 0;
40634 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40635 args;
40636 args = TREE_CHAIN (args))
40637 arity++;
40638
40639 if (arity == 1)
40640 fntype = build_function_type_list (type_out, type_in, NULL);
40641 else
40642 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40643
40644 /* Build a function declaration for the vectorized function. */
40645 new_fndecl = build_decl (BUILTINS_LOCATION,
40646 FUNCTION_DECL, get_identifier (name), fntype);
40647 TREE_PUBLIC (new_fndecl) = 1;
40648 DECL_EXTERNAL (new_fndecl) = 1;
40649 DECL_IS_NOVOPS (new_fndecl) = 1;
40650 TREE_READONLY (new_fndecl) = 1;
40651
40652 return new_fndecl;
40653 }
40654
40655 /* Handler for an ACML-style interface to
40656 a library with vectorized intrinsics. */
40657
40658 static tree
40659 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40660 {
40661 char name[20] = "__vr.._";
40662 tree fntype, new_fndecl, args;
40663 unsigned arity;
40664 const char *bname;
40665 machine_mode el_mode, in_mode;
40666 int n, in_n;
40667
40668 /* ACML is 64-bit only and suitable only for unsafe math, as
40669 it does not correctly support parts of IEEE arithmetic with the
40670 required precision, such as denormals. */
40671 if (!TARGET_64BIT
40672 || !flag_unsafe_math_optimizations)
40673 return NULL_TREE;
40674
40675 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40676 n = TYPE_VECTOR_SUBPARTS (type_out);
40677 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40678 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40679 if (el_mode != in_mode
40680 || n != in_n)
40681 return NULL_TREE;
40682
40683 switch (fn)
40684 {
40685 case BUILT_IN_SIN:
40686 case BUILT_IN_COS:
40687 case BUILT_IN_EXP:
40688 case BUILT_IN_LOG:
40689 case BUILT_IN_LOG2:
40690 case BUILT_IN_LOG10:
40691 name[4] = 'd';
40692 name[5] = '2';
40693 if (el_mode != DFmode
40694 || n != 2)
40695 return NULL_TREE;
40696 break;
40697
40698 case BUILT_IN_SINF:
40699 case BUILT_IN_COSF:
40700 case BUILT_IN_EXPF:
40701 case BUILT_IN_POWF:
40702 case BUILT_IN_LOGF:
40703 case BUILT_IN_LOG2F:
40704 case BUILT_IN_LOG10F:
40705 name[4] = 's';
40706 name[5] = '4';
40707 if (el_mode != SFmode
40708 || n != 4)
40709 return NULL_TREE;
40710 break;
40711
40712 default:
40713 return NULL_TREE;
40714 }
40715
40716 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40717 sprintf (name + 7, "%s", bname+10);
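/* For example, BUILT_IN_SINF is mangled to "__vrs4_sinf" and
BUILT_IN_SIN to "__vrd2_sin". */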
40718
40719 arity = 0;
40720 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40721 args;
40722 args = TREE_CHAIN (args))
40723 arity++;
40724
40725 if (arity == 1)
40726 fntype = build_function_type_list (type_out, type_in, NULL);
40727 else
40728 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40729
40730 /* Build a function declaration for the vectorized function. */
40731 new_fndecl = build_decl (BUILTINS_LOCATION,
40732 FUNCTION_DECL, get_identifier (name), fntype);
40733 TREE_PUBLIC (new_fndecl) = 1;
40734 DECL_EXTERNAL (new_fndecl) = 1;
40735 DECL_IS_NOVOPS (new_fndecl) = 1;
40736 TREE_READONLY (new_fndecl) = 1;
40737
40738 return new_fndecl;
40739 }
40740
40741 /* Returns a decl of a function that implements gather load with
40742 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40743 Return NULL_TREE if it is not available. */
40744
40745 static tree
40746 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40747 const_tree index_type, int scale)
40748 {
40749 bool si;
40750 enum ix86_builtins code;
40751
40752 if (! TARGET_AVX2)
40753 return NULL_TREE;
40754
40755 if ((TREE_CODE (index_type) != INTEGER_TYPE
40756 && !POINTER_TYPE_P (index_type))
40757 || (TYPE_MODE (index_type) != SImode
40758 && TYPE_MODE (index_type) != DImode))
40759 return NULL_TREE;
40760
40761 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40762 return NULL_TREE;
40763
40764 /* v*gather* insn sign extends index to pointer mode. */
40765 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40766 && TYPE_UNSIGNED (index_type))
40767 return NULL_TREE;
40768
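/* The gather instructions support only scale factors of 1, 2, 4 and 8. */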
40769 if (scale <= 0
40770 || scale > 8
40771 || (scale & (scale - 1)) != 0)
40772 return NULL_TREE;
40773
40774 si = TYPE_MODE (index_type) == SImode;
40775 switch (TYPE_MODE (mem_vectype))
40776 {
40777 case V2DFmode:
40778 if (TARGET_AVX512VL)
40779 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40780 else
40781 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40782 break;
40783 case V4DFmode:
40784 if (TARGET_AVX512VL)
40785 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40786 else
40787 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40788 break;
40789 case V2DImode:
40790 if (TARGET_AVX512VL)
40791 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40792 else
40793 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40794 break;
40795 case V4DImode:
40796 if (TARGET_AVX512VL)
40797 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40798 else
40799 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40800 break;
40801 case V4SFmode:
40802 if (TARGET_AVX512VL)
40803 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40804 else
40805 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40806 break;
40807 case V8SFmode:
40808 if (TARGET_AVX512VL)
40809 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40810 else
40811 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40812 break;
40813 case V4SImode:
40814 if (TARGET_AVX512VL)
40815 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40816 else
40817 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40818 break;
40819 case V8SImode:
40820 if (TARGET_AVX512VL)
40821 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40822 else
40823 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40824 break;
40825 case V8DFmode:
40826 if (TARGET_AVX512F)
40827 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40828 else
40829 return NULL_TREE;
40830 break;
40831 case V8DImode:
40832 if (TARGET_AVX512F)
40833 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40834 else
40835 return NULL_TREE;
40836 break;
40837 case V16SFmode:
40838 if (TARGET_AVX512F)
40839 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40840 else
40841 return NULL_TREE;
40842 break;
40843 case V16SImode:
40844 if (TARGET_AVX512F)
40845 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40846 else
40847 return NULL_TREE;
40848 break;
40849 default:
40850 return NULL_TREE;
40851 }
40852
40853 return ix86_get_builtin (code);
40854 }
40855
40856 /* Returns a decl of a target-specific builtin that implements the
40857 reciprocal of the function, or NULL_TREE if not available. */
40858
40859 static tree
40860 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40861 {
40862 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40863 && flag_finite_math_only && !flag_trapping_math
40864 && flag_unsafe_math_optimizations))
40865 return NULL_TREE;
40866
40867 if (md_fn)
40868 /* Machine dependent builtins. */
40869 switch (fn)
40870 {
40871 /* Vectorized version of sqrt to rsqrt conversion. */
40872 case IX86_BUILTIN_SQRTPS_NR:
40873 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40874
40875 case IX86_BUILTIN_SQRTPS_NR256:
40876 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40877
40878 default:
40879 return NULL_TREE;
40880 }
40881 else
40882 /* Normal builtins. */
40883 switch (fn)
40884 {
40885 /* Sqrt to rsqrt conversion. */
40886 case BUILT_IN_SQRTF:
40887 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40888
40889 default:
40890 return NULL_TREE;
40891 }
40892 }
40893 \f
40894 /* Helper for avx_vpermilps256_operand et al. This is also used by
40895 the expansion functions to turn the parallel back into a mask.
40896 The return value is 0 for no match and the imm8+1 for a match. */
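/* For example, for V4SFmode a parallel selecting elements (2 3 0 1)
reconstructs to imm8 0x4e, so the return value is 0x4f. */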
40897
40898 int
40899 avx_vpermilp_parallel (rtx par, machine_mode mode)
40900 {
40901 unsigned i, nelt = GET_MODE_NUNITS (mode);
40902 unsigned mask = 0;
40903 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40904
40905 if (XVECLEN (par, 0) != (int) nelt)
40906 return 0;
40907
40908 /* Validate that all of the elements are constants, and not totally
40909 out of range. Copy the data into an integral array to make the
40910 subsequent checks easier. */
40911 for (i = 0; i < nelt; ++i)
40912 {
40913 rtx er = XVECEXP (par, 0, i);
40914 unsigned HOST_WIDE_INT ei;
40915
40916 if (!CONST_INT_P (er))
40917 return 0;
40918 ei = INTVAL (er);
40919 if (ei >= nelt)
40920 return 0;
40921 ipar[i] = ei;
40922 }
40923
40924 switch (mode)
40925 {
40926 case V8DFmode:
40927 /* In the 512-bit DFmode case, we can only move elements within
40928 a 128-bit lane. First fill the second part of the mask,
40929 then fallthru. */
40930 for (i = 4; i < 6; ++i)
40931 {
40932 if (ipar[i] < 4 || ipar[i] >= 6)
40933 return 0;
40934 mask |= (ipar[i] - 4) << i;
40935 }
40936 for (i = 6; i < 8; ++i)
40937 {
40938 if (ipar[i] < 6)
40939 return 0;
40940 mask |= (ipar[i] - 6) << i;
40941 }
40942 /* FALLTHRU */
40943
40944 case V4DFmode:
40945 /* In the 256-bit DFmode case, we can only move elements within
40946 a 128-bit lane. */
40947 for (i = 0; i < 2; ++i)
40948 {
40949 if (ipar[i] >= 2)
40950 return 0;
40951 mask |= ipar[i] << i;
40952 }
40953 for (i = 2; i < 4; ++i)
40954 {
40955 if (ipar[i] < 2)
40956 return 0;
40957 mask |= (ipar[i] - 2) << i;
40958 }
40959 break;
40960
40961 case V16SFmode:
40962 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
40963 must mirror the permutation in the lower 256 bits. */
40964 for (i = 0; i < 8; ++i)
40965 if (ipar[i] + 8 != ipar[i + 8])
40966 return 0;
40967 /* FALLTHRU */
40968
40969 case V8SFmode:
40970 /* In the 256-bit SFmode case, we have full freedom of
40971 movement within the low 128-bit lane, but the high 128-bit
40972 lane must mirror the exact same pattern. */
40973 for (i = 0; i < 4; ++i)
40974 if (ipar[i] + 4 != ipar[i + 4])
40975 return 0;
40976 nelt = 4;
40977 /* FALLTHRU */
40978
40979 case V2DFmode:
40980 case V4SFmode:
40981 /* In the 128-bit case, we have full freedom in the placement of
40982 the elements from the source operand. */
40983 for (i = 0; i < nelt; ++i)
40984 mask |= ipar[i] << (i * (nelt / 2));
40985 break;
40986
40987 default:
40988 gcc_unreachable ();
40989 }
40990
40991 /* Make sure success has a non-zero value by adding one. */
40992 return mask + 1;
40993 }
40994
40995 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
40996 the expansion functions to turn the parallel back into a mask.
40997 The return value is 0 for no match and the imm8+1 for a match. */
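/* For example, for V4DFmode a parallel selecting elements (2 3 4 5)
reconstructs to imm8 0x21, so the return value is 0x22. */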
40998
40999 int
41000 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41001 {
41002 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41003 unsigned mask = 0;
41004 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41005
41006 if (XVECLEN (par, 0) != (int) nelt)
41007 return 0;
41008
41009 /* Validate that all of the elements are constants, and not totally
41010 out of range. Copy the data into an integral array to make the
41011 subsequent checks easier. */
41012 for (i = 0; i < nelt; ++i)
41013 {
41014 rtx er = XVECEXP (par, 0, i);
41015 unsigned HOST_WIDE_INT ei;
41016
41017 if (!CONST_INT_P (er))
41018 return 0;
41019 ei = INTVAL (er);
41020 if (ei >= 2 * nelt)
41021 return 0;
41022 ipar[i] = ei;
41023 }
41024
41025 /* Validate that the halves of the permute are halves. */
41026 for (i = 0; i < nelt2 - 1; ++i)
41027 if (ipar[i] + 1 != ipar[i + 1])
41028 return 0;
41029 for (i = nelt2; i < nelt - 1; ++i)
41030 if (ipar[i] + 1 != ipar[i + 1])
41031 return 0;
41032
41033 /* Reconstruct the mask. */
41034 for (i = 0; i < 2; ++i)
41035 {
41036 unsigned e = ipar[i * nelt2];
41037 if (e % nelt2)
41038 return 0;
41039 e /= nelt2;
41040 mask |= e << (i * 4);
41041 }
41042
41043 /* Make sure success has a non-zero value by adding one. */
41044 return mask + 1;
41045 }
41046 \f
41047 /* Return a register priority for hard reg REGNO. */
41048 static int
41049 ix86_register_priority (int hard_regno)
41050 {
41051 /* ebp and r13 as the base always want a displacement, and r12 as the
41052 base always wants an index. So discourage their usage in an
41053 address. */
41054 if (hard_regno == R12_REG || hard_regno == R13_REG)
41055 return 0;
41056 if (hard_regno == BP_REG)
41057 return 1;
41058 /* New x86-64 int registers result in bigger code size. Discourage
41059 them. */
41060 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41061 return 2;
41062 /* New x86-64 SSE registers result in bigger code size. Discourage
41063 them. */
41064 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41065 return 2;
41066 /* Usage of AX register results in smaller code. Prefer it. */
41067 if (hard_regno == 0)
41068 return 4;
41069 return 3;
41070 }
41071
41072 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41073
41074 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41075 QImode must go into class Q_REGS.
41076 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41077 movdf to do mem-to-mem moves through integer regs. */
41078
41079 static reg_class_t
41080 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41081 {
41082 machine_mode mode = GET_MODE (x);
41083
41084 /* We're only allowed to return a subclass of CLASS. Many of the
41085 following checks fail for NO_REGS, so eliminate that early. */
41086 if (regclass == NO_REGS)
41087 return NO_REGS;
41088
41089 /* All classes can load zeros. */
41090 if (x == CONST0_RTX (mode))
41091 return regclass;
41092
41093 /* Force constants into memory if we are loading a (nonzero) constant into
41094 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41095 instructions to load from a constant. */
41096 if (CONSTANT_P (x)
41097 && (MAYBE_MMX_CLASS_P (regclass)
41098 || MAYBE_SSE_CLASS_P (regclass)
41099 || MAYBE_MASK_CLASS_P (regclass)))
41100 return NO_REGS;
41101
41102 /* Prefer SSE regs only, if we can use them for math. */
41103 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41104 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41105
41106 /* Floating-point constants need more complex checks. */
41107 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41108 {
41109 /* General regs can load everything. */
41110 if (reg_class_subset_p (regclass, GENERAL_REGS))
41111 return regclass;
41112
41113 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41114 zero above. We only want to wind up preferring 80387 registers if
41115 we plan on doing computation with them. */
41116 if (TARGET_80387
41117 && standard_80387_constant_p (x) > 0)
41118 {
41119 /* Limit class to non-sse. */
41120 if (regclass == FLOAT_SSE_REGS)
41121 return FLOAT_REGS;
41122 if (regclass == FP_TOP_SSE_REGS)
41123 return FP_TOP_REG;
41124 if (regclass == FP_SECOND_SSE_REGS)
41125 return FP_SECOND_REG;
41126 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41127 return regclass;
41128 }
41129
41130 return NO_REGS;
41131 }
41132
41133 /* Generally when we see PLUS here, it's the function invariant
41134 (plus soft-fp const_int). Which can only be computed into general
41135 regs. */
41136 if (GET_CODE (x) == PLUS)
41137 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41138
41139 /* QImode constants are easy to load, but non-constant QImode data
41140 must go into Q_REGS. */
41141 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41142 {
41143 if (reg_class_subset_p (regclass, Q_REGS))
41144 return regclass;
41145 if (reg_class_subset_p (Q_REGS, regclass))
41146 return Q_REGS;
41147 return NO_REGS;
41148 }
41149
41150 return regclass;
41151 }
41152
41153 /* Discourage putting floating-point values in SSE registers unless
41154 SSE math is being used, and likewise for the 387 registers. */
41155 static reg_class_t
41156 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41157 {
41158 machine_mode mode = GET_MODE (x);
41159
41160 /* Restrict the output reload class to the register bank that we are doing
41161 math on. If we would like not to return a subset of CLASS, reject this
41162 alternative: if reload cannot do this, it will still use its choice. */
41163 mode = GET_MODE (x);
41164 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41165 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41166
41167 if (X87_FLOAT_MODE_P (mode))
41168 {
41169 if (regclass == FP_TOP_SSE_REGS)
41170 return FP_TOP_REG;
41171 else if (regclass == FP_SECOND_SSE_REGS)
41172 return FP_SECOND_REG;
41173 else
41174 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41175 }
41176
41177 return regclass;
41178 }
41179
41180 static reg_class_t
41181 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41182 machine_mode mode, secondary_reload_info *sri)
41183 {
41184 /* Double-word spills from general registers to non-offsettable memory
41185 references (zero-extended addresses) require special handling. */
41186 if (TARGET_64BIT
41187 && MEM_P (x)
41188 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41189 && INTEGER_CLASS_P (rclass)
41190 && !offsettable_memref_p (x))
41191 {
41192 sri->icode = (in_p
41193 ? CODE_FOR_reload_noff_load
41194 : CODE_FOR_reload_noff_store);
41195 /* Add the cost of moving address to a temporary. */
41196 sri->extra_cost = 1;
41197
41198 return NO_REGS;
41199 }
41200
41201 /* QImode spills from non-QI registers require
41202 intermediate register on 32bit targets. */
41203 if (mode == QImode
41204 && (MAYBE_MASK_CLASS_P (rclass)
41205 || (!TARGET_64BIT && !in_p
41206 && INTEGER_CLASS_P (rclass)
41207 && MAYBE_NON_Q_CLASS_P (rclass))))
41208 {
41209 int regno;
41210
41211 if (REG_P (x))
41212 regno = REGNO (x);
41213 else
41214 regno = -1;
41215
41216 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41217 regno = true_regnum (x);
41218
41219 /* Return Q_REGS if the operand is in memory. */
41220 if (regno == -1)
41221 return Q_REGS;
41222 }
41223
41224 /* This condition handles corner case where an expression involving
41225 pointers gets vectorized. We're trying to use the address of a
41226 stack slot as a vector initializer.
41227
41228 (set (reg:V2DI 74 [ vect_cst_.2 ])
41229 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41230
41231 Eventually frame gets turned into sp+offset like this:
41232
41233 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41234 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41235 (const_int 392 [0x188]))))
41236
41237 That later gets turned into:
41238
41239 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41240 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41241 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41242
41243 We'll have the following reload recorded:
41244
41245 Reload 0: reload_in (DI) =
41246 (plus:DI (reg/f:DI 7 sp)
41247 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41248 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41249 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41250 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41251 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41252 reload_reg_rtx: (reg:V2DI 22 xmm1)
41253
41254 Which isn't going to work since SSE instructions can't handle scalar
41255 additions. Returning GENERAL_REGS forces the addition into integer
41256 register and reload can handle subsequent reloads without problems. */
41257
41258 if (in_p && GET_CODE (x) == PLUS
41259 && SSE_CLASS_P (rclass)
41260 && SCALAR_INT_MODE_P (mode))
41261 return GENERAL_REGS;
41262
41263 return NO_REGS;
41264 }
41265
41266 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41267
41268 static bool
41269 ix86_class_likely_spilled_p (reg_class_t rclass)
41270 {
41271 switch (rclass)
41272 {
41273 case AREG:
41274 case DREG:
41275 case CREG:
41276 case BREG:
41277 case AD_REGS:
41278 case SIREG:
41279 case DIREG:
41280 case SSE_FIRST_REG:
41281 case FP_TOP_REG:
41282 case FP_SECOND_REG:
41283 case BND_REGS:
41284 return true;
41285
41286 default:
41287 break;
41288 }
41289
41290 return false;
41291 }
41292
41293 /* If we are copying between general and FP registers, we need a memory
41294 location. The same is true for SSE and MMX registers.
41295
41296 To optimize register_move_cost performance, allow inline variant.
41297
41298 The macro can't work reliably when one of the CLASSES is a class containing
41299 registers from multiple units (SSE, MMX, integer). We avoid this by never
41300 combining those units in a single alternative in the machine description.
41301 Ensure that this constraint holds to avoid unexpected surprises.
41302
41303 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41304 enforce these sanity checks. */
41305
41306 static inline bool
41307 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41308 machine_mode mode, int strict)
41309 {
41310 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41311 return false;
41312 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41313 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41314 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41315 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41316 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41317 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41318 {
41319 gcc_assert (!strict || lra_in_progress);
41320 return true;
41321 }
41322
41323 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41324 return true;
41325
41326 /* Between mask and general, we have moves no larger than word size. */
41327 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41328 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41329 return true;
41330
41331 /* ??? This is a lie. We do have moves between mmx/general, and for
41332 mmx/sse2. But by saying we need secondary memory we discourage the
41333 register allocator from using the mmx registers unless needed. */
41334 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41335 return true;
41336
41337 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41338 {
41339 /* SSE1 doesn't have any direct moves from other classes. */
41340 if (!TARGET_SSE2)
41341 return true;
41342
41343 /* If the target says that inter-unit moves are more expensive
41344 than moving through memory, then don't generate them. */
41345 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41346 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41347 return true;
41348
41349 /* Between SSE and general, we have moves no larger than word size. */
41350 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41351 return true;
41352 }
41353
41354 return false;
41355 }
41356
41357 bool
41358 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41359 machine_mode mode, int strict)
41360 {
41361 return inline_secondary_memory_needed (class1, class2, mode, strict);
41362 }
41363
41364 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41365
41366 On the 80386, this is the size of MODE in words,
41367 except in the FP regs, where a single reg is always enough. */
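/* For example, XFmode needs three general registers on a 32-bit target
but only a single FP register. */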
41368
41369 static unsigned char
41370 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41371 {
41372 if (MAYBE_INTEGER_CLASS_P (rclass))
41373 {
41374 if (mode == XFmode)
41375 return (TARGET_64BIT ? 2 : 3);
41376 else if (mode == XCmode)
41377 return (TARGET_64BIT ? 4 : 6);
41378 else
41379 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41380 }
41381 else
41382 {
41383 if (COMPLEX_MODE_P (mode))
41384 return 2;
41385 else
41386 return 1;
41387 }
41388 }
41389
41390 /* Return true if the registers in CLASS cannot represent the change from
41391 modes FROM to TO. */
41392
41393 bool
41394 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41395 enum reg_class regclass)
41396 {
41397 if (from == to)
41398 return false;
41399
41400 /* x87 registers can't do subreg at all, as all values are reformatted
41401 to extended precision. */
41402 if (MAYBE_FLOAT_CLASS_P (regclass))
41403 return true;
41404
41405 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41406 {
41407 /* Vector registers do not support QI or HImode loads. If we don't
41408 disallow a change to these modes, reload will assume it's ok to
41409 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41410 the vec_dupv4hi pattern. */
41411 if (GET_MODE_SIZE (from) < 4)
41412 return true;
41413 }
41414
41415 return false;
41416 }
41417
41418 /* Return the cost of moving data of mode M between a
41419 register and memory. A value of 2 is the default; this cost is
41420 relative to those in `REGISTER_MOVE_COST'.
41421
41422 This function is used extensively by register_move_cost, which is used to
41423 build tables at startup, so make it inline here.
41424 When IN is 2, return the maximum of the in and out move costs.
41425
41426 If moving between registers and memory is more expensive than
41427 between two registers, you should define this macro to express the
41428 relative cost.
41429
41430 Also model the increased cost of moving QImode registers in non
41431 Q_REGS classes.
41432 */
41433 static inline int
41434 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41435 int in)
41436 {
41437 int cost;
41438 if (FLOAT_CLASS_P (regclass))
41439 {
41440 int index;
41441 switch (mode)
41442 {
41443 case SFmode:
41444 index = 0;
41445 break;
41446 case DFmode:
41447 index = 1;
41448 break;
41449 case XFmode:
41450 index = 2;
41451 break;
41452 default:
41453 return 100;
41454 }
41455 if (in == 2)
41456 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41457 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41458 }
41459 if (SSE_CLASS_P (regclass))
41460 {
41461 int index;
41462 switch (GET_MODE_SIZE (mode))
41463 {
41464 case 4:
41465 index = 0;
41466 break;
41467 case 8:
41468 index = 1;
41469 break;
41470 case 16:
41471 index = 2;
41472 break;
41473 default:
41474 return 100;
41475 }
41476 if (in == 2)
41477 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41478 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41479 }
41480 if (MMX_CLASS_P (regclass))
41481 {
41482 int index;
41483 switch (GET_MODE_SIZE (mode))
41484 {
41485 case 4:
41486 index = 0;
41487 break;
41488 case 8:
41489 index = 1;
41490 break;
41491 default:
41492 return 100;
41493 }
41494 if (in == 2)
41495 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41496 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41497 }
41498 switch (GET_MODE_SIZE (mode))
41499 {
41500 case 1:
41501 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41502 {
41503 if (!in)
41504 return ix86_cost->int_store[0];
41505 if (TARGET_PARTIAL_REG_DEPENDENCY
41506 && optimize_function_for_speed_p (cfun))
41507 cost = ix86_cost->movzbl_load;
41508 else
41509 cost = ix86_cost->int_load[0];
41510 if (in == 2)
41511 return MAX (cost, ix86_cost->int_store[0]);
41512 return cost;
41513 }
41514 else
41515 {
41516 if (in == 2)
41517 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41518 if (in)
41519 return ix86_cost->movzbl_load;
41520 else
41521 return ix86_cost->int_store[0] + 4;
41522 }
41523 break;
41524 case 2:
41525 if (in == 2)
41526 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41527 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41528 default:
41529 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41530 if (mode == TFmode)
41531 mode = XFmode;
41532 if (in == 2)
41533 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41534 else if (in)
41535 cost = ix86_cost->int_load[2];
41536 else
41537 cost = ix86_cost->int_store[2];
41538 return (cost * (((int) GET_MODE_SIZE (mode)
41539 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41540 }
41541 }
41542
41543 static int
41544 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41545 bool in)
41546 {
41547 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41548 }
41549
41550
41551 /* Return the cost of moving data from a register in class CLASS1 to
41552 one in class CLASS2.
41553
41554 It is not required that the cost always equal 2 when FROM is the same as TO;
41555 on some machines it is expensive to move between registers if they are not
41556 general registers. */
41557
41558 static int
41559 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41560 reg_class_t class2_i)
41561 {
41562 enum reg_class class1 = (enum reg_class) class1_i;
41563 enum reg_class class2 = (enum reg_class) class2_i;
41564
41565 /* In case we require secondary memory, compute cost of the store followed
41566 by load. In order to avoid bad register allocation choices, we need
41567 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41568
41569 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41570 {
41571 int cost = 1;
41572
41573 cost += inline_memory_move_cost (mode, class1, 2);
41574 cost += inline_memory_move_cost (mode, class2, 2);
41575
41576 /* In case of copying from a general purpose register we may emit multiple
41577 stores followed by a single load, causing a memory size mismatch stall.
41578 Count this as an arbitrarily high cost of 20. */
41579 if (targetm.class_max_nregs (class1, mode)
41580 > targetm.class_max_nregs (class2, mode))
41581 cost += 20;
41582
41583 /* In the case of FP/MMX moves, the registers actually overlap, and we
41584 have to switch modes in order to treat them differently. */
41585 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41586 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41587 cost += 20;
41588
41589 return cost;
41590 }
41591
41592 /* Moves between SSE/MMX and integer unit are expensive. */
41593 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41594 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41595
41596 /* ??? By keeping returned value relatively high, we limit the number
41597 of moves between integer and MMX/SSE registers for all targets.
41598 Additionally, high value prevents problem with x86_modes_tieable_p(),
41599 where integer modes in MMX/SSE registers are not tieable
41600 because of missing QImode and HImode moves to, from or between
41601 MMX/SSE registers. */
41602 return MAX (8, ix86_cost->mmxsse_to_integer);
41603
41604 if (MAYBE_FLOAT_CLASS_P (class1))
41605 return ix86_cost->fp_move;
41606 if (MAYBE_SSE_CLASS_P (class1))
41607 return ix86_cost->sse_move;
41608 if (MAYBE_MMX_CLASS_P (class1))
41609 return ix86_cost->mmx_move;
41610 return 2;
41611 }
41612
41613 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41614 MODE. */
41615
41616 bool
41617 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41618 {
41619 /* Flags, and only flags, can hold CCmode values. */
41620 if (CC_REGNO_P (regno))
41621 return GET_MODE_CLASS (mode) == MODE_CC;
41622 if (GET_MODE_CLASS (mode) == MODE_CC
41623 || GET_MODE_CLASS (mode) == MODE_RANDOM
41624 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41625 return false;
41626 if (STACK_REGNO_P (regno))
41627 return VALID_FP_MODE_P (mode);
41628 if (MASK_REGNO_P (regno))
41629 return (VALID_MASK_REG_MODE (mode)
41630 || ((TARGET_AVX512BW || TARGET_AVX512VBMI)
41631 && VALID_MASK_AVX512BW_MODE (mode)));
41632 if (BND_REGNO_P (regno))
41633 return VALID_BND_REG_MODE (mode);
41634 if (SSE_REGNO_P (regno))
41635 {
41636 /* We implement the move patterns for all vector modes into and
41637 out of SSE registers, even when no operation instructions
41638 are available. */
41639
41640 /* For AVX-512 we allow, regardless of regno:
41641 - XI mode
41642 - any of 512-bit wide vector mode
41643 - any scalar mode. */
41644 if (TARGET_AVX512F
41645 && (mode == XImode
41646 || VALID_AVX512F_REG_MODE (mode)
41647 || VALID_AVX512F_SCALAR_MODE (mode)))
41648 return true;
41649
41650 /* TODO check for QI/HI scalars. */
41651 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
41652 if (TARGET_AVX512VL
41653 && (mode == OImode
41654 || mode == TImode
41655 || VALID_AVX256_REG_MODE (mode)
41656 || VALID_AVX512VL_128_REG_MODE (mode)))
41657 return true;
41658
41659 /* xmm16-xmm31 are only available for AVX-512. */
41660 if (EXT_REX_SSE_REGNO_P (regno))
41661 return false;
41662
41663 /* OImode and AVX modes are available only when AVX is enabled. */
41664 return ((TARGET_AVX
41665 && VALID_AVX256_REG_OR_OI_MODE (mode))
41666 || VALID_SSE_REG_MODE (mode)
41667 || VALID_SSE2_REG_MODE (mode)
41668 || VALID_MMX_REG_MODE (mode)
41669 || VALID_MMX_REG_MODE_3DNOW (mode));
41670 }
41671 if (MMX_REGNO_P (regno))
41672 {
41673 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41674 so if the register is available at all, then we can move data of
41675 the given mode into or out of it. */
41676 return (VALID_MMX_REG_MODE (mode)
41677 || VALID_MMX_REG_MODE_3DNOW (mode));
41678 }
41679
41680 if (mode == QImode)
41681 {
41682 /* Take care for QImode values - they can be in non-QI regs,
41683 but then they do cause partial register stalls. */
41684 if (ANY_QI_REGNO_P (regno))
41685 return true;
41686 if (!TARGET_PARTIAL_REG_STALL)
41687 return true;
41688 /* LRA checks if the hard register is OK for the given mode.
41689 QImode values can live in non-QI regs, so we allow all
41690 registers here. */
41691 if (lra_in_progress)
41692 return true;
41693 return !can_create_pseudo_p ();
41694 }
41695 /* We handle both integer and floats in the general purpose registers. */
41696 else if (VALID_INT_MODE_P (mode))
41697 return true;
41698 else if (VALID_FP_MODE_P (mode))
41699 return true;
41700 else if (VALID_DFP_MODE_P (mode))
41701 return true;
41702 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41703 on to use that value in smaller contexts, this can easily force a
41704 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41705 supporting DImode, allow it. */
41706 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41707 return true;
41708
41709 return false;
41710 }
41711
41712 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41713 tieable integer mode. */
41714
41715 static bool
41716 ix86_tieable_integer_mode_p (machine_mode mode)
41717 {
41718 switch (mode)
41719 {
41720 case HImode:
41721 case SImode:
41722 return true;
41723
41724 case QImode:
41725 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41726
41727 case DImode:
41728 return TARGET_64BIT;
41729
41730 default:
41731 return false;
41732 }
41733 }
41734
41735 /* Return true if MODE1 is accessible in a register that can hold MODE2
41736 without copying. That is, all register classes that can hold MODE2
41737 can also hold MODE1. */
41738
41739 bool
41740 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41741 {
41742 if (mode1 == mode2)
41743 return true;
41744
41745 if (ix86_tieable_integer_mode_p (mode1)
41746 && ix86_tieable_integer_mode_p (mode2))
41747 return true;
41748
41749 /* MODE2 being XFmode implies fp stack or general regs, which means we
41750 can tie any smaller floating point modes to it. Note that we do not
41751 tie this with TFmode. */
41752 if (mode2 == XFmode)
41753 return mode1 == SFmode || mode1 == DFmode;
41754
41755 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41756 that we can tie it with SFmode. */
41757 if (mode2 == DFmode)
41758 return mode1 == SFmode;
41759
41760 /* If MODE2 is only appropriate for an SSE register, then tie with
41761 any other mode acceptable to SSE registers. */
41762 if (GET_MODE_SIZE (mode2) == 32
41763 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41764 return (GET_MODE_SIZE (mode1) == 32
41765 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41766 if (GET_MODE_SIZE (mode2) == 16
41767 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41768 return (GET_MODE_SIZE (mode1) == 16
41769 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41770
41771 /* If MODE2 is appropriate for an MMX register, then tie
41772 with any other mode acceptable to MMX registers. */
41773 if (GET_MODE_SIZE (mode2) == 8
41774 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41775 return (GET_MODE_SIZE (mode1) == 8
41776 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41777
41778 return false;
41779 }
41780
41781 /* Return the cost of moving between two registers of mode MODE. */
41782
41783 static int
41784 ix86_set_reg_reg_cost (machine_mode mode)
41785 {
41786 unsigned int units = UNITS_PER_WORD;
41787
41788 switch (GET_MODE_CLASS (mode))
41789 {
41790 default:
41791 break;
41792
41793 case MODE_CC:
41794 units = GET_MODE_SIZE (CCmode);
41795 break;
41796
41797 case MODE_FLOAT:
41798 if ((TARGET_SSE && mode == TFmode)
41799 || (TARGET_80387 && mode == XFmode)
41800 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41801 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41802 units = GET_MODE_SIZE (mode);
41803 break;
41804
41805 case MODE_COMPLEX_FLOAT:
41806 if ((TARGET_SSE && mode == TCmode)
41807 || (TARGET_80387 && mode == XCmode)
41808 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41809 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41810 units = GET_MODE_SIZE (mode);
41811 break;
41812
41813 case MODE_VECTOR_INT:
41814 case MODE_VECTOR_FLOAT:
41815 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41816 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41817 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41818 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41819 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41820 units = GET_MODE_SIZE (mode);
41821 }
41822
41823 /* Return the cost of moving between two registers of mode MODE,
41824 assuming that the move will be in pieces of at most UNITS bytes. */
41825 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41826 }
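/* A worked example of the formula above: with UNITS_PER_WORD == 8
   (x86_64), a TImode move (16 bytes) through integer registers costs
   COSTS_N_INSNS ((16 + 8 - 1) / 8) = COSTS_N_INSNS (2), i.e. two
   word-sized moves, while an SImode move costs COSTS_N_INSNS (1). */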
41827
41828 /* Compute a (partial) cost for rtx X. Return true if the complete
41829 cost has been computed, and false if subexpressions should be
41830 scanned. In either case, *TOTAL contains the cost result. */
41831
41832 static bool
41833 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41834 bool speed)
41835 {
41836 rtx mask;
41837 enum rtx_code code = (enum rtx_code) code_i;
41838 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41839 machine_mode mode = GET_MODE (x);
41840 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41841
41842 switch (code)
41843 {
41844 case SET:
41845 if (register_operand (SET_DEST (x), VOIDmode)
41846 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41847 {
41848 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41849 return true;
41850 }
41851 return false;
41852
41853 case CONST_INT:
41854 case CONST:
41855 case LABEL_REF:
41856 case SYMBOL_REF:
41857 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41858 *total = 3;
41859 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41860 *total = 2;
41861 else if (flag_pic && SYMBOLIC_CONST (x)
41862 && !(TARGET_64BIT
41863 && (GET_CODE (x) == LABEL_REF
41864 || (GET_CODE (x) == SYMBOL_REF
41865 && SYMBOL_REF_LOCAL_P (x)))))
41866 *total = 1;
41867 else
41868 *total = 0;
41869 return true;
41870
41871 case CONST_DOUBLE:
41872 if (mode == VOIDmode)
41873 {
41874 *total = 0;
41875 return true;
41876 }
41877 switch (standard_80387_constant_p (x))
41878 {
41879 case 1: /* 0.0 */
41880 *total = 1;
41881 return true;
41882 default: /* Other constants */
41883 *total = 2;
41884 return true;
41885 case 0:
41886 case -1:
41887 break;
41888 }
41889 if (SSE_FLOAT_MODE_P (mode))
41890 {
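/* Note that the case label below is nested inside the CONST_DOUBLE
   handling: a CONST_VECTOR enters the switch directly at that label,
   bypassing the SSE_FLOAT_MODE_P test above. */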
41891 case CONST_VECTOR:
41892 switch (standard_sse_constant_p (x))
41893 {
41894 case 0:
41895 break;
41896 case 1: /* 0: xor eliminates false dependency */
41897 *total = 0;
41898 return true;
41899 default: /* -1: cmp contains false dependency */
41900 *total = 1;
41901 return true;
41902 }
41903 }
41904 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41905 it'll probably end up. Add a penalty for size. */
41906 *total = (COSTS_N_INSNS (1)
41907 + (flag_pic != 0 && !TARGET_64BIT)
41908 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41909 return true;
41910
41911 case ZERO_EXTEND:
41912 /* Zero extension is often completely free on x86_64, so make
41913 it as cheap as possible. */
41914 if (TARGET_64BIT && mode == DImode
41915 && GET_MODE (XEXP (x, 0)) == SImode)
41916 *total = 1;
41917 else if (TARGET_ZERO_EXTEND_WITH_AND)
41918 *total = cost->add;
41919 else
41920 *total = cost->movzx;
41921 return false;
41922
41923 case SIGN_EXTEND:
41924 *total = cost->movsx;
41925 return false;
41926
41927 case ASHIFT:
41928 if (SCALAR_INT_MODE_P (mode)
41929 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41930 && CONST_INT_P (XEXP (x, 1)))
41931 {
41932 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41933 if (value == 1)
41934 {
41935 *total = cost->add;
41936 return false;
41937 }
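/* Shifts by 2 or 3 can be done with a single lea using scaled
   addressing (a multiply by 4 or 8), so prefer lea whenever it is
   no more expensive than a constant shift. */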
41938 if ((value == 2 || value == 3)
41939 && cost->lea <= cost->shift_const)
41940 {
41941 *total = cost->lea;
41942 return false;
41943 }
41944 }
41945 /* FALLTHRU */
41946
41947 case ROTATE:
41948 case ASHIFTRT:
41949 case LSHIFTRT:
41950 case ROTATERT:
41951 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41952 {
41953 /* ??? Should be SSE vector operation cost. */
41954 /* At least for published AMD latencies, this really is the same
41955 as the latency for a simple fpu operation like fabs. */
41956 /* V*QImode is emulated with 1-11 insns. */
41957 if (mode == V16QImode || mode == V32QImode)
41958 {
41959 int count = 11;
41960 if (TARGET_XOP && mode == V16QImode)
41961 {
41962 /* For XOP we use vpshab, which requires a broadcast of the
41963 value to the variable shift insn. For constants this
41964 means a V16QImode const in mem; even when we could perform
41965 the shift with one insn, set the cost so as to prefer paddb. */
41966 if (CONSTANT_P (XEXP (x, 1)))
41967 {
41968 *total = (cost->fabs
41969 + rtx_cost (XEXP (x, 0), code, 0, speed)
41970 + (speed ? 2 : COSTS_N_BYTES (16)));
41971 return true;
41972 }
41973 count = 3;
41974 }
41975 else if (TARGET_SSSE3)
41976 count = 7;
41977 *total = cost->fabs * count;
41978 }
41979 else
41980 *total = cost->fabs;
41981 }
41982 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41983 {
41984 if (CONST_INT_P (XEXP (x, 1)))
41985 {
41986 if (INTVAL (XEXP (x, 1)) > 32)
41987 *total = cost->shift_const + COSTS_N_INSNS (2);
41988 else
41989 *total = cost->shift_const * 2;
41990 }
41991 else
41992 {
41993 if (GET_CODE (XEXP (x, 1)) == AND)
41994 *total = cost->shift_var * 2;
41995 else
41996 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
41997 }
41998 }
41999 else
42000 {
42001 if (CONST_INT_P (XEXP (x, 1)))
42002 *total = cost->shift_const;
42003 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42004 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42005 {
42006 /* Return the cost after shift-count truncation by the AND. */
42007 *total = cost->shift_var;
42008 return true;
42009 }
42010 else
42011 *total = cost->shift_var;
42012 }
42013 return false;
42014
42015 case FMA:
42016 {
42017 rtx sub;
42018
42019 gcc_assert (FLOAT_MODE_P (mode));
42020 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42021
42022 /* ??? SSE scalar/vector cost should be used here. */
42023 /* ??? Bald assumption that fma has the same cost as fmul. */
42024 *total = cost->fmul;
42025 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42026
42027 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42028 sub = XEXP (x, 0);
42029 if (GET_CODE (sub) == NEG)
42030 sub = XEXP (sub, 0);
42031 *total += rtx_cost (sub, FMA, 0, speed);
42032
42033 sub = XEXP (x, 2);
42034 if (GET_CODE (sub) == NEG)
42035 sub = XEXP (sub, 0);
42036 *total += rtx_cost (sub, FMA, 2, speed);
42037 return true;
42038 }
42039
42040 case MULT:
42041 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42042 {
42043 /* ??? SSE scalar cost should be used here. */
42044 *total = cost->fmul;
42045 return false;
42046 }
42047 else if (X87_FLOAT_MODE_P (mode))
42048 {
42049 *total = cost->fmul;
42050 return false;
42051 }
42052 else if (FLOAT_MODE_P (mode))
42053 {
42054 /* ??? SSE vector cost should be used here. */
42055 *total = cost->fmul;
42056 return false;
42057 }
42058 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42059 {
42060 /* V*QImode is emulated with 7-13 insns. */
42061 if (mode == V16QImode || mode == V32QImode)
42062 {
42063 int extra = 11;
42064 if (TARGET_XOP && mode == V16QImode)
42065 extra = 5;
42066 else if (TARGET_SSSE3)
42067 extra = 6;
42068 *total = cost->fmul * 2 + cost->fabs * extra;
42069 }
42070 /* V*DImode is emulated with 5-8 insns. */
42071 else if (mode == V2DImode || mode == V4DImode)
42072 {
42073 if (TARGET_XOP && mode == V2DImode)
42074 *total = cost->fmul * 2 + cost->fabs * 3;
42075 else
42076 *total = cost->fmul * 3 + cost->fabs * 5;
42077 }
42078 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42079 insns, including two PMULUDQ. */
42080 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42081 *total = cost->fmul * 2 + cost->fabs * 5;
42082 else
42083 *total = cost->fmul;
42084 return false;
42085 }
42086 else
42087 {
42088 rtx op0 = XEXP (x, 0);
42089 rtx op1 = XEXP (x, 1);
42090 int nbits;
42091 if (CONST_INT_P (XEXP (x, 1)))
42092 {
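/* In the loop below, value &= value - 1 clears the lowest set bit, so
   nbits counts the set bits of the multiplier; e.g. a multiply by 10
   (binary 1010) yields nbits = 2. */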
42093 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42094 for (nbits = 0; value != 0; value &= value - 1)
42095 nbits++;
42096 }
42097 else
42098 /* This is arbitrary. */
42099 nbits = 7;
42100
42101 /* Compute costs correctly for widening multiplication. */
42102 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42103 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42104 == GET_MODE_SIZE (mode))
42105 {
42106 int is_mulwiden = 0;
42107 machine_mode inner_mode = GET_MODE (op0);
42108
42109 if (GET_CODE (op0) == GET_CODE (op1))
42110 is_mulwiden = 1, op1 = XEXP (op1, 0);
42111 else if (CONST_INT_P (op1))
42112 {
42113 if (GET_CODE (op0) == SIGN_EXTEND)
42114 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42115 == INTVAL (op1);
42116 else
42117 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42118 }
42119
42120 if (is_mulwiden)
42121 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42122 }
42123
42124 *total = (cost->mult_init[MODE_INDEX (mode)]
42125 + nbits * cost->mult_bit
42126 + rtx_cost (op0, outer_code, opno, speed)
42127 + rtx_cost (op1, outer_code, opno, speed));
42128
42129 return true;
42130 }
42131
42132 case DIV:
42133 case UDIV:
42134 case MOD:
42135 case UMOD:
42136 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42137 /* ??? SSE cost should be used here. */
42138 *total = cost->fdiv;
42139 else if (X87_FLOAT_MODE_P (mode))
42140 *total = cost->fdiv;
42141 else if (FLOAT_MODE_P (mode))
42142 /* ??? SSE vector cost should be used here. */
42143 *total = cost->fdiv;
42144 else
42145 *total = cost->divide[MODE_INDEX (mode)];
42146 return false;
42147
42148 case PLUS:
42149 if (GET_MODE_CLASS (mode) == MODE_INT
42150 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42151 {
42152 if (GET_CODE (XEXP (x, 0)) == PLUS
42153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42154 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42155 && CONSTANT_P (XEXP (x, 1)))
42156 {
42157 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42158 if (val == 2 || val == 4 || val == 8)
42159 {
42160 *total = cost->lea;
42161 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42162 outer_code, opno, speed);
42163 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42164 outer_code, opno, speed);
42165 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42166 return true;
42167 }
42168 }
42169 else if (GET_CODE (XEXP (x, 0)) == MULT
42170 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42171 {
42172 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42173 if (val == 2 || val == 4 || val == 8)
42174 {
42175 *total = cost->lea;
42176 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42177 outer_code, opno, speed);
42178 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42179 return true;
42180 }
42181 }
42182 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42183 {
42184 *total = cost->lea;
42185 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42186 outer_code, opno, speed);
42187 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42188 outer_code, opno, speed);
42189 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42190 return true;
42191 }
42192 }
42193 /* FALLTHRU */
42194
42195 case MINUS:
42196 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42197 {
42198 /* ??? SSE cost should be used here. */
42199 *total = cost->fadd;
42200 return false;
42201 }
42202 else if (X87_FLOAT_MODE_P (mode))
42203 {
42204 *total = cost->fadd;
42205 return false;
42206 }
42207 else if (FLOAT_MODE_P (mode))
42208 {
42209 /* ??? SSE vector cost should be used here. */
42210 *total = cost->fadd;
42211 return false;
42212 }
42213 /* FALLTHRU */
42214
42215 case AND:
42216 case IOR:
42217 case XOR:
42218 if (GET_MODE_CLASS (mode) == MODE_INT
42219 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42220 {
42221 *total = (cost->add * 2
42222 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42223 << (GET_MODE (XEXP (x, 0)) != DImode))
42224 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42225 << (GET_MODE (XEXP (x, 1)) != DImode)));
42226 return true;
42227 }
42228 /* FALLTHRU */
42229
42230 case NEG:
42231 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42232 {
42233 /* ??? SSE cost should be used here. */
42234 *total = cost->fchs;
42235 return false;
42236 }
42237 else if (X87_FLOAT_MODE_P (mode))
42238 {
42239 *total = cost->fchs;
42240 return false;
42241 }
42242 else if (FLOAT_MODE_P (mode))
42243 {
42244 /* ??? SSE vector cost should be used here. */
42245 *total = cost->fchs;
42246 return false;
42247 }
42248 /* FALLTHRU */
42249
42250 case NOT:
42251 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42252 {
42253 /* ??? Should be SSE vector operation cost. */
42254 /* At least for published AMD latencies, this really is the same
42255 as the latency for a simple fpu operation like fabs. */
42256 *total = cost->fabs;
42257 }
42258 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42259 *total = cost->add * 2;
42260 else
42261 *total = cost->add;
42262 return false;
42263
42264 case COMPARE:
42265 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42266 && XEXP (XEXP (x, 0), 1) == const1_rtx
42267 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42268 && XEXP (x, 1) == const0_rtx)
42269 {
42270 /* This kind of construct is implemented using test[bwl].
42271 Treat it as if we had an AND. */
42272 *total = (cost->add
42273 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42274 + rtx_cost (const1_rtx, outer_code, opno, speed));
42275 return true;
42276 }
42277 return false;
42278
42279 case FLOAT_EXTEND:
42280 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42281 *total = 0;
42282 return false;
42283
42284 case ABS:
42285 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42286 /* ??? SSE cost should be used here. */
42287 *total = cost->fabs;
42288 else if (X87_FLOAT_MODE_P (mode))
42289 *total = cost->fabs;
42290 else if (FLOAT_MODE_P (mode))
42291 /* ??? SSE vector cost should be used here. */
42292 *total = cost->fabs;
42293 return false;
42294
42295 case SQRT:
42296 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42297 /* ??? SSE cost should be used here. */
42298 *total = cost->fsqrt;
42299 else if (X87_FLOAT_MODE_P (mode))
42300 *total = cost->fsqrt;
42301 else if (FLOAT_MODE_P (mode))
42302 /* ??? SSE vector cost should be used here. */
42303 *total = cost->fsqrt;
42304 return false;
42305
42306 case UNSPEC:
42307 if (XINT (x, 1) == UNSPEC_TP)
42308 *total = 0;
42309 return false;
42310
42311 case VEC_SELECT:
42312 case VEC_CONCAT:
42313 case VEC_DUPLICATE:
42314 /* ??? Assume all of these vector manipulation patterns are
42315 recognizable, in which case they all pretty much have the
42316 same cost. */
42317 *total = cost->fabs;
42318 return true;
42319 case VEC_MERGE:
42320 mask = XEXP (x, 2);
42321 /* This is a masked instruction; assume the same cost
42322 as the nonmasked variant. */
42323 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42324 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42325 else
42326 *total = cost->fabs;
42327 return true;
42328
42329 default:
42330 return false;
42331 }
42332 }
42333
42334 #if TARGET_MACHO
42335
42336 static int current_machopic_label_num;
42337
42338 /* Given a symbol name and its associated stub, write out the
42339 definition of the stub. */
42340
42341 void
42342 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42343 {
42344 unsigned int length;
42345 char *binder_name, *symbol_name, lazy_ptr_name[32];
42346 int label = ++current_machopic_label_num;
42347
42348 /* For 64-bit we shouldn't get here. */
42349 gcc_assert (!TARGET_64BIT);
42350
42351 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42352 symb = targetm.strip_name_encoding (symb);
42353
42354 length = strlen (stub);
42355 binder_name = XALLOCAVEC (char, length + 32);
42356 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42357
42358 length = strlen (symb);
42359 symbol_name = XALLOCAVEC (char, length + 32);
42360 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42361
42362 sprintf (lazy_ptr_name, "L%d$lz", label);
42363
42364 if (MACHOPIC_ATT_STUB)
42365 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42366 else if (MACHOPIC_PURE)
42367 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42368 else
42369 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42370
42371 fprintf (file, "%s:\n", stub);
42372 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42373
42374 if (MACHOPIC_ATT_STUB)
42375 {
42376 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42377 }
42378 else if (MACHOPIC_PURE)
42379 {
42380 /* PIC stub. */
42381 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42382 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42383 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42384 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42385 label, lazy_ptr_name, label);
42386 fprintf (file, "\tjmp\t*%%ecx\n");
42387 }
42388 else
42389 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42390
42391 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42392 it needs no stub-binding-helper. */
42393 if (MACHOPIC_ATT_STUB)
42394 return;
42395
42396 fprintf (file, "%s:\n", binder_name);
42397
42398 if (MACHOPIC_PURE)
42399 {
42400 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42401 fprintf (file, "\tpushl\t%%ecx\n");
42402 }
42403 else
42404 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42405
42406 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42407
42408 /* N.B. Keep the correspondence of these
42409 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42410 old-pic/new-pic/non-pic stubs; altering this will break
42411 compatibility with existing dylibs. */
42412 if (MACHOPIC_PURE)
42413 {
42414 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42415 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42416 }
42417 else
42418 /* 16-byte -mdynamic-no-pic stub. */
42419 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42420
42421 fprintf (file, "%s:\n", lazy_ptr_name);
42422 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42423 fprintf (file, ASM_LONG "%s\n", binder_name);
42424 }
42425 #endif /* TARGET_MACHO */
42426
42427 /* Order the registers for register allocator. */
42428
42429 void
42430 x86_order_regs_for_local_alloc (void)
42431 {
42432 int pos = 0;
42433 int i;
42434
42435 /* First allocate the local general purpose registers. */
42436 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42437 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42438 reg_alloc_order [pos++] = i;
42439
42440 /* Global general purpose registers. */
42441 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42442 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42443 reg_alloc_order [pos++] = i;
42444
42445 /* x87 registers come first in case we are doing FP math
42446 using them. */
42447 if (!TARGET_SSE_MATH)
42448 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42449 reg_alloc_order [pos++] = i;
42450
42451 /* SSE registers. */
42452 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42453 reg_alloc_order [pos++] = i;
42454 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42455 reg_alloc_order [pos++] = i;
42456
42457 /* Extended REX SSE registers. */
42458 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42459 reg_alloc_order [pos++] = i;
42460
42461 /* Mask registers. */
42462 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42463 reg_alloc_order [pos++] = i;
42464
42465 /* MPX bound registers. */
42466 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42467 reg_alloc_order [pos++] = i;
42468
42469 /* x87 registers. */
42470 if (TARGET_SSE_MATH)
42471 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42472 reg_alloc_order [pos++] = i;
42473
42474 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42475 reg_alloc_order [pos++] = i;
42476
42477 /* Initialize the rest of the array, as we do not allocate some registers
42478 at all. */
42479 while (pos < FIRST_PSEUDO_REGISTER)
42480 reg_alloc_order [pos++] = 0;
42481 }
42482
42483 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42484 in struct attribute_spec handler. */
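/* The attribute takes a single integer argument that must be 0 or 1; an
   illustrative (hypothetical) use on a 32-bit target:

     void __attribute__ ((callee_pop_aggregate_return (1))) f (void);  */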
42485 static tree
42486 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42487 tree args,
42488 int,
42489 bool *no_add_attrs)
42490 {
42491 if (TREE_CODE (*node) != FUNCTION_TYPE
42492 && TREE_CODE (*node) != METHOD_TYPE
42493 && TREE_CODE (*node) != FIELD_DECL
42494 && TREE_CODE (*node) != TYPE_DECL)
42495 {
42496 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42497 name);
42498 *no_add_attrs = true;
42499 return NULL_TREE;
42500 }
42501 if (TARGET_64BIT)
42502 {
42503 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42504 name);
42505 *no_add_attrs = true;
42506 return NULL_TREE;
42507 }
42508 if (is_attribute_p ("callee_pop_aggregate_return", name))
42509 {
42510 tree cst;
42511
42512 cst = TREE_VALUE (args);
42513 if (TREE_CODE (cst) != INTEGER_CST)
42514 {
42515 warning (OPT_Wattributes,
42516 "%qE attribute requires an integer constant argument",
42517 name);
42518 *no_add_attrs = true;
42519 }
42520 else if (compare_tree_int (cst, 0) != 0
42521 && compare_tree_int (cst, 1) != 0)
42522 {
42523 warning (OPT_Wattributes,
42524 "argument to %qE attribute is neither zero, nor one",
42525 name);
42526 *no_add_attrs = true;
42527 }
42528
42529 return NULL_TREE;
42530 }
42531
42532 return NULL_TREE;
42533 }
42534
42535 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42536 struct attribute_spec.handler. */
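/* Illustrative use: void __attribute__ ((ms_abi)) f (void); selects the
   Microsoft calling convention for F, and sysv_abi the System V one; the
   handler below merely rejects combining the two. */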
42537 static tree
42538 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42539 bool *no_add_attrs)
42540 {
42541 if (TREE_CODE (*node) != FUNCTION_TYPE
42542 && TREE_CODE (*node) != METHOD_TYPE
42543 && TREE_CODE (*node) != FIELD_DECL
42544 && TREE_CODE (*node) != TYPE_DECL)
42545 {
42546 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42547 name);
42548 *no_add_attrs = true;
42549 return NULL_TREE;
42550 }
42551
42552 /* Can combine regparm with all attributes but fastcall. */
42553 if (is_attribute_p ("ms_abi", name))
42554 {
42555 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42556 {
42557 error ("ms_abi and sysv_abi attributes are not compatible");
42558 }
42559
42560 return NULL_TREE;
42561 }
42562 else if (is_attribute_p ("sysv_abi", name))
42563 {
42564 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42565 {
42566 error ("ms_abi and sysv_abi attributes are not compatible");
42567 }
42568
42569 return NULL_TREE;
42570 }
42571
42572 return NULL_TREE;
42573 }
42574
42575 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42576 struct attribute_spec.handler. */
42577 static tree
42578 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42579 bool *no_add_attrs)
42580 {
42581 tree *type = NULL;
42582 if (DECL_P (*node))
42583 {
42584 if (TREE_CODE (*node) == TYPE_DECL)
42585 type = &TREE_TYPE (*node);
42586 }
42587 else
42588 type = node;
42589
42590 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42591 {
42592 warning (OPT_Wattributes, "%qE attribute ignored",
42593 name);
42594 *no_add_attrs = true;
42595 }
42596
42597 else if ((is_attribute_p ("ms_struct", name)
42598 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42599 || ((is_attribute_p ("gcc_struct", name)
42600 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42601 {
42602 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42603 name);
42604 *no_add_attrs = true;
42605 }
42606
42607 return NULL_TREE;
42608 }
42609
42610 static tree
42611 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42612 bool *no_add_attrs)
42613 {
42614 if (TREE_CODE (*node) != FUNCTION_DECL)
42615 {
42616 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42617 name);
42618 *no_add_attrs = true;
42619 }
42620 return NULL_TREE;
42621 }
42622
42623 static bool
42624 ix86_ms_bitfield_layout_p (const_tree record_type)
42625 {
42626 return ((TARGET_MS_BITFIELD_LAYOUT
42627 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42628 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42629 }
42630
42631 /* Returns an expression indicating where the this parameter is
42632 located on entry to the FUNCTION. */
42633
42634 static rtx
42635 x86_this_parameter (tree function)
42636 {
42637 tree type = TREE_TYPE (function);
42638 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42639 int nregs;
42640
42641 if (TARGET_64BIT)
42642 {
42643 const int *parm_regs;
42644
42645 if (ix86_function_type_abi (type) == MS_ABI)
42646 parm_regs = x86_64_ms_abi_int_parameter_registers;
42647 else
42648 parm_regs = x86_64_int_parameter_registers;
42649 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42650 }
42651
42652 nregs = ix86_function_regparm (type, function);
42653
42654 if (nregs > 0 && !stdarg_p (type))
42655 {
42656 int regno;
42657 unsigned int ccvt = ix86_get_callcvt (type);
42658
42659 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42660 regno = aggr ? DX_REG : CX_REG;
42661 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42662 {
42663 regno = CX_REG;
42664 if (aggr)
42665 return gen_rtx_MEM (SImode,
42666 plus_constant (Pmode, stack_pointer_rtx, 4));
42667 }
42668 else
42669 {
42670 regno = AX_REG;
42671 if (aggr)
42672 {
42673 regno = DX_REG;
42674 if (nregs == 1)
42675 return gen_rtx_MEM (SImode,
42676 plus_constant (Pmode,
42677 stack_pointer_rtx, 4));
42678 }
42679 }
42680 return gen_rtx_REG (SImode, regno);
42681 }
42682
42683 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42684 aggr ? 8 : 4));
42685 }
42686
42687 /* Determine whether x86_output_mi_thunk can succeed. */
42688
42689 static bool
42690 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42691 const_tree function)
42692 {
42693 /* 64-bit can handle anything. */
42694 if (TARGET_64BIT)
42695 return true;
42696
42697 /* For 32-bit, everything's fine if we have one free register. */
42698 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42699 return true;
42700
42701 /* Need a free register for vcall_offset. */
42702 if (vcall_offset)
42703 return false;
42704
42705 /* Need a free register for GOT references. */
42706 if (flag_pic && !targetm.binds_local_p (function))
42707 return false;
42708
42709 /* Otherwise ok. */
42710 return true;
42711 }
42712
42713 /* Output the assembler code for a thunk function. THUNK_DECL is the
42714 declaration for the thunk function itself, FUNCTION is the decl for
42715 the target function. DELTA is an immediate constant offset to be
42716 added to THIS. If VCALL_OFFSET is nonzero, the word at
42717 *(*this + vcall_offset) should be added to THIS. */
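/* Roughly, the thunk adjusts the incoming THIS pointer as sketched below
   (an illustration of the semantics, not the emitted code) and then
   tail-calls FUNCTION:

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);  */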
42718
42719 static void
42720 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42721 HOST_WIDE_INT vcall_offset, tree function)
42722 {
42723 rtx this_param = x86_this_parameter (function);
42724 rtx this_reg, tmp, fnaddr;
42725 unsigned int tmp_regno;
42726 rtx_insn *insn;
42727
42728 if (TARGET_64BIT)
42729 tmp_regno = R10_REG;
42730 else
42731 {
42732 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42733 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42734 tmp_regno = AX_REG;
42735 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42736 tmp_regno = DX_REG;
42737 else
42738 tmp_regno = CX_REG;
42739 }
42740
42741 emit_note (NOTE_INSN_PROLOGUE_END);
42742
42743 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42744 pull it in now and let DELTA benefit. */
42745 if (REG_P (this_param))
42746 this_reg = this_param;
42747 else if (vcall_offset)
42748 {
42749 /* Put the this parameter into %eax. */
42750 this_reg = gen_rtx_REG (Pmode, AX_REG);
42751 emit_move_insn (this_reg, this_param);
42752 }
42753 else
42754 this_reg = NULL_RTX;
42755
42756 /* Adjust the this parameter by a fixed constant. */
42757 if (delta)
42758 {
42759 rtx delta_rtx = GEN_INT (delta);
42760 rtx delta_dst = this_reg ? this_reg : this_param;
42761
42762 if (TARGET_64BIT)
42763 {
42764 if (!x86_64_general_operand (delta_rtx, Pmode))
42765 {
42766 tmp = gen_rtx_REG (Pmode, tmp_regno);
42767 emit_move_insn (tmp, delta_rtx);
42768 delta_rtx = tmp;
42769 }
42770 }
42771
42772 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42773 }
42774
42775 /* Adjust the this parameter by a value stored in the vtable. */
42776 if (vcall_offset)
42777 {
42778 rtx vcall_addr, vcall_mem, this_mem;
42779
42780 tmp = gen_rtx_REG (Pmode, tmp_regno);
42781
42782 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42783 if (Pmode != ptr_mode)
42784 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42785 emit_move_insn (tmp, this_mem);
42786
42787 /* Adjust the this parameter. */
42788 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42789 if (TARGET_64BIT
42790 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42791 {
42792 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42793 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42794 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42795 }
42796
42797 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42798 if (Pmode != ptr_mode)
42799 emit_insn (gen_addsi_1_zext (this_reg,
42800 gen_rtx_REG (ptr_mode,
42801 REGNO (this_reg)),
42802 vcall_mem));
42803 else
42804 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42805 }
42806
42807 /* If necessary, drop THIS back to its stack slot. */
42808 if (this_reg && this_reg != this_param)
42809 emit_move_insn (this_param, this_reg);
42810
42811 fnaddr = XEXP (DECL_RTL (function), 0);
42812 if (TARGET_64BIT)
42813 {
42814 if (!flag_pic || targetm.binds_local_p (function)
42815 || TARGET_PECOFF)
42816 ;
42817 else
42818 {
42819 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42820 tmp = gen_rtx_CONST (Pmode, tmp);
42821 fnaddr = gen_const_mem (Pmode, tmp);
42822 }
42823 }
42824 else
42825 {
42826 if (!flag_pic || targetm.binds_local_p (function))
42827 ;
42828 #if TARGET_MACHO
42829 else if (TARGET_MACHO)
42830 {
42831 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42832 fnaddr = XEXP (fnaddr, 0);
42833 }
42834 #endif /* TARGET_MACHO */
42835 else
42836 {
42837 tmp = gen_rtx_REG (Pmode, CX_REG);
42838 output_set_got (tmp, NULL_RTX);
42839
42840 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42841 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42842 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42843 fnaddr = gen_const_mem (Pmode, fnaddr);
42844 }
42845 }
42846
42847 /* Our sibling call patterns do not allow memories, because we have no
42848 predicate that can distinguish between frame and non-frame memory.
42849 For our purposes here, we can get away with (ab)using a jump pattern,
42850 because we're going to do no optimization. */
42851 if (MEM_P (fnaddr))
42852 {
42853 if (sibcall_insn_operand (fnaddr, word_mode))
42854 {
42855 fnaddr = XEXP (DECL_RTL (function), 0);
42856 tmp = gen_rtx_MEM (QImode, fnaddr);
42857 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42858 tmp = emit_call_insn (tmp);
42859 SIBLING_CALL_P (tmp) = 1;
42860 }
42861 else
42862 emit_jump_insn (gen_indirect_jump (fnaddr));
42863 }
42864 else
42865 {
42866 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42867 {
42868 // CM_LARGE_PIC always uses a pseudo PIC register, which is
42869 // uninitialized. Since FUNCTION is local and calling it
42870 // doesn't go through the PLT, we use scratch register %r11 as
42871 // the PIC register and initialize it here.
42872 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42873 ix86_init_large_pic_reg (tmp_regno);
42874 fnaddr = legitimize_pic_address (fnaddr,
42875 gen_rtx_REG (Pmode, tmp_regno));
42876 }
42877
42878 if (!sibcall_insn_operand (fnaddr, word_mode))
42879 {
42880 tmp = gen_rtx_REG (word_mode, tmp_regno);
42881 if (GET_MODE (fnaddr) != word_mode)
42882 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42883 emit_move_insn (tmp, fnaddr);
42884 fnaddr = tmp;
42885 }
42886
42887 tmp = gen_rtx_MEM (QImode, fnaddr);
42888 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42889 tmp = emit_call_insn (tmp);
42890 SIBLING_CALL_P (tmp) = 1;
42891 }
42892 emit_barrier ();
42893
42894 /* Emit just enough of rest_of_compilation to get the insns emitted.
42895 Note that use_thunk calls assemble_start_function et al. */
42896 insn = get_insns ();
42897 shorten_branches (insn);
42898 final_start_function (insn, file, 1);
42899 final (insn, file, 1);
42900 final_end_function ();
42901 }
42902
42903 static void
42904 x86_file_start (void)
42905 {
42906 default_file_start ();
42907 if (TARGET_16BIT)
42908 fputs ("\t.code16gcc\n", asm_out_file);
42909 #if TARGET_MACHO
42910 darwin_file_start ();
42911 #endif
42912 if (X86_FILE_START_VERSION_DIRECTIVE)
42913 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42914 if (X86_FILE_START_FLTUSED)
42915 fputs ("\t.global\t__fltused\n", asm_out_file);
42916 if (ix86_asm_dialect == ASM_INTEL)
42917 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42918 }
42919
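/* Compute the alignment for FIELD, given the alignment COMPUTED so far.
   On 32-bit targets without -malign-double, scalar fields such as double
   and long long are limited to 32-bit alignment within structures (the
   traditional ia32 struct layout); otherwise COMPUTED is returned
   unchanged. */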
42920 int
42921 x86_field_alignment (tree field, int computed)
42922 {
42923 machine_mode mode;
42924 tree type = TREE_TYPE (field);
42925
42926 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42927 return computed;
42928 mode = TYPE_MODE (strip_array_types (type));
42929 if (mode == DFmode || mode == DCmode
42930 || GET_MODE_CLASS (mode) == MODE_INT
42931 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42932 return MIN (32, computed);
42933 return computed;
42934 }
42935
42936 /* Print call to TARGET to FILE. */
42937
42938 static void
42939 x86_print_call_or_nop (FILE *file, const char *target)
42940 {
42941 if (flag_nop_mcount)
42942 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42943 else
42944 fprintf (file, "1:\tcall\t%s\n", target);
42945 }
42946
42947 /* Output assembler code to FILE to increment profiler label # LABELNO
42948 for profiling a function entry. */
42949 void
42950 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42951 {
42952 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42953 : MCOUNT_NAME);
42954 if (TARGET_64BIT)
42955 {
42956 #ifndef NO_PROFILE_COUNTERS
42957 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42958 #endif
42959
42960 if (!TARGET_PECOFF && flag_pic)
42961 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42962 else
42963 x86_print_call_or_nop (file, mcount_name);
42964 }
42965 else if (flag_pic)
42966 {
42967 #ifndef NO_PROFILE_COUNTERS
42968 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
42969 LPREFIX, labelno);
42970 #endif
42971 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
42972 }
42973 else
42974 {
42975 #ifndef NO_PROFILE_COUNTERS
42976 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42977 LPREFIX, labelno);
42978 #endif
42979 x86_print_call_or_nop (file, mcount_name);
42980 }
42981
42982 if (flag_record_mcount)
42983 {
42984 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42985 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42986 fprintf (file, "\t.previous\n");
42987 }
42988 }
42989
42990 /* We don't have exact information about the insn sizes, but we may assume
42991 quite safely that we are informed about all 1 byte insns and memory
42992 address sizes. This is enough to eliminate unnecessary padding in
42993 99% of cases. */
42994
42995 static int
42996 min_insn_size (rtx_insn *insn)
42997 {
42998 int l = 0, len;
42999
43000 if (!INSN_P (insn) || !active_insn_p (insn))
43001 return 0;
43002
43003 /* Discard alignments we've emitted and jump instructions. */
43004 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43005 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43006 return 0;
43007
43008 /* Important case - calls are always 5 bytes.
43009 It is common to have many calls in a row. */
43010 if (CALL_P (insn)
43011 && symbolic_reference_mentioned_p (PATTERN (insn))
43012 && !SIBLING_CALL_P (insn))
43013 return 5;
43014 len = get_attr_length (insn);
43015 if (len <= 1)
43016 return 1;
43017
43018 /* For normal instructions we rely on get_attr_length being exact,
43019 with a few exceptions. */
43020 if (!JUMP_P (insn))
43021 {
43022 enum attr_type type = get_attr_type (insn);
43023
43024 switch (type)
43025 {
43026 case TYPE_MULTI:
43027 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43028 || asm_noperands (PATTERN (insn)) >= 0)
43029 return 0;
43030 break;
43031 case TYPE_OTHER:
43032 case TYPE_FCMP:
43033 break;
43034 default:
43035 /* Otherwise trust get_attr_length. */
43036 return len;
43037 }
43038
43039 l = get_attr_length_address (insn);
43040 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43041 l = 4;
43042 }
43043 if (l)
43044 return 1+l;
43045 else
43046 return 2;
43047 }
43048
43049 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43050
43051 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43052 16-byte window. */
43053
43054 static void
43055 ix86_avoid_jump_mispredicts (void)
43056 {
43057 rtx_insn *insn, *start = get_insns ();
43058 int nbytes = 0, njumps = 0;
43059 int isjump = 0;
43060
43061 /* Look for all minimal intervals of instructions containing 4 jumps.
43062 The intervals are bounded by START and INSN. NBYTES is the total
43063 size of instructions in the interval including INSN and not including
43064 START. When NBYTES is smaller than 16 bytes, it is possible
43065 that the ends of START and INSN land in the same 16-byte page.
43066 
43067 The smallest offset in the page at which INSN can start is the case where
43068 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43069 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
43070 
43071 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
43072 have to, control transfer to the label(s) can be performed through other
43073 means, and we also estimate the minimum length of all asm stmts as 0. */
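/* For example, four 2-byte conditional jumps emitted back to back span
   only 8 bytes and could all land in one 16-byte window; the padding
   emitted below pushes the fourth jump out of that window so that no
   window holds more than 3 jumps. */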
43074 for (insn = start; insn; insn = NEXT_INSN (insn))
43075 {
43076 int min_size;
43077
43078 if (LABEL_P (insn))
43079 {
43080 int align = label_to_alignment (insn);
43081 int max_skip = label_to_max_skip (insn);
43082
43083 if (max_skip > 15)
43084 max_skip = 15;
43085 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43086 already in the current 16 byte page, because otherwise
43087 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43088 bytes to reach 16 byte boundary. */
43089 if (align <= 0
43090 || (align <= 3 && max_skip != (1 << align) - 1))
43091 max_skip = 0;
43092 if (dump_file)
43093 fprintf (dump_file, "Label %i with max_skip %i\n",
43094 INSN_UID (insn), max_skip);
43095 if (max_skip)
43096 {
43097 while (nbytes + max_skip >= 16)
43098 {
43099 start = NEXT_INSN (start);
43100 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43101 || CALL_P (start))
43102 njumps--, isjump = 1;
43103 else
43104 isjump = 0;
43105 nbytes -= min_insn_size (start);
43106 }
43107 }
43108 continue;
43109 }
43110
43111 min_size = min_insn_size (insn);
43112 nbytes += min_size;
43113 if (dump_file)
43114 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43115 INSN_UID (insn), min_size);
43116 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43117 || CALL_P (insn))
43118 njumps++;
43119 else
43120 continue;
43121
43122 while (njumps > 3)
43123 {
43124 start = NEXT_INSN (start);
43125 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43126 || CALL_P (start))
43127 njumps--, isjump = 1;
43128 else
43129 isjump = 0;
43130 nbytes -= min_insn_size (start);
43131 }
43132 gcc_assert (njumps >= 0);
43133 if (dump_file)
43134 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43135 INSN_UID (start), INSN_UID (insn), nbytes);
43136
43137 if (njumps == 3 && isjump && nbytes < 16)
43138 {
43139 int padsize = 15 - nbytes + min_insn_size (insn);
43140
43141 if (dump_file)
43142 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43143 INSN_UID (insn), padsize);
43144 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43145 }
43146 }
43147 }
43148 #endif
43149
43150 /* AMD Athlon works faster
43151 when RET is not the destination of a conditional jump or directly preceded
43152 by another jump instruction. We avoid the penalty by inserting a NOP just
43153 before the RET instructions in such cases. */
43154 static void
43155 ix86_pad_returns (void)
43156 {
43157 edge e;
43158 edge_iterator ei;
43159
43160 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43161 {
43162 basic_block bb = e->src;
43163 rtx_insn *ret = BB_END (bb);
43164 rtx_insn *prev;
43165 bool replace = false;
43166
43167 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43168 || optimize_bb_for_size_p (bb))
43169 continue;
43170 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43171 if (active_insn_p (prev) || LABEL_P (prev))
43172 break;
43173 if (prev && LABEL_P (prev))
43174 {
43175 edge e;
43176 edge_iterator ei;
43177
43178 FOR_EACH_EDGE (e, ei, bb->preds)
43179 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43180 && !(e->flags & EDGE_FALLTHRU))
43181 {
43182 replace = true;
43183 break;
43184 }
43185 }
43186 if (!replace)
43187 {
43188 prev = prev_active_insn (ret);
43189 if (prev
43190 && ((JUMP_P (prev) && any_condjump_p (prev))
43191 || CALL_P (prev)))
43192 replace = true;
43193 /* Empty functions get a branch mispredict even when
43194 the jump destination is not visible to us. */
43195 if (!prev && !optimize_function_for_size_p (cfun))
43196 replace = true;
43197 }
43198 if (replace)
43199 {
43200 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43201 delete_insn (ret);
43202 }
43203 }
43204 }
43205
43206 /* Count the minimum number of instructions in BB. Return 4 if the
43207 number of instructions >= 4. */
43208
43209 static int
43210 ix86_count_insn_bb (basic_block bb)
43211 {
43212 rtx_insn *insn;
43213 int insn_count = 0;
43214
43215 /* Count number of instructions in this block. Return 4 if the number
43216 of instructions >= 4. */
43217 FOR_BB_INSNS (bb, insn)
43218 {
43219 /* This only happens in exit blocks. */
43220 if (JUMP_P (insn)
43221 && ANY_RETURN_P (PATTERN (insn)))
43222 break;
43223
43224 if (NONDEBUG_INSN_P (insn)
43225 && GET_CODE (PATTERN (insn)) != USE
43226 && GET_CODE (PATTERN (insn)) != CLOBBER)
43227 {
43228 insn_count++;
43229 if (insn_count >= 4)
43230 return insn_count;
43231 }
43232 }
43233
43234 return insn_count;
43235 }
43236
43237
43238 /* Count the minimum number of instructions in code path in BB.
43239 Return 4 if the number of instructions >= 4. */
43240
43241 static int
43242 ix86_count_insn (basic_block bb)
43243 {
43244 edge e;
43245 edge_iterator ei;
43246 int min_prev_count;
43247
43248 /* Only bother counting instructions along paths with no
43249 more than 2 basic blocks between entry and exit. Given
43250 that BB has an edge to exit, determine if a predecessor
43251 of BB has an edge from entry. If so, compute the number
43252 of instructions in the predecessor block. If there
43253 happen to be multiple such blocks, compute the minimum. */
43254 min_prev_count = 4;
43255 FOR_EACH_EDGE (e, ei, bb->preds)
43256 {
43257 edge prev_e;
43258 edge_iterator prev_ei;
43259
43260 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43261 {
43262 min_prev_count = 0;
43263 break;
43264 }
43265 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43266 {
43267 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43268 {
43269 int count = ix86_count_insn_bb (e->src);
43270 if (count < min_prev_count)
43271 min_prev_count = count;
43272 break;
43273 }
43274 }
43275 }
43276
43277 if (min_prev_count < 4)
43278 min_prev_count += ix86_count_insn_bb (bb);
43279
43280 return min_prev_count;
43281 }
43282
43283 /* Pad short function to 4 instructions. */
43284
43285 static void
43286 ix86_pad_short_function (void)
43287 {
43288 edge e;
43289 edge_iterator ei;
43290
43291 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43292 {
43293 rtx_insn *ret = BB_END (e->src);
43294 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43295 {
43296 int insn_count = ix86_count_insn (e->src);
43297
43298 /* Pad short function. */
43299 if (insn_count < 4)
43300 {
43301 rtx_insn *insn = ret;
43302
43303 /* Find epilogue. */
43304 while (insn
43305 && (!NOTE_P (insn)
43306 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43307 insn = PREV_INSN (insn);
43308
43309 if (!insn)
43310 insn = ret;
43311
43312 /* Two NOPs count as one instruction. */
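/* For example, a function with only 2 counted insns gets
   2 * (4 - 2) = 4 NOPs, which count as 2 more instructions and
   bring the total up to the required 4. */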
43313 insn_count = 2 * (4 - insn_count);
43314 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43315 }
43316 }
43317 }
43318 }
43319
43320 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43321 the epilogue, the Windows system unwinder will apply epilogue logic and
43322 produce incorrect offsets. This can be avoided by adding a nop between
43323 the last insn that can throw and the first insn of the epilogue. */
43324
43325 static void
43326 ix86_seh_fixup_eh_fallthru (void)
43327 {
43328 edge e;
43329 edge_iterator ei;
43330
43331 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43332 {
43333 rtx_insn *insn, *next;
43334
43335 /* Find the beginning of the epilogue. */
43336 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43337 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43338 break;
43339 if (insn == NULL)
43340 continue;
43341
43342 /* We only care about preceding insns that can throw. */
43343 insn = prev_active_insn (insn);
43344 if (insn == NULL || !can_throw_internal (insn))
43345 continue;
43346
43347 /* Do not separate calls from their debug information. */
43348 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43349 if (NOTE_P (next)
43350 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43351 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43352 insn = next;
43353 else
43354 break;
43355
43356 emit_insn_after (gen_nops (const1_rtx), insn);
43357 }
43358 }
43359
43360 /* Implement machine specific optimizations. We implement padding of returns
43361 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43362 static void
43363 ix86_reorg (void)
43364 {
43365 /* We are freeing block_for_insn in the toplev to keep compatibility
43366 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43367 compute_bb_for_insn ();
43368
43369 if (TARGET_SEH && current_function_has_exception_handlers ())
43370 ix86_seh_fixup_eh_fallthru ();
43371
43372 if (optimize && optimize_function_for_speed_p (cfun))
43373 {
43374 if (TARGET_PAD_SHORT_FUNCTION)
43375 ix86_pad_short_function ();
43376 else if (TARGET_PAD_RETURNS)
43377 ix86_pad_returns ();
43378 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43379 if (TARGET_FOUR_JUMP_LIMIT)
43380 ix86_avoid_jump_mispredicts ();
43381 #endif
43382 }
43383 }
43384
43385 /* Return nonzero when a QImode register that must be represented via a REX
43386 prefix is used. */
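/* For example, the low byte of %rsi (%sil) is only addressable with a REX
   prefix, whereas %al, %bl, %cl and %dl need none. */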
43387 bool
43388 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43389 {
43390 int i;
43391 extract_insn_cached (insn);
43392 for (i = 0; i < recog_data.n_operands; i++)
43393 if (GENERAL_REG_P (recog_data.operand[i])
43394 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43395 return true;
43396 return false;
43397 }
43398
43399 /* Return true when INSN mentions a register that must be encoded using a
43400 REX prefix. */
43401 bool
43402 x86_extended_reg_mentioned_p (rtx insn)
43403 {
43404 subrtx_iterator::array_type array;
43405 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43406 {
43407 const_rtx x = *iter;
43408 if (REG_P (x)
43409 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43410 return true;
43411 }
43412 return false;
43413 }
43414
43415 /* If profitable, negate (without causing overflow) integer constant
43416 of mode MODE at location LOC. Return true in this case. */
43417 bool
43418 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43419 {
43420 HOST_WIDE_INT val;
43421
43422 if (!CONST_INT_P (*loc))
43423 return false;
43424
43425 switch (mode)
43426 {
43427 case DImode:
43428 /* DImode x86_64 constants must fit in 32 bits. */
43429 gcc_assert (x86_64_immediate_operand (*loc, mode));
43430
43431 mode = SImode;
43432 break;
43433
43434 case SImode:
43435 case HImode:
43436 case QImode:
43437 break;
43438
43439 default:
43440 gcc_unreachable ();
43441 }
43442
43443 /* Avoid overflows. */
43444 if (mode_signbit_p (mode, *loc))
43445 return false;
43446
43447 val = INTVAL (*loc);
43448
43449 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43450 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
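/* For example, addl $128, %eax needs a 32-bit immediate, while the
   equivalent subl $-128, %eax fits in a sign-extended 8-bit immediate;
   conversely -128 itself is left alone, since negating it to 128 would
   lose that short encoding. */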
43451 if ((val < 0 && val != -128)
43452 || val == 128)
43453 {
43454 *loc = GEN_INT (-val);
43455 return true;
43456 }
43457
43458 return false;
43459 }
43460
43461 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43462 optabs would emit if we didn't have TFmode patterns. */
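/* A scalar sketch of the trick used below for negative (i.e. large
   unsigned) inputs - an illustration of the idea, not the emitted RTL:

     if ((signed) in >= 0)
       out = (double) (signed) in;
     else
       {
         out = (double) (signed) ((in >> 1) | (in & 1));
         out = out + out;
       }

   Halving keeps the value in signed range; OR-ing in the low bit keeps
   the final rounding correct after the result is doubled. */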
43463
43464 void
43465 x86_emit_floatuns (rtx operands[2])
43466 {
43467 rtx_code_label *neglab, *donelab;
43468 rtx i0, i1, f0, in, out;
43469 machine_mode mode, inmode;
43470
43471 inmode = GET_MODE (operands[1]);
43472 gcc_assert (inmode == SImode || inmode == DImode);
43473
43474 out = operands[0];
43475 in = force_reg (inmode, operands[1]);
43476 mode = GET_MODE (out);
43477 neglab = gen_label_rtx ();
43478 donelab = gen_label_rtx ();
43479 f0 = gen_reg_rtx (mode);
43480
43481 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43482
43483 expand_float (out, in, 0);
43484
43485 emit_jump_insn (gen_jump (donelab));
43486 emit_barrier ();
43487
43488 emit_label (neglab);
43489
43490 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43491 1, OPTAB_DIRECT);
43492 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43493 1, OPTAB_DIRECT);
43494 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43495
43496 expand_float (f0, i0, 0);
43497
43498 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43499
43500 emit_label (donelab);
43501 }
43502 \f
43503 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43504 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43505 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43506 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43507
43508 /* Get a vector mode of the same size as the original but with elements
43509 twice as wide. This is only guaranteed to apply to integral vectors. */
43510
43511 static inline machine_mode
43512 get_mode_wider_vector (machine_mode o)
43513 {
43514 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43515 machine_mode n = GET_MODE_WIDER_MODE (o);
43516 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43517 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43518 return n;
43519 }
43520
43521 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43522 fill target with val via vec_duplicate. */
43523
43524 static bool
43525 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43526 {
43527 bool ok;
43528 rtx_insn *insn;
43529 rtx dup;
43530
43531 /* First attempt to recognize VAL as-is. */
43532 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43533 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43534 if (recog_memoized (insn) < 0)
43535 {
43536 rtx_insn *seq;
43537 /* If that fails, force VAL into a register. */
43538
43539 start_sequence ();
43540 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43541 seq = get_insns ();
43542 end_sequence ();
43543 if (seq)
43544 emit_insn_before (seq, insn);
43545
43546 ok = recog_memoized (insn) >= 0;
43547 gcc_assert (ok);
43548 }
43549 return true;
43550 }
43551
43552 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43553 with all elements equal to VAL. Return true if successful. */
43554
43555 static bool
43556 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43557 rtx target, rtx val)
43558 {
43559 bool ok;
43560
43561 switch (mode)
43562 {
43563 case V2SImode:
43564 case V2SFmode:
43565 if (!mmx_ok)
43566 return false;
43567 /* FALLTHRU */
43568
43569 case V4DFmode:
43570 case V4DImode:
43571 case V8SFmode:
43572 case V8SImode:
43573 case V2DFmode:
43574 case V2DImode:
43575 case V4SFmode:
43576 case V4SImode:
43577 case V16SImode:
43578 case V8DImode:
43579 case V16SFmode:
43580 case V8DFmode:
43581 return ix86_vector_duplicate_value (mode, target, val);
43582
43583 case V4HImode:
43584 if (!mmx_ok)
43585 return false;
43586 if (TARGET_SSE || TARGET_3DNOW_A)
43587 {
43588 rtx x;
43589
43590 val = gen_lowpart (SImode, val);
43591 x = gen_rtx_TRUNCATE (HImode, val);
43592 x = gen_rtx_VEC_DUPLICATE (mode, x);
43593 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43594 return true;
43595 }
43596 goto widen;
43597
43598 case V8QImode:
43599 if (!mmx_ok)
43600 return false;
43601 goto widen;
43602
43603 case V8HImode:
43604 if (TARGET_AVX2)
43605 return ix86_vector_duplicate_value (mode, target, val);
43606
43607 if (TARGET_SSE2)
43608 {
43609 struct expand_vec_perm_d dperm;
43610 rtx tmp1, tmp2;
43611
43612 permute:
43613 memset (&dperm, 0, sizeof (dperm));
43614 dperm.target = target;
43615 dperm.vmode = mode;
43616 dperm.nelt = GET_MODE_NUNITS (mode);
43617 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43618 dperm.one_operand_p = true;
43619
43620 /* Extend to SImode using a paradoxical SUBREG. */
43621 tmp1 = gen_reg_rtx (SImode);
43622 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43623
43624 /* Insert the SImode value as low element of a V4SImode vector. */
43625 tmp2 = gen_reg_rtx (V4SImode);
43626 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43627 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43628
43629 ok = (expand_vec_perm_1 (&dperm)
43630 || expand_vec_perm_broadcast_1 (&dperm));
43631 gcc_assert (ok);
43632 return ok;
43633 }
43634 goto widen;
43635
43636 case V16QImode:
43637 if (TARGET_AVX2)
43638 return ix86_vector_duplicate_value (mode, target, val);
43639
43640 if (TARGET_SSE2)
43641 goto permute;
43642 goto widen;
43643
43644 widen:
43645 /* Replicate the value once into the next wider mode and recurse. */
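/* For example, a QImode value 0xab is first widened to the HImode
   value 0xabab (val | (val << 8)), and the recursion then broadcasts
   that HImode value across the wider vector mode. */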
43646 {
43647 machine_mode smode, wsmode, wvmode;
43648 rtx x;
43649
43650 smode = GET_MODE_INNER (mode);
43651 wvmode = get_mode_wider_vector (mode);
43652 wsmode = GET_MODE_INNER (wvmode);
43653
43654 val = convert_modes (wsmode, smode, val, true);
43655 x = expand_simple_binop (wsmode, ASHIFT, val,
43656 GEN_INT (GET_MODE_BITSIZE (smode)),
43657 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43658 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43659
43660 x = gen_reg_rtx (wvmode);
43661 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43662 gcc_assert (ok);
43663 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43664 return ok;
43665 }
43666
43667 case V16HImode:
43668 case V32QImode:
43669 if (TARGET_AVX2)
43670 return ix86_vector_duplicate_value (mode, target, val);
43671 else
43672 {
43673 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43674 rtx x = gen_reg_rtx (hvmode);
43675
43676 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43677 gcc_assert (ok);
43678
43679 x = gen_rtx_VEC_CONCAT (mode, x, x);
43680 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43681 }
43682 return true;
43683
43684 case V64QImode:
43685 case V32HImode:
43686 if (TARGET_AVX512BW)
43687 return ix86_vector_duplicate_value (mode, target, val);
43688 else
43689 {
43690 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43691 rtx x = gen_reg_rtx (hvmode);
43692
43693 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43694 gcc_assert (ok);
43695
43696 x = gen_rtx_VEC_CONCAT (mode, x, x);
43697 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43698 }
43699 return true;
43700
43701 default:
43702 return false;
43703 }
43704 }
43705
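/* As an illustration of the "widen" strategy above: broadcasting a
   QImode value B into V8QImode first combines two copies of B into an
   HImode value (B << 8) | B, retries the broadcast in V4HImode (widening
   again to V2SImode if necessary), and finally views the wider result in
   the original mode through gen_lowpart.  */
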
43706 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43707 whose ONE_VAR element is VAR, and other elements are zero. Return true
43708 if successful. */
43709
43710 static bool
43711 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43712 rtx target, rtx var, int one_var)
43713 {
43714 machine_mode vsimode;
43715 rtx new_target;
43716 rtx x, tmp;
43717 bool use_vector_set = false;
43718
43719 switch (mode)
43720 {
43721 case V2DImode:
43722 /* For SSE4.1, we normally use vector set. But if the second
43723 element is zero and inter-unit moves are OK, we use movq
43724 instead. */
43725 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43726 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43727 && one_var == 0));
43728 break;
43729 case V16QImode:
43730 case V4SImode:
43731 case V4SFmode:
43732 use_vector_set = TARGET_SSE4_1;
43733 break;
43734 case V8HImode:
43735 use_vector_set = TARGET_SSE2;
43736 break;
43737 case V4HImode:
43738 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43739 break;
43740 case V32QImode:
43741 case V16HImode:
43742 case V8SImode:
43743 case V8SFmode:
43744 case V4DFmode:
43745 use_vector_set = TARGET_AVX;
43746 break;
43747 case V4DImode:
43748 /* Use ix86_expand_vector_set in 64bit mode only. */
43749 use_vector_set = TARGET_AVX && TARGET_64BIT;
43750 break;
43751 default:
43752 break;
43753 }
43754
43755 if (use_vector_set)
43756 {
43757 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43758 var = force_reg (GET_MODE_INNER (mode), var);
43759 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43760 return true;
43761 }
43762
43763 switch (mode)
43764 {
43765 case V2SFmode:
43766 case V2SImode:
43767 if (!mmx_ok)
43768 return false;
43769 /* FALLTHRU */
43770
43771 case V2DFmode:
43772 case V2DImode:
43773 if (one_var != 0)
43774 return false;
43775 var = force_reg (GET_MODE_INNER (mode), var);
43776 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43777 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43778 return true;
43779
43780 case V4SFmode:
43781 case V4SImode:
43782 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43783 new_target = gen_reg_rtx (mode);
43784 else
43785 new_target = target;
43786 var = force_reg (GET_MODE_INNER (mode), var);
43787 x = gen_rtx_VEC_DUPLICATE (mode, var);
43788 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43789 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43790 if (one_var != 0)
43791 {
43792 /* We need to shuffle the value to the correct position, so
43793 create a new pseudo to store the intermediate result. */
43794
43795 /* With SSE2, we can use the integer shuffle insns. */
43796 if (mode != V4SFmode && TARGET_SSE2)
43797 {
43798 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43799 const1_rtx,
43800 GEN_INT (one_var == 1 ? 0 : 1),
43801 GEN_INT (one_var == 2 ? 0 : 1),
43802 GEN_INT (one_var == 3 ? 0 : 1)));
43803 if (target != new_target)
43804 emit_move_insn (target, new_target);
43805 return true;
43806 }
43807
43808 /* Otherwise convert the intermediate result to V4SFmode and
43809 use the SSE1 shuffle instructions. */
43810 if (mode != V4SFmode)
43811 {
43812 tmp = gen_reg_rtx (V4SFmode);
43813 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43814 }
43815 else
43816 tmp = new_target;
43817
43818 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43819 const1_rtx,
43820 GEN_INT (one_var == 1 ? 0 : 1),
43821 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43822 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43823
43824 if (mode != V4SFmode)
43825 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43826 else if (tmp != target)
43827 emit_move_insn (target, tmp);
43828 }
43829 else if (target != new_target)
43830 emit_move_insn (target, new_target);
43831 return true;
43832
43833 case V8HImode:
43834 case V16QImode:
43835 vsimode = V4SImode;
43836 goto widen;
43837 case V4HImode:
43838 case V8QImode:
43839 if (!mmx_ok)
43840 return false;
43841 vsimode = V2SImode;
43842 goto widen;
43843 widen:
43844 if (one_var != 0)
43845 return false;
43846
43847 /* Zero extend the variable element to SImode and recurse. */
43848 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43849
43850 x = gen_reg_rtx (vsimode);
43851 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43852 var, one_var))
43853 gcc_unreachable ();
43854
43855 emit_move_insn (target, gen_lowpart (mode, x));
43856 return true;
43857
43858 default:
43859 return false;
43860 }
43861 }
43862
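/* Note on the V4SFmode/V4SImode path above: the variable element is
   splatted and merged with zero (keeping only element 0, as movss/movd
   would), and when ONE_VAR is not element 0 a single pshufd (SSE2) or
   shufps (SSE1) then moves it into the requested lane.  */
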
43863 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43864 consisting of the values in VALS. It is known that all elements
43865 except ONE_VAR are constants. Return true if successful. */
43866
43867 static bool
43868 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43869 rtx target, rtx vals, int one_var)
43870 {
43871 rtx var = XVECEXP (vals, 0, one_var);
43872 machine_mode wmode;
43873 rtx const_vec, x;
43874
43875 const_vec = copy_rtx (vals);
43876 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43877 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43878
43879 switch (mode)
43880 {
43881 case V2DFmode:
43882 case V2DImode:
43883 case V2SFmode:
43884 case V2SImode:
43885 /* For the two element vectors, it's just as easy to use
43886 the general case. */
43887 return false;
43888
43889 case V4DImode:
43890 /* Use ix86_expand_vector_set in 64bit mode only. */
43891 if (!TARGET_64BIT)
43892 return false;
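      /* FALLTHRU */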
43893 case V4DFmode:
43894 case V8SFmode:
43895 case V8SImode:
43896 case V16HImode:
43897 case V32QImode:
43898 case V4SFmode:
43899 case V4SImode:
43900 case V8HImode:
43901 case V4HImode:
43902 break;
43903
43904 case V16QImode:
43905 if (TARGET_SSE4_1)
43906 break;
43907 wmode = V8HImode;
43908 goto widen;
43909 case V8QImode:
43910 wmode = V4HImode;
43911 goto widen;
43912 widen:
43913 /* There's no way to set one QImode entry easily. Combine
43914 the variable value with its adjacent constant value, and
43915 promote to an HImode set. */
43916 x = XVECEXP (vals, 0, one_var ^ 1);
43917 if (one_var & 1)
43918 {
43919 var = convert_modes (HImode, QImode, var, true);
43920 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43921 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43922 x = GEN_INT (INTVAL (x) & 0xff);
43923 }
43924 else
43925 {
43926 var = convert_modes (HImode, QImode, var, true);
43927 x = gen_int_mode (INTVAL (x) << 8, HImode);
43928 }
43929 if (x != const0_rtx)
43930 var = expand_simple_binop (HImode, IOR, var, x, var,
43931 1, OPTAB_LIB_WIDEN);
43932
43933 x = gen_reg_rtx (wmode);
43934 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43935 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43936
43937 emit_move_insn (target, gen_lowpart (mode, x));
43938 return true;
43939
43940 default:
43941 return false;
43942 }
43943
43944 emit_move_insn (target, const_vec);
43945 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43946 return true;
43947 }
43948
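/* Note on the QImode cases above: without SSE4.1 there is no cheap
   single-byte insert, so the variable byte is merged with its constant
   neighbour into one HImode value (shifted into the high byte when
   ONE_VAR is odd) and the pair is then stored with a single HImode
   vector set at position ONE_VAR >> 1.  */
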
43949 /* A subroutine of ix86_expand_vector_init_general. Use vector
43950 concatenate to handle the most general case: all values variable,
43951 and none identical. */
43952
43953 static void
43954 ix86_expand_vector_init_concat (machine_mode mode,
43955 rtx target, rtx *ops, int n)
43956 {
43957 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43958 rtx first[16], second[8], third[4];
43959 rtvec v;
43960 int i, j;
43961
43962 switch (n)
43963 {
43964 case 2:
43965 switch (mode)
43966 {
43967 case V16SImode:
43968 cmode = V8SImode;
43969 break;
43970 case V16SFmode:
43971 cmode = V8SFmode;
43972 break;
43973 case V8DImode:
43974 cmode = V4DImode;
43975 break;
43976 case V8DFmode:
43977 cmode = V4DFmode;
43978 break;
43979 case V8SImode:
43980 cmode = V4SImode;
43981 break;
43982 case V8SFmode:
43983 cmode = V4SFmode;
43984 break;
43985 case V4DImode:
43986 cmode = V2DImode;
43987 break;
43988 case V4DFmode:
43989 cmode = V2DFmode;
43990 break;
43991 case V4SImode:
43992 cmode = V2SImode;
43993 break;
43994 case V4SFmode:
43995 cmode = V2SFmode;
43996 break;
43997 case V2DImode:
43998 cmode = DImode;
43999 break;
44000 case V2SImode:
44001 cmode = SImode;
44002 break;
44003 case V2DFmode:
44004 cmode = DFmode;
44005 break;
44006 case V2SFmode:
44007 cmode = SFmode;
44008 break;
44009 default:
44010 gcc_unreachable ();
44011 }
44012
44013 if (!register_operand (ops[1], cmode))
44014 ops[1] = force_reg (cmode, ops[1]);
44015 if (!register_operand (ops[0], cmode))
44016 ops[0] = force_reg (cmode, ops[0]);
44017 emit_insn (gen_rtx_SET (VOIDmode, target,
44018 gen_rtx_VEC_CONCAT (mode, ops[0],
44019 ops[1])));
44020 break;
44021
44022 case 4:
44023 switch (mode)
44024 {
44025 case V4DImode:
44026 cmode = V2DImode;
44027 break;
44028 case V4DFmode:
44029 cmode = V2DFmode;
44030 break;
44031 case V4SImode:
44032 cmode = V2SImode;
44033 break;
44034 case V4SFmode:
44035 cmode = V2SFmode;
44036 break;
44037 default:
44038 gcc_unreachable ();
44039 }
44040 goto half;
44041
44042 case 8:
44043 switch (mode)
44044 {
44045 case V8DImode:
44046 cmode = V2DImode;
44047 hmode = V4DImode;
44048 break;
44049 case V8DFmode:
44050 cmode = V2DFmode;
44051 hmode = V4DFmode;
44052 break;
44053 case V8SImode:
44054 cmode = V2SImode;
44055 hmode = V4SImode;
44056 break;
44057 case V8SFmode:
44058 cmode = V2SFmode;
44059 hmode = V4SFmode;
44060 break;
44061 default:
44062 gcc_unreachable ();
44063 }
44064 goto half;
44065
44066 case 16:
44067 switch (mode)
44068 {
44069 case V16SImode:
44070 cmode = V2SImode;
44071 hmode = V4SImode;
44072 gmode = V8SImode;
44073 break;
44074 case V16SFmode:
44075 cmode = V2SFmode;
44076 hmode = V4SFmode;
44077 gmode = V8SFmode;
44078 break;
44079 default:
44080 gcc_unreachable ();
44081 }
44082 goto half;
44083
44084 half:
44085 /* FIXME: We process inputs backward to help RA. PR 36222. */
44086 i = n - 1;
44087 j = (n >> 1) - 1;
44088 for (; i > 0; i -= 2, j--)
44089 {
44090 first[j] = gen_reg_rtx (cmode);
44091 v = gen_rtvec (2, ops[i - 1], ops[i]);
44092 ix86_expand_vector_init (false, first[j],
44093 gen_rtx_PARALLEL (cmode, v));
44094 }
44095
44096 n >>= 1;
44097 if (n > 4)
44098 {
44099 gcc_assert (hmode != VOIDmode);
44100 gcc_assert (gmode != VOIDmode);
44101 for (i = j = 0; i < n; i += 2, j++)
44102 {
44103 second[j] = gen_reg_rtx (hmode);
44104 ix86_expand_vector_init_concat (hmode, second [j],
44105 &first [i], 2);
44106 }
44107 n >>= 1;
44108 for (i = j = 0; i < n; i += 2, j++)
44109 {
44110 third[j] = gen_reg_rtx (gmode);
44111 ix86_expand_vector_init_concat (gmode, third[j],
44112 &second[i], 2);
44113 }
44114 n >>= 1;
44115 ix86_expand_vector_init_concat (mode, target, third, n);
44116 }
44117 else if (n > 2)
44118 {
44119 gcc_assert (hmode != VOIDmode);
44120 for (i = j = 0; i < n; i += 2, j++)
44121 {
44122 second[j] = gen_reg_rtx (hmode);
44123 ix86_expand_vector_init_concat (hmode, second [j],
44124 &first [i], 2);
44125 }
44126 n >>= 1;
44127 ix86_expand_vector_init_concat (mode, target, second, n);
44128 }
44129 else
44130 ix86_expand_vector_init_concat (mode, target, first, n);
44131 break;
44132
44133 default:
44134 gcc_unreachable ();
44135 }
44136 }
44137
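/* Illustration of the recursive halving above: for eight variable
   elements in V8SFmode, the scalars are first paired into four V2SFmode
   registers, those are concatenated into two V4SFmode halves, and a
   final VEC_CONCAT produces the V8SFmode result.  */
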
44138 /* A subroutine of ix86_expand_vector_init_general. Use vector
44139 interleave to handle the most general case: all values variable,
44140 and none identical. */
44141
44142 static void
44143 ix86_expand_vector_init_interleave (machine_mode mode,
44144 rtx target, rtx *ops, int n)
44145 {
44146 machine_mode first_imode, second_imode, third_imode, inner_mode;
44147 int i, j;
44148 rtx op0, op1;
44149 rtx (*gen_load_even) (rtx, rtx, rtx);
44150 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44151 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44152
44153 switch (mode)
44154 {
44155 case V8HImode:
44156 gen_load_even = gen_vec_setv8hi;
44157 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44158 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44159 inner_mode = HImode;
44160 first_imode = V4SImode;
44161 second_imode = V2DImode;
44162 third_imode = VOIDmode;
44163 break;
44164 case V16QImode:
44165 gen_load_even = gen_vec_setv16qi;
44166 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44167 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44168 inner_mode = QImode;
44169 first_imode = V8HImode;
44170 second_imode = V4SImode;
44171 third_imode = V2DImode;
44172 break;
44173 default:
44174 gcc_unreachable ();
44175 }
44176
44177 for (i = 0; i < n; i++)
44178 {
44179 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44180 op0 = gen_reg_rtx (SImode);
44181 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44182
44183 /* Insert the SImode value as low element of V4SImode vector. */
44184 op1 = gen_reg_rtx (V4SImode);
44185 op0 = gen_rtx_VEC_MERGE (V4SImode,
44186 gen_rtx_VEC_DUPLICATE (V4SImode,
44187 op0),
44188 CONST0_RTX (V4SImode),
44189 const1_rtx);
44190 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44191
44192 /* Cast the V4SImode vector back to a vector in the original mode. */
44193 op0 = gen_reg_rtx (mode);
44194 emit_move_insn (op0, gen_lowpart (mode, op1));
44195
44196 /* Load even elements into the second position. */
44197 emit_insn (gen_load_even (op0,
44198 force_reg (inner_mode,
44199 ops [i + i + 1]),
44200 const1_rtx));
44201
44202 /* Cast vector to FIRST_IMODE vector. */
44203 ops[i] = gen_reg_rtx (first_imode);
44204 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44205 }
44206
44207 /* Interleave low FIRST_IMODE vectors. */
44208 for (i = j = 0; i < n; i += 2, j++)
44209 {
44210 op0 = gen_reg_rtx (first_imode);
44211 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44212
44213 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44214 ops[j] = gen_reg_rtx (second_imode);
44215 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44216 }
44217
44218 /* Interleave low SECOND_IMODE vectors. */
44219 switch (second_imode)
44220 {
44221 case V4SImode:
44222 for (i = j = 0; i < n / 2; i += 2, j++)
44223 {
44224 op0 = gen_reg_rtx (second_imode);
44225 emit_insn (gen_interleave_second_low (op0, ops[i],
44226 ops[i + 1]));
44227
44228 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44229 vector. */
44230 ops[j] = gen_reg_rtx (third_imode);
44231 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44232 }
44233 second_imode = V2DImode;
44234 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44235 /* FALLTHRU */
44236
44237 case V2DImode:
44238 op0 = gen_reg_rtx (second_imode);
44239 emit_insn (gen_interleave_second_low (op0, ops[0],
44240 ops[1]));
44241
44242 /* Cast the SECOND_IMODE vector back to a vector in the original
44243 mode. */
44244 emit_insn (gen_rtx_SET (VOIDmode, target,
44245 gen_lowpart (mode, op0)));
44246 break;
44247
44248 default:
44249 gcc_unreachable ();
44250 }
44251 }
44252
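/* Illustration for V8HImode: each input pair (2*i, 2*i + 1) is packed
   into the two low HImode lanes of its own register, the partial vectors
   are then combined pairwise with a V4SImode interleave-low (punpckldq),
   and a final V2DImode interleave-low (punpcklqdq) merges the two
   remaining halves into TARGET.  */
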
44253 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44254 all values variable, and none identical. */
44255
44256 static void
44257 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44258 rtx target, rtx vals)
44259 {
44260 rtx ops[64], op0, op1, op2, op3, op4, op5;
44261 machine_mode half_mode = VOIDmode;
44262 machine_mode quarter_mode = VOIDmode;
44263 int n, i;
44264
44265 switch (mode)
44266 {
44267 case V2SFmode:
44268 case V2SImode:
44269 if (!mmx_ok && !TARGET_SSE)
44270 break;
44271 /* FALLTHRU */
44272
44273 case V16SImode:
44274 case V16SFmode:
44275 case V8DFmode:
44276 case V8DImode:
44277 case V8SFmode:
44278 case V8SImode:
44279 case V4DFmode:
44280 case V4DImode:
44281 case V4SFmode:
44282 case V4SImode:
44283 case V2DFmode:
44284 case V2DImode:
44285 n = GET_MODE_NUNITS (mode);
44286 for (i = 0; i < n; i++)
44287 ops[i] = XVECEXP (vals, 0, i);
44288 ix86_expand_vector_init_concat (mode, target, ops, n);
44289 return;
44290
44291 case V32QImode:
44292 half_mode = V16QImode;
44293 goto half;
44294
44295 case V16HImode:
44296 half_mode = V8HImode;
44297 goto half;
44298
44299 half:
44300 n = GET_MODE_NUNITS (mode);
44301 for (i = 0; i < n; i++)
44302 ops[i] = XVECEXP (vals, 0, i);
44303 op0 = gen_reg_rtx (half_mode);
44304 op1 = gen_reg_rtx (half_mode);
44305 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44306 n >> 2);
44307 ix86_expand_vector_init_interleave (half_mode, op1,
44308 &ops [n >> 1], n >> 2);
44309 emit_insn (gen_rtx_SET (VOIDmode, target,
44310 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44311 return;
44312
44313 case V64QImode:
44314 quarter_mode = V16QImode;
44315 half_mode = V32QImode;
44316 goto quarter;
44317
44318 case V32HImode:
44319 quarter_mode = V8HImode;
44320 half_mode = V16HImode;
44321 goto quarter;
44322
44323 quarter:
44324 n = GET_MODE_NUNITS (mode);
44325 for (i = 0; i < n; i++)
44326 ops[i] = XVECEXP (vals, 0, i);
44327 op0 = gen_reg_rtx (quarter_mode);
44328 op1 = gen_reg_rtx (quarter_mode);
44329 op2 = gen_reg_rtx (quarter_mode);
44330 op3 = gen_reg_rtx (quarter_mode);
44331 op4 = gen_reg_rtx (half_mode);
44332 op5 = gen_reg_rtx (half_mode);
44333 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44334 n >> 3);
44335 ix86_expand_vector_init_interleave (quarter_mode, op1,
44336 &ops [n >> 2], n >> 3);
44337 ix86_expand_vector_init_interleave (quarter_mode, op2,
44338 &ops [n >> 1], n >> 3);
44339 ix86_expand_vector_init_interleave (quarter_mode, op3,
44340 &ops [(n >> 1) | (n >> 2)], n >> 3);
44341 emit_insn (gen_rtx_SET (VOIDmode, op4,
44342 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44343 emit_insn (gen_rtx_SET (VOIDmode, op5,
44344 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44345 emit_insn (gen_rtx_SET (VOIDmode, target,
44346 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44347 return;
44348
44349 case V16QImode:
44350 if (!TARGET_SSE4_1)
44351 break;
44352 /* FALLTHRU */
44353
44354 case V8HImode:
44355 if (!TARGET_SSE2)
44356 break;
44357
44358 /* Don't use ix86_expand_vector_init_interleave if we can't
44359 move from GPR to SSE register directly. */
44360 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44361 break;
44362
44363 n = GET_MODE_NUNITS (mode);
44364 for (i = 0; i < n; i++)
44365 ops[i] = XVECEXP (vals, 0, i);
44366 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44367 return;
44368
44369 case V4HImode:
44370 case V8QImode:
44371 break;
44372
44373 default:
44374 gcc_unreachable ();
44375 }
44376
44377 {
44378 int i, j, n_elts, n_words, n_elt_per_word;
44379 machine_mode inner_mode;
44380 rtx words[4], shift;
44381
44382 inner_mode = GET_MODE_INNER (mode);
44383 n_elts = GET_MODE_NUNITS (mode);
44384 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44385 n_elt_per_word = n_elts / n_words;
44386 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44387
44388 for (i = 0; i < n_words; ++i)
44389 {
44390 rtx word = NULL_RTX;
44391
44392 for (j = 0; j < n_elt_per_word; ++j)
44393 {
44394 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44395 elt = convert_modes (word_mode, inner_mode, elt, true);
44396
44397 if (j == 0)
44398 word = elt;
44399 else
44400 {
44401 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44402 word, 1, OPTAB_LIB_WIDEN);
44403 word = expand_simple_binop (word_mode, IOR, word, elt,
44404 word, 1, OPTAB_LIB_WIDEN);
44405 }
44406 }
44407
44408 words[i] = word;
44409 }
44410
44411 if (n_words == 1)
44412 emit_move_insn (target, gen_lowpart (mode, words[0]));
44413 else if (n_words == 2)
44414 {
44415 rtx tmp = gen_reg_rtx (mode);
44416 emit_clobber (tmp);
44417 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44418 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44419 emit_move_insn (target, tmp);
44420 }
44421 else if (n_words == 4)
44422 {
44423 rtx tmp = gen_reg_rtx (V4SImode);
44424 gcc_assert (word_mode == SImode);
44425 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44426 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44427 emit_move_insn (target, gen_lowpart (mode, tmp));
44428 }
44429 else
44430 gcc_unreachable ();
44431 }
44432 }
44433
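/* Note on the fallback at the end of the function above: when no
   mode-specific strategy applies, the elements are packed into word_mode
   integers with shifts and IORs, and the vector is then assembled from
   one, two or four such words.  */
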
44434 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44435 instructions unless MMX_OK is true. */
44436
44437 void
44438 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44439 {
44440 machine_mode mode = GET_MODE (target);
44441 machine_mode inner_mode = GET_MODE_INNER (mode);
44442 int n_elts = GET_MODE_NUNITS (mode);
44443 int n_var = 0, one_var = -1;
44444 bool all_same = true, all_const_zero = true;
44445 int i;
44446 rtx x;
44447
44448 for (i = 0; i < n_elts; ++i)
44449 {
44450 x = XVECEXP (vals, 0, i);
44451 if (!(CONST_INT_P (x)
44452 || GET_CODE (x) == CONST_DOUBLE
44453 || GET_CODE (x) == CONST_FIXED))
44454 n_var++, one_var = i;
44455 else if (x != CONST0_RTX (inner_mode))
44456 all_const_zero = false;
44457 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44458 all_same = false;
44459 }
44460
44461 /* Constants are best loaded from the constant pool. */
44462 if (n_var == 0)
44463 {
44464 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44465 return;
44466 }
44467
44468 /* If all values are identical, broadcast the value. */
44469 if (all_same
44470 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44471 XVECEXP (vals, 0, 0)))
44472 return;
44473
44474 /* Values where only one field is non-constant are best loaded from
44475 the pool and overwritten via move later. */
44476 if (n_var == 1)
44477 {
44478 if (all_const_zero
44479 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44480 XVECEXP (vals, 0, one_var),
44481 one_var))
44482 return;
44483
44484 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44485 return;
44486 }
44487
44488 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44489 }
44490
44491 void
44492 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44493 {
44494 machine_mode mode = GET_MODE (target);
44495 machine_mode inner_mode = GET_MODE_INNER (mode);
44496 machine_mode half_mode;
44497 bool use_vec_merge = false;
44498 rtx tmp;
44499 static rtx (*gen_extract[6][2]) (rtx, rtx)
44500 = {
44501 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44502 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44503 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44504 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44505 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44506 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44507 };
44508 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44509 = {
44510 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44511 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44512 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44513 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44514 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44515 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44516 };
44517 int i, j, n;
44518
44519 switch (mode)
44520 {
44521 case V2SFmode:
44522 case V2SImode:
44523 if (mmx_ok)
44524 {
44525 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44526 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44527 if (elt == 0)
44528 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44529 else
44530 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44531 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44532 return;
44533 }
44534 break;
44535
44536 case V2DImode:
44537 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44538 if (use_vec_merge)
44539 break;
44540
44541 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44542 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44543 if (elt == 0)
44544 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44545 else
44546 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44547 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44548 return;
44549
44550 case V2DFmode:
44551 {
44552 rtx op0, op1;
44553
44554 /* For the two element vectors, we implement a VEC_CONCAT with
44555 the extraction of the other element. */
44556
44557 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44558 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44559
44560 if (elt == 0)
44561 op0 = val, op1 = tmp;
44562 else
44563 op0 = tmp, op1 = val;
44564
44565 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44566 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44567 }
44568 return;
44569
44570 case V4SFmode:
44571 use_vec_merge = TARGET_SSE4_1;
44572 if (use_vec_merge)
44573 break;
44574
44575 switch (elt)
44576 {
44577 case 0:
44578 use_vec_merge = true;
44579 break;
44580
44581 case 1:
44582 /* tmp = target = A B C D */
44583 tmp = copy_to_reg (target);
44584 /* target = A A B B */
44585 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44586 /* target = X A B B */
44587 ix86_expand_vector_set (false, target, val, 0);
44588 /* target = A X C D */
44589 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44590 const1_rtx, const0_rtx,
44591 GEN_INT (2+4), GEN_INT (3+4)));
44592 return;
44593
44594 case 2:
44595 /* tmp = target = A B C D */
44596 tmp = copy_to_reg (target);
44597 /* tmp = X B C D */
44598 ix86_expand_vector_set (false, tmp, val, 0);
44599 /* target = A B X D */
44600 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44601 const0_rtx, const1_rtx,
44602 GEN_INT (0+4), GEN_INT (3+4)));
44603 return;
44604
44605 case 3:
44606 /* tmp = target = A B C D */
44607 tmp = copy_to_reg (target);
44608 /* tmp = X B C D */
44609 ix86_expand_vector_set (false, tmp, val, 0);
44610 /* target = A B C X */
44611 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44612 const0_rtx, const1_rtx,
44613 GEN_INT (2+4), GEN_INT (0+4)));
44614 return;
44615
44616 default:
44617 gcc_unreachable ();
44618 }
44619 break;
44620
44621 case V4SImode:
44622 use_vec_merge = TARGET_SSE4_1;
44623 if (use_vec_merge)
44624 break;
44625
44626 /* Element 0 handled by vec_merge below. */
44627 if (elt == 0)
44628 {
44629 use_vec_merge = true;
44630 break;
44631 }
44632
44633 if (TARGET_SSE2)
44634 {
44635 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44636 store into element 0, then shuffle them back. */
44637
44638 rtx order[4];
44639
44640 order[0] = GEN_INT (elt);
44641 order[1] = const1_rtx;
44642 order[2] = const2_rtx;
44643 order[3] = GEN_INT (3);
44644 order[elt] = const0_rtx;
44645
44646 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44647 order[1], order[2], order[3]));
44648
44649 ix86_expand_vector_set (false, target, val, 0);
44650
44651 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44652 order[1], order[2], order[3]));
44653 }
44654 else
44655 {
44656 /* For SSE1, we have to reuse the V4SF code. */
44657 rtx t = gen_reg_rtx (V4SFmode);
44658 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44659 emit_move_insn (target, gen_lowpart (mode, t));
44660 }
44661 return;
44662
44663 case V8HImode:
44664 use_vec_merge = TARGET_SSE2;
44665 break;
44666 case V4HImode:
44667 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44668 break;
44669
44670 case V16QImode:
44671 use_vec_merge = TARGET_SSE4_1;
44672 break;
44673
44674 case V8QImode:
44675 break;
44676
44677 case V32QImode:
44678 half_mode = V16QImode;
44679 j = 0;
44680 n = 16;
44681 goto half;
44682
44683 case V16HImode:
44684 half_mode = V8HImode;
44685 j = 1;
44686 n = 8;
44687 goto half;
44688
44689 case V8SImode:
44690 half_mode = V4SImode;
44691 j = 2;
44692 n = 4;
44693 goto half;
44694
44695 case V4DImode:
44696 half_mode = V2DImode;
44697 j = 3;
44698 n = 2;
44699 goto half;
44700
44701 case V8SFmode:
44702 half_mode = V4SFmode;
44703 j = 4;
44704 n = 4;
44705 goto half;
44706
44707 case V4DFmode:
44708 half_mode = V2DFmode;
44709 j = 5;
44710 n = 2;
44711 goto half;
44712
44713 half:
44714 /* Compute offset. */
44715 i = elt / n;
44716 elt %= n;
44717
44718 gcc_assert (i <= 1);
44719
44720 /* Extract the half. */
44721 tmp = gen_reg_rtx (half_mode);
44722 emit_insn (gen_extract[j][i] (tmp, target));
44723
44724 /* Put val in tmp at elt. */
44725 ix86_expand_vector_set (false, tmp, val, elt);
44726
44727 /* Put it back. */
44728 emit_insn (gen_insert[j][i] (target, target, tmp));
44729 return;
44730
44731 case V8DFmode:
44732 if (TARGET_AVX512F)
44733 {
44734 tmp = gen_reg_rtx (mode);
44735 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44736 gen_rtx_VEC_DUPLICATE (mode, val)));
44737 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44738 force_reg (QImode, GEN_INT (1 << elt))));
44739 return;
44740 }
44741 else
44742 break;
44743 case V8DImode:
44744 if (TARGET_AVX512F)
44745 {
44746 tmp = gen_reg_rtx (mode);
44747 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44748 gen_rtx_VEC_DUPLICATE (mode, val)));
44749 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44750 force_reg (QImode, GEN_INT (1 << elt))));
44751 return;
44752 }
44753 else
44754 break;
44755 case V16SFmode:
44756 if (TARGET_AVX512F)
44757 {
44758 tmp = gen_reg_rtx (mode);
44759 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44760 gen_rtx_VEC_DUPLICATE (mode, val)));
44761 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44762 force_reg (HImode, GEN_INT (1 << elt))));
44763 return;
44764 }
44765 else
44766 break;
44767 case V16SImode:
44768 if (TARGET_AVX512F)
44769 {
44770 tmp = gen_reg_rtx (mode);
44771 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44772 gen_rtx_VEC_DUPLICATE (mode, val)));
44773 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44774 force_reg (HImode, GEN_INT (1 << elt))));
44775 return;
44776 }
44777 else
44778 break;
44779 case V32HImode:
44780 if (TARGET_AVX512F && TARGET_AVX512BW)
44781 {
44782 tmp = gen_reg_rtx (mode);
44783 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44784 gen_rtx_VEC_DUPLICATE (mode, val)));
44785 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44786 force_reg (SImode, GEN_INT (1 << elt))));
44787 return;
44788 }
44789 else
44790 break;
44791 case V64QImode:
44792 if (TARGET_AVX512F && TARGET_AVX512BW)
44793 {
44794 tmp = gen_reg_rtx (mode);
44795 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44796 gen_rtx_VEC_DUPLICATE (mode, val)));
44797 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44798 force_reg (DImode, GEN_INT (1 << elt))));
44799 return;
44800 }
44801 else
44802 break;
44803
44804 default:
44805 break;
44806 }
44807
44808 if (use_vec_merge)
44809 {
44810 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44811 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44812 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44813 }
44814 else
44815 {
44816 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44817
44818 emit_move_insn (mem, target);
44819
44820 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44821 emit_move_insn (tmp, val);
44822
44823 emit_move_insn (target, mem);
44824 }
44825 }
44826
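/* When no vec_merge or mode-specific sequence is available above, the
   vector is spilled to a stack temporary, the scalar is stored at the
   element's byte offset, and the whole vector is reloaded.  */
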
44827 void
44828 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44829 {
44830 machine_mode mode = GET_MODE (vec);
44831 machine_mode inner_mode = GET_MODE_INNER (mode);
44832 bool use_vec_extr = false;
44833 rtx tmp;
44834
44835 switch (mode)
44836 {
44837 case V2SImode:
44838 case V2SFmode:
44839 if (!mmx_ok)
44840 break;
44841 /* FALLTHRU */
44842
44843 case V2DFmode:
44844 case V2DImode:
44845 use_vec_extr = true;
44846 break;
44847
44848 case V4SFmode:
44849 use_vec_extr = TARGET_SSE4_1;
44850 if (use_vec_extr)
44851 break;
44852
44853 switch (elt)
44854 {
44855 case 0:
44856 tmp = vec;
44857 break;
44858
44859 case 1:
44860 case 3:
44861 tmp = gen_reg_rtx (mode);
44862 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44863 GEN_INT (elt), GEN_INT (elt),
44864 GEN_INT (elt+4), GEN_INT (elt+4)));
44865 break;
44866
44867 case 2:
44868 tmp = gen_reg_rtx (mode);
44869 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44870 break;
44871
44872 default:
44873 gcc_unreachable ();
44874 }
44875 vec = tmp;
44876 use_vec_extr = true;
44877 elt = 0;
44878 break;
44879
44880 case V4SImode:
44881 use_vec_extr = TARGET_SSE4_1;
44882 if (use_vec_extr)
44883 break;
44884
44885 if (TARGET_SSE2)
44886 {
44887 switch (elt)
44888 {
44889 case 0:
44890 tmp = vec;
44891 break;
44892
44893 case 1:
44894 case 3:
44895 tmp = gen_reg_rtx (mode);
44896 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44897 GEN_INT (elt), GEN_INT (elt),
44898 GEN_INT (elt), GEN_INT (elt)));
44899 break;
44900
44901 case 2:
44902 tmp = gen_reg_rtx (mode);
44903 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44904 break;
44905
44906 default:
44907 gcc_unreachable ();
44908 }
44909 vec = tmp;
44910 use_vec_extr = true;
44911 elt = 0;
44912 }
44913 else
44914 {
44915 /* For SSE1, we have to reuse the V4SF code. */
44916 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44917 gen_lowpart (V4SFmode, vec), elt);
44918 return;
44919 }
44920 break;
44921
44922 case V8HImode:
44923 use_vec_extr = TARGET_SSE2;
44924 break;
44925 case V4HImode:
44926 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44927 break;
44928
44929 case V16QImode:
44930 use_vec_extr = TARGET_SSE4_1;
44931 break;
44932
44933 case V8SFmode:
44934 if (TARGET_AVX)
44935 {
44936 tmp = gen_reg_rtx (V4SFmode);
44937 if (elt < 4)
44938 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44939 else
44940 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44941 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44942 return;
44943 }
44944 break;
44945
44946 case V4DFmode:
44947 if (TARGET_AVX)
44948 {
44949 tmp = gen_reg_rtx (V2DFmode);
44950 if (elt < 2)
44951 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44952 else
44953 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44954 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44955 return;
44956 }
44957 break;
44958
44959 case V32QImode:
44960 if (TARGET_AVX)
44961 {
44962 tmp = gen_reg_rtx (V16QImode);
44963 if (elt < 16)
44964 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44965 else
44966 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44967 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44968 return;
44969 }
44970 break;
44971
44972 case V16HImode:
44973 if (TARGET_AVX)
44974 {
44975 tmp = gen_reg_rtx (V8HImode);
44976 if (elt < 8)
44977 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44978 else
44979 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44980 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44981 return;
44982 }
44983 break;
44984
44985 case V8SImode:
44986 if (TARGET_AVX)
44987 {
44988 tmp = gen_reg_rtx (V4SImode);
44989 if (elt < 4)
44990 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44991 else
44992 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44993 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44994 return;
44995 }
44996 break;
44997
44998 case V4DImode:
44999 if (TARGET_AVX)
45000 {
45001 tmp = gen_reg_rtx (V2DImode);
45002 if (elt < 2)
45003 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45004 else
45005 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45006 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45007 return;
45008 }
45009 break;
45010
45011 case V32HImode:
45012 if (TARGET_AVX512BW)
45013 {
45014 tmp = gen_reg_rtx (V16HImode);
45015 if (elt < 16)
45016 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45017 else
45018 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45019 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45020 return;
45021 }
45022 break;
45023
45024 case V64QImode:
45025 if (TARGET_AVX512BW)
45026 {
45027 tmp = gen_reg_rtx (V32QImode);
45028 if (elt < 32)
45029 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45030 else
45031 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45032 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45033 return;
45034 }
45035 break;
45036
45037 case V16SFmode:
45038 tmp = gen_reg_rtx (V8SFmode);
45039 if (elt < 8)
45040 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45041 else
45042 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45043 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45044 return;
45045
45046 case V8DFmode:
45047 tmp = gen_reg_rtx (V4DFmode);
45048 if (elt < 4)
45049 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45050 else
45051 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45052 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45053 return;
45054
45055 case V16SImode:
45056 tmp = gen_reg_rtx (V8SImode);
45057 if (elt < 8)
45058 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45059 else
45060 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45061 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45062 return;
45063
45064 case V8DImode:
45065 tmp = gen_reg_rtx (V4DImode);
45066 if (elt < 4)
45067 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45068 else
45069 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45070 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45071 return;
45072
45073 case V8QImode:
45074 /* ??? Could extract the appropriate HImode element and shift. */
45075 default:
45076 break;
45077 }
45078
45079 if (use_vec_extr)
45080 {
45081 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45082 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45083
45084 /* Let the rtl optimizers know about the zero extension performed. */
45085 if (inner_mode == QImode || inner_mode == HImode)
45086 {
45087 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45088 target = gen_lowpart (SImode, target);
45089 }
45090
45091 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45092 }
45093 else
45094 {
45095 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45096
45097 emit_move_insn (mem, vec);
45098
45099 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45100 emit_move_insn (target, tmp);
45101 }
45102 }
45103
45104 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45105 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45106 The upper bits of DEST are undefined, though they shouldn't cause
45107 exceptions (some bits from src or all zeros are ok). */
45108
45109 static void
45110 emit_reduc_half (rtx dest, rtx src, int i)
45111 {
45112 rtx tem, d = dest;
45113 switch (GET_MODE (src))
45114 {
45115 case V4SFmode:
45116 if (i == 128)
45117 tem = gen_sse_movhlps (dest, src, src);
45118 else
45119 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45120 GEN_INT (1 + 4), GEN_INT (1 + 4));
45121 break;
45122 case V2DFmode:
45123 tem = gen_vec_interleave_highv2df (dest, src, src);
45124 break;
45125 case V16QImode:
45126 case V8HImode:
45127 case V4SImode:
45128 case V2DImode:
45129 d = gen_reg_rtx (V1TImode);
45130 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45131 GEN_INT (i / 2));
45132 break;
45133 case V8SFmode:
45134 if (i == 256)
45135 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45136 else
45137 tem = gen_avx_shufps256 (dest, src, src,
45138 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45139 break;
45140 case V4DFmode:
45141 if (i == 256)
45142 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45143 else
45144 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45145 break;
45146 case V32QImode:
45147 case V16HImode:
45148 case V8SImode:
45149 case V4DImode:
45150 if (i == 256)
45151 {
45152 if (GET_MODE (dest) != V4DImode)
45153 d = gen_reg_rtx (V4DImode);
45154 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45155 gen_lowpart (V4DImode, src),
45156 const1_rtx);
45157 }
45158 else
45159 {
45160 d = gen_reg_rtx (V2TImode);
45161 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45162 GEN_INT (i / 2));
45163 }
45164 break;
45165 case V64QImode:
45166 case V32HImode:
45167 case V16SImode:
45168 case V16SFmode:
45169 case V8DImode:
45170 case V8DFmode:
45171 if (i > 128)
45172 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45173 gen_lowpart (V16SImode, src),
45174 gen_lowpart (V16SImode, src),
45175 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45176 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45177 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45178 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45179 GEN_INT (0xC), GEN_INT (0xD),
45180 GEN_INT (0xE), GEN_INT (0xF),
45181 GEN_INT (0x10), GEN_INT (0x11),
45182 GEN_INT (0x12), GEN_INT (0x13),
45183 GEN_INT (0x14), GEN_INT (0x15),
45184 GEN_INT (0x16), GEN_INT (0x17));
45185 else
45186 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45187 gen_lowpart (V16SImode, src),
45188 GEN_INT (i == 128 ? 0x2 : 0x1),
45189 GEN_INT (0x3),
45190 GEN_INT (0x3),
45191 GEN_INT (0x3),
45192 GEN_INT (i == 128 ? 0x6 : 0x5),
45193 GEN_INT (0x7),
45194 GEN_INT (0x7),
45195 GEN_INT (0x7),
45196 GEN_INT (i == 128 ? 0xA : 0x9),
45197 GEN_INT (0xB),
45198 GEN_INT (0xB),
45199 GEN_INT (0xB),
45200 GEN_INT (i == 128 ? 0xE : 0xD),
45201 GEN_INT (0xF),
45202 GEN_INT (0xF),
45203 GEN_INT (0xF));
45204 break;
45205 default:
45206 gcc_unreachable ();
45207 }
45208 emit_insn (tem);
45209 if (d != dest)
45210 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45211 }
45212
45213 /* Expand a vector reduction. FN is the binary pattern to reduce;
45214 DEST is the destination; IN is the input vector. */
45215
45216 void
45217 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45218 {
45219 rtx half, dst, vec = in;
45220 machine_mode mode = GET_MODE (in);
45221 int i;
45222
45223 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
45224 if (TARGET_SSE4_1
45225 && mode == V8HImode
45226 && fn == gen_uminv8hi3)
45227 {
45228 emit_insn (gen_sse4_1_phminposuw (dest, in));
45229 return;
45230 }
45231
45232 for (i = GET_MODE_BITSIZE (mode);
45233 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45234 i >>= 1)
45235 {
45236 half = gen_reg_rtx (mode);
45237 emit_reduc_half (half, vec, i);
45238 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45239 dst = dest;
45240 else
45241 dst = gen_reg_rtx (mode);
45242 emit_insn (fn (dst, half, vec));
45243 vec = dst;
45244 }
45245 }
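
/* For example, reducing a V4SFmode vector with an addition pattern takes
   two steps: the upper 64 bits are folded onto the lower 64 bits, then
   the two remaining elements are combined, leaving the result in element
   0 of DEST.  */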
45246 \f
45247 /* Target hook for scalar_mode_supported_p. */
45248 static bool
45249 ix86_scalar_mode_supported_p (machine_mode mode)
45250 {
45251 if (DECIMAL_FLOAT_MODE_P (mode))
45252 return default_decimal_float_supported_p ();
45253 else if (mode == TFmode)
45254 return true;
45255 else
45256 return default_scalar_mode_supported_p (mode);
45257 }
45258
45259 /* Implements target hook vector_mode_supported_p. */
45260 static bool
45261 ix86_vector_mode_supported_p (machine_mode mode)
45262 {
45263 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45264 return true;
45265 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45266 return true;
45267 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45268 return true;
45269 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45270 return true;
45271 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45272 return true;
45273 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45274 return true;
45275 return false;
45276 }
45277
45278 /* Implement target hook libgcc_floating_mode_supported_p. */
45279 static bool
45280 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45281 {
45282 switch (mode)
45283 {
45284 case SFmode:
45285 case DFmode:
45286 case XFmode:
45287 return true;
45288
45289 case TFmode:
45290 #ifdef IX86_NO_LIBGCC_TFMODE
45291 return false;
45292 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45293 return TARGET_LONG_DOUBLE_128;
45294 #else
45295 return true;
45296 #endif
45297
45298 default:
45299 return false;
45300 }
45301 }
45302
45303 /* Target hook for c_mode_for_suffix. */
45304 static machine_mode
45305 ix86_c_mode_for_suffix (char suffix)
45306 {
45307 if (suffix == 'q')
45308 return TFmode;
45309 if (suffix == 'w')
45310 return XFmode;
45311
45312 return VOIDmode;
45313 }
45314
45315 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45316
45317 We do this in the new i386 backend to maintain source compatibility
45318 with the old cc0-based compiler. */
45319
45320 static tree
45321 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45322 {
45323 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45324 clobbers);
45325 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45326 clobbers);
45327 return clobbers;
45328 }
45329
45330 /* Implements target vector targetm.asm.encode_section_info. */
45331
45332 static void ATTRIBUTE_UNUSED
45333 ix86_encode_section_info (tree decl, rtx rtl, int first)
45334 {
45335 default_encode_section_info (decl, rtl, first);
45336
45337 if (ix86_in_large_data_p (decl))
45338 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45339 }
45340
45341 /* Worker function for REVERSE_CONDITION. */
45342
45343 enum rtx_code
45344 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45345 {
45346 return (mode != CCFPmode && mode != CCFPUmode
45347 ? reverse_condition (code)
45348 : reverse_condition_maybe_unordered (code));
45349 }
45350
45351 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45352 to OPERANDS[0]. */
45353
45354 const char *
45355 output_387_reg_move (rtx insn, rtx *operands)
45356 {
45357 if (REG_P (operands[0]))
45358 {
45359 if (REG_P (operands[1])
45360 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45361 {
45362 if (REGNO (operands[0]) == FIRST_STACK_REG)
45363 return output_387_ffreep (operands, 0);
45364 return "fstp\t%y0";
45365 }
45366 if (STACK_TOP_P (operands[0]))
45367 return "fld%Z1\t%y1";
45368 return "fst\t%y0";
45369 }
45370 else if (MEM_P (operands[0]))
45371 {
45372 gcc_assert (REG_P (operands[1]));
45373 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45374 return "fstp%Z0\t%y0";
45375 else
45376 {
45377 /* There is no non-popping store to memory for XFmode.
45378 So if we need one, follow the store with a load. */
45379 if (GET_MODE (operands[0]) == XFmode)
45380 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45381 else
45382 return "fst%Z0\t%y0";
45383 }
45384 }
45385 else
45386 gcc_unreachable ();
45387 }
45388
45389 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45390 FP status register is set. */
45391
45392 void
45393 ix86_emit_fp_unordered_jump (rtx label)
45394 {
45395 rtx reg = gen_reg_rtx (HImode);
45396 rtx temp;
45397
45398 emit_insn (gen_x86_fnstsw_1 (reg));
45399
45400 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45401 {
45402 emit_insn (gen_x86_sahf_1 (reg));
45403
45404 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45405 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45406 }
45407 else
45408 {
45409 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45410
45411 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45412 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45413 }
45414
45415 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45416 gen_rtx_LABEL_REF (VOIDmode, label),
45417 pc_rtx);
45418 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45419
45420 emit_jump_insn (temp);
45421 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45422 }
45423
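/* Background for the sequence above: fnstsw leaves the x87 status word
   in %ax.  With SAHF the condition bits are copied into EFLAGS and C2
   becomes the parity flag, hence the UNORDERED test; otherwise C2 (bit
   10 of the status word, i.e. bit 2 of %ah) is tested directly with the
   0x04 mask.  */
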
45424 /* Output code to perform a log1p XFmode calculation. */
45425
45426 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45427 {
45428 rtx_code_label *label1 = gen_label_rtx ();
45429 rtx_code_label *label2 = gen_label_rtx ();
45430
45431 rtx tmp = gen_reg_rtx (XFmode);
45432 rtx tmp2 = gen_reg_rtx (XFmode);
45433 rtx test;
45434
45435 emit_insn (gen_absxf2 (tmp, op1));
45436 test = gen_rtx_GE (VOIDmode, tmp,
45437 CONST_DOUBLE_FROM_REAL_VALUE (
45438 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45439 XFmode));
45440 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45441
45442 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45443 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45444 emit_jump (label2);
45445
45446 emit_label (label1);
45447 emit_move_insn (tmp, CONST1_RTX (XFmode));
45448 emit_insn (gen_addxf3 (tmp, op1, tmp));
45449 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45450 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45451
45452 emit_label (label2);
45453 }
45454
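/* Note on the threshold used above: 0.29289321... is 1 - sqrt(2)/2, the
   limit of the argument range for which fyl2xp1 is defined, and
   multiplying by fldln2 turns the base-2 logarithm into a natural
   logarithm, so the fast path computes log1p directly while larger
   arguments fall back to fyl2x on 1 + op1.  */
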
45455 /* Emit code for round calculation. */
45456 void ix86_emit_i387_round (rtx op0, rtx op1)
45457 {
45458 machine_mode inmode = GET_MODE (op1);
45459 machine_mode outmode = GET_MODE (op0);
45460 rtx e1, e2, res, tmp, tmp1, half;
45461 rtx scratch = gen_reg_rtx (HImode);
45462 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45463 rtx_code_label *jump_label = gen_label_rtx ();
45464 rtx insn;
45465 rtx (*gen_abs) (rtx, rtx);
45466 rtx (*gen_neg) (rtx, rtx);
45467
45468 switch (inmode)
45469 {
45470 case SFmode:
45471 gen_abs = gen_abssf2;
45472 break;
45473 case DFmode:
45474 gen_abs = gen_absdf2;
45475 break;
45476 case XFmode:
45477 gen_abs = gen_absxf2;
45478 break;
45479 default:
45480 gcc_unreachable ();
45481 }
45482
45483 switch (outmode)
45484 {
45485 case SFmode:
45486 gen_neg = gen_negsf2;
45487 break;
45488 case DFmode:
45489 gen_neg = gen_negdf2;
45490 break;
45491 case XFmode:
45492 gen_neg = gen_negxf2;
45493 break;
45494 case HImode:
45495 gen_neg = gen_neghi2;
45496 break;
45497 case SImode:
45498 gen_neg = gen_negsi2;
45499 break;
45500 case DImode:
45501 gen_neg = gen_negdi2;
45502 break;
45503 default:
45504 gcc_unreachable ();
45505 }
45506
45507 e1 = gen_reg_rtx (inmode);
45508 e2 = gen_reg_rtx (inmode);
45509 res = gen_reg_rtx (outmode);
45510
45511 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45512
45513 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45514
45515 /* scratch = fxam(op1) */
45516 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45517 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45518 UNSPEC_FXAM)));
45519 /* e1 = fabs(op1) */
45520 emit_insn (gen_abs (e1, op1));
45521
45522 /* e2 = e1 + 0.5 */
45523 half = force_reg (inmode, half);
45524 emit_insn (gen_rtx_SET (VOIDmode, e2,
45525 gen_rtx_PLUS (inmode, e1, half)));
45526
45527 /* res = floor(e2) */
45528 if (inmode != XFmode)
45529 {
45530 tmp1 = gen_reg_rtx (XFmode);
45531
45532 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45533 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45534 }
45535 else
45536 tmp1 = e2;
45537
45538 switch (outmode)
45539 {
45540 case SFmode:
45541 case DFmode:
45542 {
45543 rtx tmp0 = gen_reg_rtx (XFmode);
45544
45545 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45546
45547 emit_insn (gen_rtx_SET (VOIDmode, res,
45548 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45549 UNSPEC_TRUNC_NOOP)));
45550 }
45551 break;
45552 case XFmode:
45553 emit_insn (gen_frndintxf2_floor (res, tmp1));
45554 break;
45555 case HImode:
45556 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45557 break;
45558 case SImode:
45559 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45560 break;
45561 case DImode:
45562 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45563 break;
45564 default:
45565 gcc_unreachable ();
45566 }
45567
45568 /* flags = signbit(a) */
45569 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45570
45571 /* if (flags) then res = -res */
45572 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45573 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45574 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45575 pc_rtx);
45576 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45577 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45578 JUMP_LABEL (insn) = jump_label;
45579
45580 emit_insn (gen_neg (res, res));
45581
45582 emit_label (jump_label);
45583 LABEL_NUSES (jump_label) = 1;
45584
45585 emit_move_insn (op0, res);
45586 }
45587
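/* Worked example of the sequence above: for op1 = -2.5, fabs gives 2.5,
   adding 0.5 gives 3.0, floor gives 3, and since fxam reported a set
   sign bit the result is negated to -3, i.e. halfway cases round away
   from zero.  */
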
45588 /* Output code to perform a Newton-Raphson approximation of a single precision
45589 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45590
45591 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45592 {
45593 rtx x0, x1, e0, e1;
45594
45595 x0 = gen_reg_rtx (mode);
45596 e0 = gen_reg_rtx (mode);
45597 e1 = gen_reg_rtx (mode);
45598 x1 = gen_reg_rtx (mode);
45599
45600 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45601
45602 b = force_reg (mode, b);
45603
45604 /* x0 = rcp(b) estimate */
45605 if (mode == V16SFmode || mode == V8DFmode)
45606 emit_insn (gen_rtx_SET (VOIDmode, x0,
45607 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45608 UNSPEC_RCP14)));
45609 else
45610 emit_insn (gen_rtx_SET (VOIDmode, x0,
45611 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45612 UNSPEC_RCP)));
45613
45614 /* e0 = x0 * b */
45615 emit_insn (gen_rtx_SET (VOIDmode, e0,
45616 gen_rtx_MULT (mode, x0, b)));
45617
45618 /* e0 = x0 * e0 */
45619 emit_insn (gen_rtx_SET (VOIDmode, e0,
45620 gen_rtx_MULT (mode, x0, e0)));
45621
45622 /* e1 = x0 + x0 */
45623 emit_insn (gen_rtx_SET (VOIDmode, e1,
45624 gen_rtx_PLUS (mode, x0, x0)));
45625
45626 /* x1 = e1 - e0 */
45627 emit_insn (gen_rtx_SET (VOIDmode, x1,
45628 gen_rtx_MINUS (mode, e1, e0)));
45629
45630 /* res = a * x1 */
45631 emit_insn (gen_rtx_SET (VOIDmode, res,
45632 gen_rtx_MULT (mode, a, x1)));
45633 }
45634
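/* Derivation of the sequence above: one Newton-Raphson step refines the
   reciprocal estimate x0 of 1/b as x1 = x0 * (2 - b * x0), which is
   computed here as (x0 + x0) - (b * x0 * x0); the quotient is then
   obtained as a * x1.  */
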
45635 /* Output code to perform a Newton-Raphson approximation of a
45636 single precision floating point [reciprocal] square root. */
45637
45638 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45639 bool recip)
45640 {
45641 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45642 REAL_VALUE_TYPE r;
45643 int unspec;
45644
45645 x0 = gen_reg_rtx (mode);
45646 e0 = gen_reg_rtx (mode);
45647 e1 = gen_reg_rtx (mode);
45648 e2 = gen_reg_rtx (mode);
45649 e3 = gen_reg_rtx (mode);
45650
45651 real_from_integer (&r, VOIDmode, -3, SIGNED);
45652 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45653
45654 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45655 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45656 unspec = UNSPEC_RSQRT;
45657
45658 if (VECTOR_MODE_P (mode))
45659 {
45660 mthree = ix86_build_const_vector (mode, true, mthree);
45661 mhalf = ix86_build_const_vector (mode, true, mhalf);
45662 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45663 if (GET_MODE_SIZE (mode) == 64)
45664 unspec = UNSPEC_RSQRT14;
45665 }
45666
45667 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45668 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45669
45670 a = force_reg (mode, a);
45671
45672 /* x0 = rsqrt(a) estimate */
45673 emit_insn (gen_rtx_SET (VOIDmode, x0,
45674 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45675 unspec)));
45676
45677 /* If a == 0.0, filter out the infinite estimate to prevent a NaN for sqrt(0.0). */
45678 if (!recip)
45679 {
45680 rtx zero, mask;
45681
45682 zero = gen_reg_rtx (mode);
45683 mask = gen_reg_rtx (mode);
45684
45685 zero = force_reg (mode, CONST0_RTX(mode));
45686
45687 /* Handle masked compare. */
45688 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45689 {
45690 mask = gen_reg_rtx (HImode);
45691 /* Imm value 0x4 corresponds to not-equal comparison. */
45692 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45693 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45694 }
45695 else
45696 {
45697 emit_insn (gen_rtx_SET (VOIDmode, mask,
45698 gen_rtx_NE (mode, zero, a)));
45699
45700 emit_insn (gen_rtx_SET (VOIDmode, x0,
45701 gen_rtx_AND (mode, x0, mask)));
45702 }
45703 }
45704
45705 /* e0 = x0 * a */
45706 emit_insn (gen_rtx_SET (VOIDmode, e0,
45707 gen_rtx_MULT (mode, x0, a)));
45708 /* e1 = e0 * x0 */
45709 emit_insn (gen_rtx_SET (VOIDmode, e1,
45710 gen_rtx_MULT (mode, e0, x0)));
45711
45712 /* e2 = e1 - 3. */
45713 mthree = force_reg (mode, mthree);
45714 emit_insn (gen_rtx_SET (VOIDmode, e2,
45715 gen_rtx_PLUS (mode, e1, mthree)));
45716
45717 mhalf = force_reg (mode, mhalf);
45718 if (recip)
45719 /* e3 = -.5 * x0 */
45720 emit_insn (gen_rtx_SET (VOIDmode, e3,
45721 gen_rtx_MULT (mode, x0, mhalf)));
45722 else
45723 /* e3 = -.5 * e0 */
45724 emit_insn (gen_rtx_SET (VOIDmode, e3,
45725 gen_rtx_MULT (mode, e0, mhalf)));
45726 /* ret = e2 * e3 */
45727 emit_insn (gen_rtx_SET (VOIDmode, res,
45728 gen_rtx_MULT (mode, e2, e3)));
45729 }
45730
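/* Derivation of the sequence above: for f(x) = 1/x**2 - a, a Newton step
   refines the estimate x0 of 1/sqrt(a) as
   x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3);
   multiplying by a (i.e. using e0 = a * x0 in the last step) yields
   sqrt(a) instead of the reciprocal.  */
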
45731 #ifdef TARGET_SOLARIS
45732 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45733
45734 static void
45735 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45736 tree decl)
45737 {
45738 /* With Binutils 2.15, the "@unwind" marker must be specified on
45739 every occurrence of the ".eh_frame" section, not just the first
45740 one. */
45741 if (TARGET_64BIT
45742 && strcmp (name, ".eh_frame") == 0)
45743 {
45744 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45745 flags & SECTION_WRITE ? "aw" : "a");
45746 return;
45747 }
45748
45749 #ifndef USE_GAS
45750 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45751 {
45752 solaris_elf_asm_comdat_section (name, flags, decl);
45753 return;
45754 }
45755 #endif
45756
45757 default_elf_asm_named_section (name, flags, decl);
45758 }
45759 #endif /* TARGET_SOLARIS */
45760
45761 /* Return the mangling of TYPE if it is an extended fundamental type. */
45762
45763 static const char *
45764 ix86_mangle_type (const_tree type)
45765 {
45766 type = TYPE_MAIN_VARIANT (type);
45767
45768 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45769 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45770 return NULL;
45771
45772 switch (TYPE_MODE (type))
45773 {
45774 case TFmode:
45775 /* __float128 is "g". */
45776 return "g";
45777 case XFmode:
45778 /* "long double" or __float80 is "e". */
45779 return "e";
45780 default:
45781 return NULL;
45782 }
45783 }
45784
45785 /* For 32-bit code we can save PIC register setup by using
45786 __stack_chk_fail_local hidden function instead of calling
45787 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45788 register, so it is better to call __stack_chk_fail directly. */
45789
45790 static tree ATTRIBUTE_UNUSED
45791 ix86_stack_protect_fail (void)
45792 {
45793 return TARGET_64BIT
45794 ? default_external_stack_protect_fail ()
45795 : default_hidden_stack_protect_fail ();
45796 }
45797
45798 /* Select a format to encode pointers in exception handling data. CODE
45799 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45800 true if the symbol may be affected by dynamic relocations.
45801
45802 ??? All x86 object file formats are capable of representing this.
45803 After all, the relocation needed is the same as for the call insn.
45804 Whether or not a particular assembler allows us to emit such a
45805 relocation remains to be seen. */
45806 int
45807 asm_preferred_eh_data_format (int code, int global)
45808 {
45809 if (flag_pic)
45810 {
45811 int type = DW_EH_PE_sdata8;
45812 if (!TARGET_64BIT
45813 || ix86_cmodel == CM_SMALL_PIC
45814 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45815 type = DW_EH_PE_sdata4;
45816 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45817 }
45818 if (ix86_cmodel == CM_SMALL
45819 || (ix86_cmodel == CM_MEDIUM && code))
45820 return DW_EH_PE_udata4;
45821 return DW_EH_PE_absptr;
45822 }
45823 \f
45824 /* Copy the sign of SIGN onto the positive value ABS_VALUE and store the
45825 result in RESULT. If MASK is non-null, it is a mask that clears the
45826 sign bit (its complement is used to extract the sign from SIGN). */
45827 static void
45828 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45829 {
45830 machine_mode mode = GET_MODE (sign);
45831 rtx sgn = gen_reg_rtx (mode);
45832 if (mask == NULL_RTX)
45833 {
45834 machine_mode vmode;
45835
45836 if (mode == SFmode)
45837 vmode = V4SFmode;
45838 else if (mode == DFmode)
45839 vmode = V2DFmode;
45840 else
45841 vmode = mode;
45842
45843 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45844 if (!VECTOR_MODE_P (mode))
45845 {
45846 /* We need to generate a scalar mode mask in this case. */
45847 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45848 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45849 mask = gen_reg_rtx (mode);
45850 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45851 }
45852 }
45853 else
45854 mask = gen_rtx_NOT (mode, mask);
45855 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45856 gen_rtx_AND (mode, mask, sign)));
45857 emit_insn (gen_rtx_SET (VOIDmode, result,
45858 gen_rtx_IOR (mode, abs_value, sgn)));
45859 }
45860
45861 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45862 mask for masking out the sign-bit is stored in *SMASK, if that is
45863 non-null. */
45864 static rtx
45865 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45866 {
45867 machine_mode vmode, mode = GET_MODE (op0);
45868 rtx xa, mask;
45869
45870 xa = gen_reg_rtx (mode);
45871 if (mode == SFmode)
45872 vmode = V4SFmode;
45873 else if (mode == DFmode)
45874 vmode = V2DFmode;
45875 else
45876 vmode = mode;
45877 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45878 if (!VECTOR_MODE_P (mode))
45879 {
45880 /* We need to generate a scalar mode mask in this case. */
45881 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45882 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45883 mask = gen_reg_rtx (mode);
45884 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45885 }
45886 emit_insn (gen_rtx_SET (VOIDmode, xa,
45887 gen_rtx_AND (mode, op0, mask)));
45888
45889 if (smask)
45890 *smask = mask;
45891
45892 return xa;
45893 }
45894
45895 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45896 swapping the operands if SWAP_OPERANDS is true. The expanded
45897 code is a forward jump to a newly created label in case the
45898 comparison is true. The generated label rtx is returned. */
45899 static rtx_code_label *
45900 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45901 bool swap_operands)
45902 {
45903 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45904 rtx_code_label *label;
45905 rtx tmp;
45906
45907 if (swap_operands)
45908 std::swap (op0, op1);
45909
45910 label = gen_label_rtx ();
45911 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45912 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45913 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45914 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45915 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45916 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45917 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45918 JUMP_LABEL (tmp) = label;
45919
45920 return label;
45921 }
45922
45923 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45924 using comparison code CODE. Operands are swapped for the comparison if
45925 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45926 static rtx
45927 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45928 bool swap_operands)
45929 {
45930 rtx (*insn)(rtx, rtx, rtx, rtx);
45931 machine_mode mode = GET_MODE (op0);
45932 rtx mask = gen_reg_rtx (mode);
45933
45934 if (swap_operands)
45935 std::swap (op0, op1);
45936
45937 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45938
45939 emit_insn (insn (mask, op0, op1,
45940 gen_rtx_fmt_ee (code, mode, op0, op1)));
45941 return mask;
45942 }
45943
45944 /* Generate and return an rtx of mode MODE for 2**n where n is the number
45945 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45946 static rtx
45947 ix86_gen_TWO52 (machine_mode mode)
45948 {
45949 REAL_VALUE_TYPE TWO52r;
45950 rtx TWO52;
45951
45952 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45953 TWO52 = const_double_from_real_value (TWO52r, mode);
45954 TWO52 = force_reg (mode, TWO52);
45955
45956 return TWO52;
45957 }
45958
45959 /* Expand SSE sequence for computing lround from OP1 storing
45960 into OP0. */
45961 void
45962 ix86_expand_lround (rtx op0, rtx op1)
45963 {
45964 /* C code for the stuff we're doing below:
45965 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45966 return (long)tmp;
45967 */
45968 machine_mode mode = GET_MODE (op1);
45969 const struct real_format *fmt;
45970 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45971 rtx adj;
45972
45973 /* load nextafter (0.5, 0.0) */
45974 fmt = REAL_MODE_FORMAT (mode);
45975 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45976 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
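   /* Using nextafter (0.5, 0.0) instead of 0.5 matters for inputs just below
      one half: e.g. in double precision 0.49999999999999994 + 0.5 rounds up
      to 1.0, which would make lround return 1 instead of 0, while adding
      pred_half keeps the sum below 1.0.  */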
45977
45978 /* adj = copysign (0.5, op1) */
45979 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45980 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45981
45982 /* adj = op1 + adj */
45983 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45984
45985 /* op0 = (imode)adj */
45986 expand_fix (op0, adj, 0);
45987 }
45988
45989 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
45990 into OP0. */
45991 void
45992 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45993 {
45994 /* C code for the stuff we're doing below (for do_floor):
45995 xi = (long)op1;
45996 xi -= (double)xi > op1 ? 1 : 0;
45997 return xi;
45998 */
45999 machine_mode fmode = GET_MODE (op1);
46000 machine_mode imode = GET_MODE (op0);
46001 rtx ireg, freg, tmp;
46002 rtx_code_label *label;
46003
46004 /* reg = (long)op1 */
46005 ireg = gen_reg_rtx (imode);
46006 expand_fix (ireg, op1, 0);
46007
46008 /* freg = (double)reg */
46009 freg = gen_reg_rtx (fmode);
46010 expand_float (freg, ireg, 0);
46011
46012 /* Floor: ireg = (freg > op1) ? ireg - 1 : ireg; ceil: ireg = (freg < op1) ? ireg + 1 : ireg */
46013 label = ix86_expand_sse_compare_and_jump (UNLE,
46014 freg, op1, !do_floor);
46015 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46016 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46017 emit_move_insn (ireg, tmp);
46018
46019 emit_label (label);
46020 LABEL_NUSES (label) = 1;
46021
46022 emit_move_insn (op0, ireg);
46023 }
46024
46025 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46026 result in OPERAND0. */
46027 void
46028 ix86_expand_rint (rtx operand0, rtx operand1)
46029 {
46030 /* C code for the stuff we're doing below:
46031 xa = fabs (operand1);
46032 if (!isless (xa, 2**52))
46033 return operand1;
46034 xa = xa + 2**52 - 2**52;
46035 return copysign (xa, operand1);
46036 */
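   /* The "xa + 2**52 - 2**52" idiom relies on xa already being known to be
      below 2**52: adding 2**52 pushes all fractional bits out of the 52-bit
      mantissa, so the addition itself rounds xa to an integer in the current
      rounding mode, and subtracting 2**52 restores the magnitude.  (For
      SFmode the same trick uses 2**23.)  */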
46037 machine_mode mode = GET_MODE (operand0);
46038 rtx res, xa, TWO52, mask;
46039 rtx_code_label *label;
46040
46041 res = gen_reg_rtx (mode);
46042 emit_move_insn (res, operand1);
46043
46044 /* xa = abs (operand1) */
46045 xa = ix86_expand_sse_fabs (res, &mask);
46046
46047 /* if (!isless (xa, TWO52)) goto label; */
46048 TWO52 = ix86_gen_TWO52 (mode);
46049 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46050
46051 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46052 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46053
46054 ix86_sse_copysign_to_positive (res, xa, res, mask);
46055
46056 emit_label (label);
46057 LABEL_NUSES (label) = 1;
46058
46059 emit_move_insn (operand0, res);
46060 }
46061
46062 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46063 into OPERAND0. */
46064 void
46065 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46066 {
46067 /* C code for the stuff we expand below.
46068 double xa = fabs (x), x2;
46069 if (!isless (xa, TWO52))
46070 return x;
46071 xa = xa + TWO52 - TWO52;
46072 x2 = copysign (xa, x);
46073 Compensate. Floor:
46074 if (x2 > x)
46075 x2 -= 1;
46076 Compensate. Ceil:
46077 if (x2 < x)
46078 x2 -= -1;
46079 return x2;
46080 */
46081 machine_mode mode = GET_MODE (operand0);
46082 rtx xa, TWO52, tmp, one, res, mask;
46083 rtx_code_label *label;
46084
46085 TWO52 = ix86_gen_TWO52 (mode);
46086
46087 /* Temporary for holding the result, initialized to the input
46088 operand to ease control flow. */
46089 res = gen_reg_rtx (mode);
46090 emit_move_insn (res, operand1);
46091
46092 /* xa = abs (operand1) */
46093 xa = ix86_expand_sse_fabs (res, &mask);
46094
46095 /* if (!isless (xa, TWO52)) goto label; */
46096 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46097
46098 /* xa = xa + TWO52 - TWO52; */
46099 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46100 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46101
46102 /* xa = copysign (xa, operand1) */
46103 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46104
46105 /* generate 1.0 or -1.0 */
46106 one = force_reg (mode,
46107 const_double_from_real_value (do_floor
46108 ? dconst1 : dconstm1, mode));
46109
46110 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46111 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46112 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46113 gen_rtx_AND (mode, one, tmp)));
46114 /* We always need to subtract here to preserve signed zero. */
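   /* (A subtraction is used even for ceil, where 'one' is -1.0: adding +0.0
      to -0.0 would give +0.0 under round-to-nearest, while subtracting +0.0
      from -0.0 leaves -0.0 intact.)  */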
46115 tmp = expand_simple_binop (mode, MINUS,
46116 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46117 emit_move_insn (res, tmp);
46118
46119 emit_label (label);
46120 LABEL_NUSES (label) = 1;
46121
46122 emit_move_insn (operand0, res);
46123 }
46124
46125 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46126 into OPERAND0. */
46127 void
46128 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46129 {
46130 /* C code for the stuff we expand below.
46131 double xa = fabs (x), x2;
46132 if (!isless (xa, TWO52))
46133 return x;
46134 x2 = (double)(long)x;
46135 Compensate. Floor:
46136 if (x2 > x)
46137 x2 -= 1;
46138 Compensate. Ceil:
46139 if (x2 < x)
46140 x2 += 1;
46141 if (HONOR_SIGNED_ZEROS (mode))
46142 return copysign (x2, x);
46143 return x2;
46144 */
46145 machine_mode mode = GET_MODE (operand0);
46146 rtx xa, xi, TWO52, tmp, one, res, mask;
46147 rtx_code_label *label;
46148
46149 TWO52 = ix86_gen_TWO52 (mode);
46150
46151 /* Temporary for holding the result, initialized to the input
46152 operand to ease control flow. */
46153 res = gen_reg_rtx (mode);
46154 emit_move_insn (res, operand1);
46155
46156 /* xa = abs (operand1) */
46157 xa = ix86_expand_sse_fabs (res, &mask);
46158
46159 /* if (!isless (xa, TWO52)) goto label; */
46160 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46161
46162 /* xa = (double)(long)x */
46163 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46164 expand_fix (xi, res, 0);
46165 expand_float (xa, xi, 0);
46166
46167 /* generate 1.0 */
46168 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46169
46170 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46171 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46172 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46173 gen_rtx_AND (mode, one, tmp)));
46174 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46175 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46176 emit_move_insn (res, tmp);
46177
46178 if (HONOR_SIGNED_ZEROS (mode))
46179 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46180
46181 emit_label (label);
46182 LABEL_NUSES (label) = 1;
46183
46184 emit_move_insn (operand0, res);
46185 }
46186
46187 /* Expand SSE sequence for computing round from OPERAND1 storing
46188 into OPERAND0. This sequence works without relying on DImode truncation
46189 via cvttsd2siq, which is only available on 64-bit targets. */
46190 void
46191 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46192 {
46193 /* C code for the stuff we expand below.
46194 double xa = fabs (x), xa2, x2;
46195 if (!isless (xa, TWO52))
46196 return x;
46197 Using the absolute value and copying back sign makes
46198 -0.0 -> -0.0 correct.
46199 xa2 = xa + TWO52 - TWO52;
46200 Compensate.
46201 dxa = xa2 - xa;
46202 if (dxa <= -0.5)
46203 xa2 += 1;
46204 else if (dxa > 0.5)
46205 xa2 -= 1;
46206 x2 = copysign (xa2, x);
46207 return x2;
46208 */
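 /* In the default round-to-nearest mode the 2**52 trick rounds ties to even,
    whereas round() must round halfway cases away from zero; dxa = xa2 - xa
    measures the rounding error (it lies in [-0.5, 0.5]) and the compensation
    below adjusts the halfway cases accordingly.  */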
46209 machine_mode mode = GET_MODE (operand0);
46210 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46211 rtx_code_label *label;
46212
46213 TWO52 = ix86_gen_TWO52 (mode);
46214
46215 /* Temporary for holding the result, initialized to the input
46216 operand to ease control flow. */
46217 res = gen_reg_rtx (mode);
46218 emit_move_insn (res, operand1);
46219
46220 /* xa = abs (operand1) */
46221 xa = ix86_expand_sse_fabs (res, &mask);
46222
46223 /* if (!isless (xa, TWO52)) goto label; */
46224 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46225
46226 /* xa2 = xa + TWO52 - TWO52; */
46227 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46228 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46229
46230 /* dxa = xa2 - xa; */
46231 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46232
46233 /* generate 0.5, 1.0 and -0.5 */
46234 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46235 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46236 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46237 0, OPTAB_DIRECT);
46238
46239 /* Compensate. */
46240 tmp = gen_reg_rtx (mode);
46241 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46242 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46243 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46244 gen_rtx_AND (mode, one, tmp)));
46245 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46246 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46247 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46248 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46249 gen_rtx_AND (mode, one, tmp)));
46250 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46251
46252 /* res = copysign (xa2, operand1) */
46253 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46254
46255 emit_label (label);
46256 LABEL_NUSES (label) = 1;
46257
46258 emit_move_insn (operand0, res);
46259 }
46260
46261 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46262 into OPERAND0. */
46263 void
46264 ix86_expand_trunc (rtx operand0, rtx operand1)
46265 {
46266 /* C code for SSE variant we expand below.
46267 double xa = fabs (x), x2;
46268 if (!isless (xa, TWO52))
46269 return x;
46270 x2 = (double)(long)x;
46271 if (HONOR_SIGNED_ZEROS (mode))
46272 return copysign (x2, x);
46273 return x2;
46274 */
46275 machine_mode mode = GET_MODE (operand0);
46276 rtx xa, xi, TWO52, res, mask;
46277 rtx_code_label *label;
46278
46279 TWO52 = ix86_gen_TWO52 (mode);
46280
46281 /* Temporary for holding the result, initialized to the input
46282 operand to ease control flow. */
46283 res = gen_reg_rtx (mode);
46284 emit_move_insn (res, operand1);
46285
46286 /* xa = abs (operand1) */
46287 xa = ix86_expand_sse_fabs (res, &mask);
46288
46289 /* if (!isless (xa, TWO52)) goto label; */
46290 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46291
46292 /* x = (double)(long)x */
46293 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46294 expand_fix (xi, res, 0);
46295 expand_float (res, xi, 0);
46296
46297 if (HONOR_SIGNED_ZEROS (mode))
46298 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46299
46300 emit_label (label);
46301 LABEL_NUSES (label) = 1;
46302
46303 emit_move_insn (operand0, res);
46304 }
46305
46306 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46307 into OPERAND0. */
46308 void
46309 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46310 {
46311 machine_mode mode = GET_MODE (operand0);
46312 rtx xa, mask, TWO52, one, res, smask, tmp;
46313 rtx_code_label *label;
46314
46315 /* C code for SSE variant we expand below.
46316 double xa = fabs (x), xa2, x2;
46317 if (!isless (xa, TWO52))
46318 return x;
46319 xa2 = xa + TWO52 - TWO52;
46320 Compensate:
46321 if (xa2 > xa)
46322 xa2 -= 1.0;
46323 x2 = copysign (xa2, x);
46324 return x2;
46325 */
46326
46327 TWO52 = ix86_gen_TWO52 (mode);
46328
46329 /* Temporary for holding the result, initialized to the input
46330 operand to ease control flow. */
46331 res = gen_reg_rtx (mode);
46332 emit_move_insn (res, operand1);
46333
46334 /* xa = abs (operand1) */
46335 xa = ix86_expand_sse_fabs (res, &smask);
46336
46337 /* if (!isless (xa, TWO52)) goto label; */
46338 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46339
46340 /* res = xa + TWO52 - TWO52; */
46341 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46342 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46343 emit_move_insn (res, tmp);
46344
46345 /* generate 1.0 */
46346 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46347
46348 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46349 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46350 emit_insn (gen_rtx_SET (VOIDmode, mask,
46351 gen_rtx_AND (mode, mask, one)));
46352 tmp = expand_simple_binop (mode, MINUS,
46353 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46354 emit_move_insn (res, tmp);
46355
46356 /* res = copysign (res, operand1) */
46357 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46358
46359 emit_label (label);
46360 LABEL_NUSES (label) = 1;
46361
46362 emit_move_insn (operand0, res);
46363 }
46364
46365 /* Expand SSE sequence for computing round from OPERAND1 storing
46366 into OPERAND0. */
46367 void
46368 ix86_expand_round (rtx operand0, rtx operand1)
46369 {
46370 /* C code for the stuff we're doing below:
46371 double xa = fabs (x);
46372 if (!isless (xa, TWO52))
46373 return x;
46374 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46375 return copysign (xa, x);
46376 */
46377 machine_mode mode = GET_MODE (operand0);
46378 rtx res, TWO52, xa, xi, half, mask;
46379 rtx_code_label *label;
46380 const struct real_format *fmt;
46381 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46382
46383 /* Temporary for holding the result, initialized to the input
46384 operand to ease control flow. */
46385 res = gen_reg_rtx (mode);
46386 emit_move_insn (res, operand1);
46387
46388 TWO52 = ix86_gen_TWO52 (mode);
46389 xa = ix86_expand_sse_fabs (res, &mask);
46390 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46391
46392 /* load nextafter (0.5, 0.0) */
46393 fmt = REAL_MODE_FORMAT (mode);
46394 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46395 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46396
46397 /* xa = xa + 0.5 */
46398 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46399 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46400
46401 /* xa = (double)(int64_t)xa */
46402 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46403 expand_fix (xi, xa, 0);
46404 expand_float (xa, xi, 0);
46405
46406 /* res = copysign (xa, operand1) */
46407 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46408
46409 emit_label (label);
46410 LABEL_NUSES (label) = 1;
46411
46412 emit_move_insn (operand0, res);
46413 }
46414
46415 /* Expand SSE sequence for computing round
46416 from OP1 storing into OP0 using sse4 round insn. */
46417 void
46418 ix86_expand_round_sse4 (rtx op0, rtx op1)
46419 {
46420 machine_mode mode = GET_MODE (op0);
46421 rtx e1, e2, res, half;
46422 const struct real_format *fmt;
46423 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46424 rtx (*gen_copysign) (rtx, rtx, rtx);
46425 rtx (*gen_round) (rtx, rtx, rtx);
46426
46427 switch (mode)
46428 {
46429 case SFmode:
46430 gen_copysign = gen_copysignsf3;
46431 gen_round = gen_sse4_1_roundsf2;
46432 break;
46433 case DFmode:
46434 gen_copysign = gen_copysigndf3;
46435 gen_round = gen_sse4_1_rounddf2;
46436 break;
46437 default:
46438 gcc_unreachable ();
46439 }
46440
46441 /* round (a) = trunc (a + copysign (0.5, a)) */
46442
46443 /* load nextafter (0.5, 0.0) */
46444 fmt = REAL_MODE_FORMAT (mode);
46445 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46446 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46447 half = const_double_from_real_value (pred_half, mode);
46448
46449 /* e1 = copysign (0.5, op1) */
46450 e1 = gen_reg_rtx (mode);
46451 emit_insn (gen_copysign (e1, half, op1));
46452
46453 /* e2 = op1 + e1 */
46454 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46455
46456 /* res = trunc (e2) */
46457 res = gen_reg_rtx (mode);
46458 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46459
46460 emit_move_insn (op0, res);
46461 }
46462 \f
46463
46464 /* Table of valid machine attributes. */
46465 static const struct attribute_spec ix86_attribute_table[] =
46466 {
46467 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46468 affects_type_identity } */
46469 /* Stdcall attribute says callee is responsible for popping arguments
46470 if they are not variable. */
46471 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46472 true },
46473 /* Fastcall attribute says callee is responsible for popping arguments
46474 if they are not variable. */
46475 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46476 true },
46477 /* Thiscall attribute says callee is responsible for popping arguments
46478 if they are not variable. */
46479 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46480 true },
46481 /* Cdecl attribute says the callee is a normal C declaration */
46482 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46483 true },
46484 /* Regparm attribute specifies how many integer arguments are to be
46485 passed in registers. */
46486 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46487 true },
46488 /* Sseregparm attribute says we are using x86_64 calling conventions
46489 for FP arguments. */
46490 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46491 true },
46492 /* The transactional memory builtins are implicitly regparm or fastcall
46493 depending on the ABI. Override the generic do-nothing attribute that
46494 these builtins were declared with. */
46495 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46496 true },
46497 /* force_align_arg_pointer says this function realigns the stack at entry. */
46498 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46499 false, true, true, ix86_handle_cconv_attribute, false },
46500 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46501 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46502 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46503 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46504 false },
46505 #endif
46506 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46507 false },
46508 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46509 false },
46510 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46511 SUBTARGET_ATTRIBUTE_TABLE,
46512 #endif
46513 /* ms_abi and sysv_abi calling convention function attributes. */
46514 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46515 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46516 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46517 false },
46518 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46519 ix86_handle_callee_pop_aggregate_return, true },
46520 /* End element. */
46521 { NULL, 0, 0, false, false, false, NULL, false }
46522 };
46523
46524 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46525 static int
46526 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46527 tree vectype, int)
46528 {
46529 unsigned elements;
46530
46531 switch (type_of_cost)
46532 {
46533 case scalar_stmt:
46534 return ix86_cost->scalar_stmt_cost;
46535
46536 case scalar_load:
46537 return ix86_cost->scalar_load_cost;
46538
46539 case scalar_store:
46540 return ix86_cost->scalar_store_cost;
46541
46542 case vector_stmt:
46543 return ix86_cost->vec_stmt_cost;
46544
46545 case vector_load:
46546 return ix86_cost->vec_align_load_cost;
46547
46548 case vector_store:
46549 return ix86_cost->vec_store_cost;
46550
46551 case vec_to_scalar:
46552 return ix86_cost->vec_to_scalar_cost;
46553
46554 case scalar_to_vec:
46555 return ix86_cost->scalar_to_vec_cost;
46556
46557 case unaligned_load:
46558 case unaligned_store:
46559 return ix86_cost->vec_unalign_load_cost;
46560
46561 case cond_branch_taken:
46562 return ix86_cost->cond_taken_branch_cost;
46563
46564 case cond_branch_not_taken:
46565 return ix86_cost->cond_not_taken_branch_cost;
46566
46567 case vec_perm:
46568 case vec_promote_demote:
46569 return ix86_cost->vec_stmt_cost;
46570
46571 case vec_construct:
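      /* Building a vector from N scalar elements is modeled with a simple
         linear heuristic in the element count rather than a measured cost.  */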
46572 elements = TYPE_VECTOR_SUBPARTS (vectype);
46573 return elements / 2 + 1;
46574
46575 default:
46576 gcc_unreachable ();
46577 }
46578 }
46579
46580 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46581 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46582 insn every time. */
46583
46584 static GTY(()) rtx_insn *vselect_insn;
46585
46586 /* Initialize vselect_insn. */
46587
46588 static void
46589 init_vselect_insn (void)
46590 {
46591 unsigned i;
46592 rtx x;
46593
46594 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46595 for (i = 0; i < MAX_VECT_LEN; ++i)
46596 XVECEXP (x, 0, i) = const0_rtx;
46597 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46598 const0_rtx), x);
46599 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46600 start_sequence ();
46601 vselect_insn = emit_insn (x);
46602 end_sequence ();
46603 }
46604
46605 /* Construct (set target (vec_select op0 (parallel perm))) and
46606 return true if that's a valid instruction in the active ISA. */
46607
46608 static bool
46609 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46610 unsigned nelt, bool testing_p)
46611 {
46612 unsigned int i;
46613 rtx x, save_vconcat;
46614 int icode;
46615
46616 if (vselect_insn == NULL_RTX)
46617 init_vselect_insn ();
46618
46619 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46620 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46621 for (i = 0; i < nelt; ++i)
46622 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46623 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46624 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46625 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46626 SET_DEST (PATTERN (vselect_insn)) = target;
46627 icode = recog_memoized (vselect_insn);
46628
46629 if (icode >= 0 && !testing_p)
46630 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46631
46632 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46633 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46634 INSN_CODE (vselect_insn) = -1;
46635
46636 return icode >= 0;
46637 }
46638
46639 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46640
46641 static bool
46642 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46643 const unsigned char *perm, unsigned nelt,
46644 bool testing_p)
46645 {
46646 machine_mode v2mode;
46647 rtx x;
46648 bool ok;
46649
46650 if (vselect_insn == NULL_RTX)
46651 init_vselect_insn ();
46652
46653 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46654 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46655 PUT_MODE (x, v2mode);
46656 XEXP (x, 0) = op0;
46657 XEXP (x, 1) = op1;
46658 ok = expand_vselect (target, x, perm, nelt, testing_p);
46659 XEXP (x, 0) = const0_rtx;
46660 XEXP (x, 1) = const0_rtx;
46661 return ok;
46662 }
46663
46664 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46665 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
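/* For illustration: with V4SImode, the permutation { 0 5 2 7 } is a blend,
   since element i comes from either op0[i] (index i) or op1[i] (index i + 4);
   { 1 5 2 7 } is not, because element 0 would have to move.  */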
46666
46667 static bool
46668 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46669 {
46670 machine_mode vmode = d->vmode;
46671 unsigned i, mask, nelt = d->nelt;
46672 rtx target, op0, op1, x;
46673 rtx rperm[32], vperm;
46674
46675 if (d->one_operand_p)
46676 return false;
46677 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46678 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46679 ;
46680 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46681 ;
46682 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46683 ;
46684 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46685 ;
46686 else
46687 return false;
46688
46689 /* This is a blend, not a permute. Elements must stay in their
46690 respective lanes. */
46691 for (i = 0; i < nelt; ++i)
46692 {
46693 unsigned e = d->perm[i];
46694 if (!(e == i || e == i + nelt))
46695 return false;
46696 }
46697
46698 if (d->testing_p)
46699 return true;
46700
46701 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46702 decision should be extracted elsewhere, so that we only try that
46703 sequence once all budget==3 options have been tried. */
46704 target = d->target;
46705 op0 = d->op0;
46706 op1 = d->op1;
46707 mask = 0;
46708
46709 switch (vmode)
46710 {
46711 case V8DFmode:
46712 case V16SFmode:
46713 case V4DFmode:
46714 case V8SFmode:
46715 case V2DFmode:
46716 case V4SFmode:
46717 case V8HImode:
46718 case V8SImode:
46719 case V32HImode:
46720 case V64QImode:
46721 case V16SImode:
46722 case V8DImode:
46723 for (i = 0; i < nelt; ++i)
46724 mask |= (d->perm[i] >= nelt) << i;
46725 break;
46726
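    /* For V2DImode and V4SImode the blend is done as V8HImode with pblendw,
       whose immediate selects 16-bit words: each DImode element therefore
       contributes four mask bits (0xf) and each SImode element two (0x3).  */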
46727 case V2DImode:
46728 for (i = 0; i < 2; ++i)
46729 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46730 vmode = V8HImode;
46731 goto do_subreg;
46732
46733 case V4SImode:
46734 for (i = 0; i < 4; ++i)
46735 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46736 vmode = V8HImode;
46737 goto do_subreg;
46738
46739 case V16QImode:
46740 /* See if bytes move in pairs so we can use pblendw with
46741 an immediate argument, rather than pblendvb with a vector
46742 argument. */
46743 for (i = 0; i < 16; i += 2)
46744 if (d->perm[i] + 1 != d->perm[i + 1])
46745 {
46746 use_pblendvb:
46747 for (i = 0; i < nelt; ++i)
46748 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46749
46750 finish_pblendvb:
46751 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46752 vperm = force_reg (vmode, vperm);
46753
46754 if (GET_MODE_SIZE (vmode) == 16)
46755 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46756 else
46757 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46758 if (target != d->target)
46759 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46760 return true;
46761 }
46762
46763 for (i = 0; i < 8; ++i)
46764 mask |= (d->perm[i * 2] >= 16) << i;
46765 vmode = V8HImode;
46766 /* FALLTHRU */
46767
46768 do_subreg:
46769 target = gen_reg_rtx (vmode);
46770 op0 = gen_lowpart (vmode, op0);
46771 op1 = gen_lowpart (vmode, op1);
46772 break;
46773
46774 case V32QImode:
46775 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46776 for (i = 0; i < 32; i += 2)
46777 if (d->perm[i] + 1 != d->perm[i + 1])
46778 goto use_pblendvb;
46779 /* See if bytes move in quadruplets. If yes, vpblendd
46780 with immediate can be used. */
46781 for (i = 0; i < 32; i += 4)
46782 if (d->perm[i] + 2 != d->perm[i + 2])
46783 break;
46784 if (i < 32)
46785 {
46786 /* See if bytes move the same in both lanes. If yes,
46787 vpblendw with immediate can be used. */
46788 for (i = 0; i < 16; i += 2)
46789 if (d->perm[i] + 16 != d->perm[i + 16])
46790 goto use_pblendvb;
46791
46792 /* Use vpblendw. */
46793 for (i = 0; i < 16; ++i)
46794 mask |= (d->perm[i * 2] >= 32) << i;
46795 vmode = V16HImode;
46796 goto do_subreg;
46797 }
46798
46799 /* Use vpblendd. */
46800 for (i = 0; i < 8; ++i)
46801 mask |= (d->perm[i * 4] >= 32) << i;
46802 vmode = V8SImode;
46803 goto do_subreg;
46804
46805 case V16HImode:
46806 /* See if words move in pairs. If yes, vpblendd can be used. */
46807 for (i = 0; i < 16; i += 2)
46808 if (d->perm[i] + 1 != d->perm[i + 1])
46809 break;
46810 if (i < 16)
46811 {
46812 /* See if words move the same in both lanes. If not,
46813 vpblendvb must be used. */
46814 for (i = 0; i < 8; i++)
46815 if (d->perm[i] + 8 != d->perm[i + 8])
46816 {
46817 /* Use vpblendvb. */
46818 for (i = 0; i < 32; ++i)
46819 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46820
46821 vmode = V32QImode;
46822 nelt = 32;
46823 target = gen_reg_rtx (vmode);
46824 op0 = gen_lowpart (vmode, op0);
46825 op1 = gen_lowpart (vmode, op1);
46826 goto finish_pblendvb;
46827 }
46828
46829 /* Use vpblendw. */
46830 for (i = 0; i < 16; ++i)
46831 mask |= (d->perm[i] >= 16) << i;
46832 break;
46833 }
46834
46835 /* Use vpblendd. */
46836 for (i = 0; i < 8; ++i)
46837 mask |= (d->perm[i * 2] >= 16) << i;
46838 vmode = V8SImode;
46839 goto do_subreg;
46840
46841 case V4DImode:
46842 /* Use vpblendd. */
46843 for (i = 0; i < 4; ++i)
46844 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46845 vmode = V8SImode;
46846 goto do_subreg;
46847
46848 default:
46849 gcc_unreachable ();
46850 }
46851
46852 /* This matches five different patterns, depending on the mode. */
46853 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46854 x = gen_rtx_SET (VOIDmode, target, x);
46855 emit_insn (x);
46856 if (target != d->target)
46857 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46858
46859 return true;
46860 }
46861
46862 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46863 in terms of the variable form of vpermilps.
46864
46865 Note that we will have already failed the immediate input vpermilps,
46866 which requires that the high and low part shuffle be identical; the
46867 variable form doesn't require that. */
46868
46869 static bool
46870 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46871 {
46872 rtx rperm[8], vperm;
46873 unsigned i;
46874
46875 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46876 return false;
46877
46878 /* We can only permute within the 128-bit lane. */
46879 for (i = 0; i < 8; ++i)
46880 {
46881 unsigned e = d->perm[i];
46882 if (i < 4 ? e >= 4 : e < 4)
46883 return false;
46884 }
46885
46886 if (d->testing_p)
46887 return true;
46888
46889 for (i = 0; i < 8; ++i)
46890 {
46891 unsigned e = d->perm[i];
46892
46893 /* Within each 128-bit lane, the elements of op0 are numbered
46894 from 0 and the elements of op1 are numbered from 4. */
46895 if (e >= 8 + 4)
46896 e -= 8;
46897 else if (e >= 4)
46898 e -= 4;
46899
46900 rperm[i] = GEN_INT (e);
46901 }
46902
46903 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46904 vperm = force_reg (V8SImode, vperm);
46905 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46906
46907 return true;
46908 }
46909
46910 /* Return true if permutation D can be performed as a VMODE permutation
46911 instead. */
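/* For example, a V16QImode permutation whose byte indices move in aligned
   groups of four (e.g. { 4 5 6 7  0 1 2 3  12 13 14 15  8 9 10 11 }) can be
   carried out as the V4SImode permutation { 1 0 3 2 } on the same bits.  */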
46912
46913 static bool
46914 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46915 {
46916 unsigned int i, j, chunk;
46917
46918 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46919 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46920 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46921 return false;
46922
46923 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46924 return true;
46925
46926 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46927 for (i = 0; i < d->nelt; i += chunk)
46928 if (d->perm[i] & (chunk - 1))
46929 return false;
46930 else
46931 for (j = 1; j < chunk; ++j)
46932 if (d->perm[i] + j != d->perm[i + j])
46933 return false;
46934
46935 return true;
46936 }
46937
46938 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46939 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46940
46941 static bool
46942 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46943 {
46944 unsigned i, nelt, eltsz, mask;
46945 unsigned char perm[64];
46946 machine_mode vmode = V16QImode;
46947 rtx rperm[64], vperm, target, op0, op1;
46948
46949 nelt = d->nelt;
46950
46951 if (!d->one_operand_p)
46952 {
46953 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46954 {
46955 if (TARGET_AVX2
46956 && valid_perm_using_mode_p (V2TImode, d))
46957 {
46958 if (d->testing_p)
46959 return true;
46960
46961 /* Use vperm2i128 insn. The pattern uses
46962 V4DImode instead of V2TImode. */
46963 target = d->target;
46964 if (d->vmode != V4DImode)
46965 target = gen_reg_rtx (V4DImode);
46966 op0 = gen_lowpart (V4DImode, d->op0);
46967 op1 = gen_lowpart (V4DImode, d->op1);
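	      /* vperm2i128's immediate picks one 128-bit half of the
		 concatenated inputs for each half of the result: bits 0-1
		 select the low half of the destination and bits 4-5 the high
		 half, hence the "/ (nelt / 2)" and "* 16" below.  */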
46968 rperm[0]
46969 = GEN_INT ((d->perm[0] / (nelt / 2))
46970 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46971 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46972 if (target != d->target)
46973 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46974 return true;
46975 }
46976 return false;
46977 }
46978 }
46979 else
46980 {
46981 if (GET_MODE_SIZE (d->vmode) == 16)
46982 {
46983 if (!TARGET_SSSE3)
46984 return false;
46985 }
46986 else if (GET_MODE_SIZE (d->vmode) == 32)
46987 {
46988 if (!TARGET_AVX2)
46989 return false;
46990
46991 /* V4DImode should be already handled through
46992 expand_vselect by vpermq instruction. */
46993 gcc_assert (d->vmode != V4DImode);
46994
46995 vmode = V32QImode;
46996 if (d->vmode == V8SImode
46997 || d->vmode == V16HImode
46998 || d->vmode == V32QImode)
46999 {
47000 /* First see if vpermq can be used for
47001 V8SImode/V16HImode/V32QImode. */
47002 if (valid_perm_using_mode_p (V4DImode, d))
47003 {
47004 for (i = 0; i < 4; i++)
47005 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47006 if (d->testing_p)
47007 return true;
47008 target = gen_reg_rtx (V4DImode);
47009 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47010 perm, 4, false))
47011 {
47012 emit_move_insn (d->target,
47013 gen_lowpart (d->vmode, target));
47014 return true;
47015 }
47016 return false;
47017 }
47018
47019 /* Next see if vpermd can be used. */
47020 if (valid_perm_using_mode_p (V8SImode, d))
47021 vmode = V8SImode;
47022 }
47023 /* Or if vpermps can be used. */
47024 else if (d->vmode == V8SFmode)
47025 vmode = V8SImode;
47026
47027 if (vmode == V32QImode)
47028 {
47029 /* vpshufb only works within 128-bit lanes; it cannot
47030 shuffle bytes between the lanes. */
47031 for (i = 0; i < nelt; ++i)
47032 if ((d->perm[i] ^ i) & (nelt / 2))
47033 return false;
47034 }
47035 }
47036 else if (GET_MODE_SIZE (d->vmode) == 64)
47037 {
47038 if (!TARGET_AVX512BW)
47039 return false;
47040
47041 /* If vpermq didn't work, vpshufb won't work either. */
47042 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47043 return false;
47044
47045 vmode = V64QImode;
47046 if (d->vmode == V16SImode
47047 || d->vmode == V32HImode
47048 || d->vmode == V64QImode)
47049 {
47050 /* First see if vpermq can be used for
47051 V16SImode/V32HImode/V64QImode. */
47052 if (valid_perm_using_mode_p (V8DImode, d))
47053 {
47054 for (i = 0; i < 8; i++)
47055 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47056 if (d->testing_p)
47057 return true;
47058 target = gen_reg_rtx (V8DImode);
47059 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47060 perm, 8, false))
47061 {
47062 emit_move_insn (d->target,
47063 gen_lowpart (d->vmode, target));
47064 return true;
47065 }
47066 return false;
47067 }
47068
47069 /* Next see if vpermd can be used. */
47070 if (valid_perm_using_mode_p (V16SImode, d))
47071 vmode = V16SImode;
47072 }
47073 /* Or if vpermps can be used. */
47074 else if (d->vmode == V16SFmode)
47075 vmode = V16SImode;
47076 if (vmode == V64QImode)
47077 {
47078 /* vpshufb only works within 128-bit lanes; it cannot
47079 shuffle bytes between the lanes. */
47080 for (i = 0; i < nelt; ++i)
47081 if ((d->perm[i] ^ i) & (nelt / 4))
47082 return false;
47083 }
47084 }
47085 else
47086 return false;
47087 }
47088
47089 if (d->testing_p)
47090 return true;
47091
47092 if (vmode == V8SImode)
47093 for (i = 0; i < 8; ++i)
47094 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47095 else if (vmode == V16SImode)
47096 for (i = 0; i < 16; ++i)
47097 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47098 else
47099 {
47100 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47101 if (!d->one_operand_p)
47102 mask = 2 * nelt - 1;
47103 else if (vmode == V16QImode)
47104 mask = nelt - 1;
47105 else if (vmode == V64QImode)
47106 mask = nelt / 4 - 1;
47107 else
47108 mask = nelt / 2 - 1;
47109
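      /* Expand each element index into eltsz consecutive byte indices to
	 build the byte-granular shuffle control vector for pshufb/vpperm.  */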
47110 for (i = 0; i < nelt; ++i)
47111 {
47112 unsigned j, e = d->perm[i] & mask;
47113 for (j = 0; j < eltsz; ++j)
47114 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47115 }
47116 }
47117
47118 vperm = gen_rtx_CONST_VECTOR (vmode,
47119 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47120 vperm = force_reg (vmode, vperm);
47121
47122 target = d->target;
47123 if (d->vmode != vmode)
47124 target = gen_reg_rtx (vmode);
47125 op0 = gen_lowpart (vmode, d->op0);
47126 if (d->one_operand_p)
47127 {
47128 if (vmode == V16QImode)
47129 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47130 else if (vmode == V32QImode)
47131 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47132 else if (vmode == V64QImode)
47133 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47134 else if (vmode == V8SFmode)
47135 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47136 else if (vmode == V8SImode)
47137 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47138 else if (vmode == V16SFmode)
47139 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47140 else if (vmode == V16SImode)
47141 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47142 else
47143 gcc_unreachable ();
47144 }
47145 else
47146 {
47147 op1 = gen_lowpart (vmode, d->op1);
47148 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47149 }
47150 if (target != d->target)
47151 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47152
47153 return true;
47154 }
47155
47156 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47157 in a single instruction. */
47158
47159 static bool
47160 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47161 {
47162 unsigned i, nelt = d->nelt;
47163 unsigned char perm2[MAX_VECT_LEN];
47164
47165 /* Check plain VEC_SELECT first, because AVX has instructions that could
47166 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47167 input where SEL+CONCAT may not. */
47168 if (d->one_operand_p)
47169 {
47170 int mask = nelt - 1;
47171 bool identity_perm = true;
47172 bool broadcast_perm = true;
47173
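      /* identity_perm: every element selects its own index; broadcast_perm:
	 every element selects index 0, i.e. a splat of the first element.  */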
47174 for (i = 0; i < nelt; i++)
47175 {
47176 perm2[i] = d->perm[i] & mask;
47177 if (perm2[i] != i)
47178 identity_perm = false;
47179 if (perm2[i])
47180 broadcast_perm = false;
47181 }
47182
47183 if (identity_perm)
47184 {
47185 if (!d->testing_p)
47186 emit_move_insn (d->target, d->op0);
47187 return true;
47188 }
47189 else if (broadcast_perm && TARGET_AVX2)
47190 {
47191 /* Use vpbroadcast{b,w,d}. */
47192 rtx (*gen) (rtx, rtx) = NULL;
47193 switch (d->vmode)
47194 {
47195 case V64QImode:
47196 if (TARGET_AVX512BW)
47197 gen = gen_avx512bw_vec_dupv64qi_1;
47198 break;
47199 case V32QImode:
47200 gen = gen_avx2_pbroadcastv32qi_1;
47201 break;
47202 case V32HImode:
47203 if (TARGET_AVX512BW)
47204 gen = gen_avx512bw_vec_dupv32hi_1;
47205 break;
47206 case V16HImode:
47207 gen = gen_avx2_pbroadcastv16hi_1;
47208 break;
47209 case V16SImode:
47210 if (TARGET_AVX512F)
47211 gen = gen_avx512f_vec_dupv16si_1;
47212 break;
47213 case V8SImode:
47214 gen = gen_avx2_pbroadcastv8si_1;
47215 break;
47216 case V16QImode:
47217 gen = gen_avx2_pbroadcastv16qi;
47218 break;
47219 case V8HImode:
47220 gen = gen_avx2_pbroadcastv8hi;
47221 break;
47222 case V16SFmode:
47223 if (TARGET_AVX512F)
47224 gen = gen_avx512f_vec_dupv16sf_1;
47225 break;
47226 case V8SFmode:
47227 gen = gen_avx2_vec_dupv8sf_1;
47228 break;
47229 case V8DFmode:
47230 if (TARGET_AVX512F)
47231 gen = gen_avx512f_vec_dupv8df_1;
47232 break;
47233 case V8DImode:
47234 if (TARGET_AVX512F)
47235 gen = gen_avx512f_vec_dupv8di_1;
47236 break;
47237 /* For other modes prefer other shuffles this function creates. */
47238 default: break;
47239 }
47240 if (gen != NULL)
47241 {
47242 if (!d->testing_p)
47243 emit_insn (gen (d->target, d->op0));
47244 return true;
47245 }
47246 }
47247
47248 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47249 return true;
47250
47251 /* There are plenty of patterns in sse.md that are written for
47252 SEL+CONCAT and are not replicated for a single op. Perhaps
47253 that should be changed, to avoid the nastiness here. */
47254
47255 /* Recognize interleave style patterns, which means incrementing
47256 every other permutation operand. */
47257 for (i = 0; i < nelt; i += 2)
47258 {
47259 perm2[i] = d->perm[i] & mask;
47260 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47261 }
47262 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47263 d->testing_p))
47264 return true;
47265
47266 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47267 if (nelt >= 4)
47268 {
47269 for (i = 0; i < nelt; i += 4)
47270 {
47271 perm2[i + 0] = d->perm[i + 0] & mask;
47272 perm2[i + 1] = d->perm[i + 1] & mask;
47273 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47274 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47275 }
47276
47277 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47278 d->testing_p))
47279 return true;
47280 }
47281 }
47282
47283 /* Finally, try the fully general two operand permute. */
47284 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47285 d->testing_p))
47286 return true;
47287
47288 /* Recognize interleave style patterns with reversed operands. */
47289 if (!d->one_operand_p)
47290 {
47291 for (i = 0; i < nelt; ++i)
47292 {
47293 unsigned e = d->perm[i];
47294 if (e >= nelt)
47295 e -= nelt;
47296 else
47297 e += nelt;
47298 perm2[i] = e;
47299 }
47300
47301 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47302 d->testing_p))
47303 return true;
47304 }
47305
47306 /* Try the SSE4.1 blend variable merge instructions. */
47307 if (expand_vec_perm_blend (d))
47308 return true;
47309
47310 /* Try one of the AVX vpermil variable permutations. */
47311 if (expand_vec_perm_vpermil (d))
47312 return true;
47313
47314 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47315 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47316 if (expand_vec_perm_pshufb (d))
47317 return true;
47318
47319 /* Try the AVX2 vpalignr instruction. */
47320 if (expand_vec_perm_palignr (d, true))
47321 return true;
47322
47323 /* Try the AVX512F vpermi2 instructions. */
47324 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47325 return true;
47326
47327 return false;
47328 }
47329
47330 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47331 in terms of a pair of pshuflw + pshufhw instructions. */
47332
47333 static bool
47334 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47335 {
47336 unsigned char perm2[MAX_VECT_LEN];
47337 unsigned i;
47338 bool ok;
47339
47340 if (d->vmode != V8HImode || !d->one_operand_p)
47341 return false;
47342
47343 /* The two permutations only operate in 64-bit lanes. */
47344 for (i = 0; i < 4; ++i)
47345 if (d->perm[i] >= 4)
47346 return false;
47347 for (i = 4; i < 8; ++i)
47348 if (d->perm[i] < 4)
47349 return false;
47350
47351 if (d->testing_p)
47352 return true;
47353
47354 /* Emit the pshuflw. */
47355 memcpy (perm2, d->perm, 4);
47356 for (i = 4; i < 8; ++i)
47357 perm2[i] = i;
47358 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47359 gcc_assert (ok);
47360
47361 /* Emit the pshufhw. */
47362 memcpy (perm2 + 4, d->perm + 4, 4);
47363 for (i = 0; i < 4; ++i)
47364 perm2[i] = i;
47365 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47366 gcc_assert (ok);
47367
47368 return true;
47369 }
47370
47371 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47372 the permutation using the SSSE3 palignr instruction. This succeeds
47373 when all of the elements in PERM fit within one vector and we merely
47374 need to shift them down so that a single vector permutation has a
47375 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47376 the vpalignr instruction itself can perform the requested permutation. */
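/* For instance, the two-operand V8HImode permutation { 2 3 4 5 6 7 8 9 }
   selects a contiguous window of the concatenated operands; palignr by two
   elements shifts that window down to position 0, after which the remaining
   permutation is the identity and no further shuffle is needed.  */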
47377
47378 static bool
47379 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47380 {
47381 unsigned i, nelt = d->nelt;
47382 unsigned min, max, minswap, maxswap;
47383 bool in_order, ok, swap = false;
47384 rtx shift, target;
47385 struct expand_vec_perm_d dcopy;
47386
47387 /* Even with AVX, palignr only operates on 128-bit vectors;
47388 with AVX2, palignr operates on each 128-bit lane independently. */
47389 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47390 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47391 return false;
47392
47393 min = 2 * nelt;
47394 max = 0;
47395 minswap = 2 * nelt;
47396 maxswap = 0;
47397 for (i = 0; i < nelt; ++i)
47398 {
47399 unsigned e = d->perm[i];
47400 unsigned eswap = d->perm[i] ^ nelt;
47401 if (GET_MODE_SIZE (d->vmode) == 32)
47402 {
47403 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47404 eswap = e ^ (nelt / 2);
47405 }
47406 if (e < min)
47407 min = e;
47408 if (e > max)
47409 max = e;
47410 if (eswap < minswap)
47411 minswap = eswap;
47412 if (eswap > maxswap)
47413 maxswap = eswap;
47414 }
47415 if (min == 0
47416 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47417 {
47418 if (d->one_operand_p
47419 || minswap == 0
47420 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47421 ? nelt / 2 : nelt))
47422 return false;
47423 swap = true;
47424 min = minswap;
47425 max = maxswap;
47426 }
47427
47428 /* Given that we have SSSE3, we know we'll be able to implement the
47429 single operand permutation after the palignr with pshufb for
47430 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47431 first. */
47432 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47433 return true;
47434
47435 dcopy = *d;
47436 if (swap)
47437 {
47438 dcopy.op0 = d->op1;
47439 dcopy.op1 = d->op0;
47440 for (i = 0; i < nelt; ++i)
47441 dcopy.perm[i] ^= nelt;
47442 }
47443
47444 in_order = true;
47445 for (i = 0; i < nelt; ++i)
47446 {
47447 unsigned e = dcopy.perm[i];
47448 if (GET_MODE_SIZE (d->vmode) == 32
47449 && e >= nelt
47450 && (e & (nelt / 2 - 1)) < min)
47451 e = e - min - (nelt / 2);
47452 else
47453 e = e - min;
47454 if (e != i)
47455 in_order = false;
47456 dcopy.perm[i] = e;
47457 }
47458 dcopy.one_operand_p = true;
47459
47460 if (single_insn_only_p && !in_order)
47461 return false;
47462
47463 /* For AVX2, test whether we can permute the result in one instruction. */
47464 if (d->testing_p)
47465 {
47466 if (in_order)
47467 return true;
47468 dcopy.op1 = dcopy.op0;
47469 return expand_vec_perm_1 (&dcopy);
47470 }
47471
47472 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47473 if (GET_MODE_SIZE (d->vmode) == 16)
47474 {
47475 target = gen_reg_rtx (TImode);
47476 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47477 gen_lowpart (TImode, dcopy.op0), shift));
47478 }
47479 else
47480 {
47481 target = gen_reg_rtx (V2TImode);
47482 emit_insn (gen_avx2_palignrv2ti (target,
47483 gen_lowpart (V2TImode, dcopy.op1),
47484 gen_lowpart (V2TImode, dcopy.op0),
47485 shift));
47486 }
47487
47488 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47489
47490 /* Test for the degenerate case where the alignment by itself
47491 produces the desired permutation. */
47492 if (in_order)
47493 {
47494 emit_move_insn (d->target, dcopy.op0);
47495 return true;
47496 }
47497
47498 ok = expand_vec_perm_1 (&dcopy);
47499 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47500
47501 return ok;
47502 }
47503
47504 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47505 the permutation using the SSE4_1 pblendv instruction. Potentially
47506 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47507
47508 static bool
47509 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47510 {
47511 unsigned i, which, nelt = d->nelt;
47512 struct expand_vec_perm_d dcopy, dcopy1;
47513 machine_mode vmode = d->vmode;
47514 bool ok;
47515
47516 /* Use the same checks as in expand_vec_perm_blend. */
47517 if (d->one_operand_p)
47518 return false;
47519 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47520 ;
47521 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47522 ;
47523 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47524 ;
47525 else
47526 return false;
47527
47528 /* Figure out which operand(s) the elements that are not already
47529 in their final position come from. */
47530 for (i = 0, which = 0; i < nelt; ++i)
47531 {
47532 unsigned e = d->perm[i];
47533 if (e != i)
47534 which |= (e < nelt ? 1 : 2);
47535 }
47536 /* We can handle the out-of-place elements with a blend only when they
47537 all come from the same operand, i.e. when they are all < nelt or
47538 all >= nelt (nelt is 8 in these examples).
47539 {0 1 8 3 4 5 9 7} is ok: the out-of-place elements are 8 and 9,
47540 both >= 8.
47541 {0 1 8 3 4 5 2 7} is not ok: the out-of-place elements 8 and 2
47542 come from different operands, since 8 >= 8 but 2 < 8. */
47543 if (which != 1 && which != 2)
47544 return false;
47545 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47546 return true;
47547
47548 /* First apply a one-operand permutation that moves the out-of-place
47549 elements of the chosen operand to their final positions. */
47550 dcopy = *d;
47551 if (which == 2)
47552 dcopy.op0 = dcopy.op1 = d->op1;
47553 else
47554 dcopy.op0 = dcopy.op1 = d->op0;
47555 dcopy.one_operand_p = true;
47556
47557 for (i = 0; i < nelt; ++i)
47558 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47559
47560 ok = expand_vec_perm_1 (&dcopy);
47561 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47562 return false;
47563 else
47564 gcc_assert (ok);
47565 if (d->testing_p)
47566 return true;
47567
47568 /* Next we put permuted elements into their positions. */
47569 dcopy1 = *d;
47570 if (which == 2)
47571 dcopy1.op1 = dcopy.target;
47572 else
47573 dcopy1.op0 = dcopy.target;
47574
47575 for (i = 0; i < nelt; ++i)
47576 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47577
47578 ok = expand_vec_perm_blend (&dcopy1);
47579 gcc_assert (ok);
47580
47581 return true;
47582 }
47583
47584 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47585
47586 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47587 a two vector permutation into a single vector permutation by using
47588 an interleave operation to merge the vectors. */
47589
47590 static bool
47591 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47592 {
47593 struct expand_vec_perm_d dremap, dfinal;
47594 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47595 unsigned HOST_WIDE_INT contents;
47596 unsigned char remap[2 * MAX_VECT_LEN];
47597 rtx_insn *seq;
47598 bool ok, same_halves = false;
47599
47600 if (GET_MODE_SIZE (d->vmode) == 16)
47601 {
47602 if (d->one_operand_p)
47603 return false;
47604 }
47605 else if (GET_MODE_SIZE (d->vmode) == 32)
47606 {
47607 if (!TARGET_AVX)
47608 return false;
47609 /* For 32-byte modes allow even d->one_operand_p.
47610 The lack of cross-lane shuffling in some instructions
47611 might prevent a single insn shuffle. */
47612 dfinal = *d;
47613 dfinal.testing_p = true;
47614 /* If expand_vec_perm_interleave3 can expand this into
47615 a 3-insn sequence, give up and let it be expanded that
47616 way instead. While that is one insn longer, it doesn't
47617 need a memory operand, and in the common case where the
47618 interleave-low and interleave-high permutations with the
47619 same operands are adjacent, only 4 insns are needed for
47620 both after CSE. */
47621 if (expand_vec_perm_interleave3 (&dfinal))
47622 return false;
47623 }
47624 else
47625 return false;
47626
47627 /* Examine from whence the elements come. */
47628 contents = 0;
47629 for (i = 0; i < nelt; ++i)
47630 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
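/* After this loop bit K of CONTENTS is set iff element K of the
   concatenated { op0, op1 } input (indices 0 .. 2*nelt-1) is referenced
   somewhere by the requested permutation.  */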
47631
47632 memset (remap, 0xff, sizeof (remap));
47633 dremap = *d;
47634
47635 if (GET_MODE_SIZE (d->vmode) == 16)
47636 {
47637 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47638
47639 /* Split the two input vectors into 4 halves. */
47640 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47641 h2 = h1 << nelt2;
47642 h3 = h2 << nelt2;
47643 h4 = h3 << nelt2;
47644
47645 /* If the elements come only from the low halves, use interleave low;
47646 similarly for interleave high. If the elements are from mis-matched
47647 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47648 if ((contents & (h1 | h3)) == contents)
47649 {
47650 /* punpckl* */
47651 for (i = 0; i < nelt2; ++i)
47652 {
47653 remap[i] = i * 2;
47654 remap[i + nelt] = i * 2 + 1;
47655 dremap.perm[i * 2] = i;
47656 dremap.perm[i * 2 + 1] = i + nelt;
47657 }
47658 if (!TARGET_SSE2 && d->vmode == V4SImode)
47659 dremap.vmode = V4SFmode;
47660 }
47661 else if ((contents & (h2 | h4)) == contents)
47662 {
47663 /* punpckh* */
47664 for (i = 0; i < nelt2; ++i)
47665 {
47666 remap[i + nelt2] = i * 2;
47667 remap[i + nelt + nelt2] = i * 2 + 1;
47668 dremap.perm[i * 2] = i + nelt2;
47669 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47670 }
47671 if (!TARGET_SSE2 && d->vmode == V4SImode)
47672 dremap.vmode = V4SFmode;
47673 }
47674 else if ((contents & (h1 | h4)) == contents)
47675 {
47676 /* shufps */
47677 for (i = 0; i < nelt2; ++i)
47678 {
47679 remap[i] = i;
47680 remap[i + nelt + nelt2] = i + nelt2;
47681 dremap.perm[i] = i;
47682 dremap.perm[i + nelt2] = i + nelt + nelt2;
47683 }
47684 if (nelt != 4)
47685 {
47686 /* shufpd */
47687 dremap.vmode = V2DImode;
47688 dremap.nelt = 2;
47689 dremap.perm[0] = 0;
47690 dremap.perm[1] = 3;
47691 }
47692 }
47693 else if ((contents & (h2 | h3)) == contents)
47694 {
47695 /* shufps */
47696 for (i = 0; i < nelt2; ++i)
47697 {
47698 remap[i + nelt2] = i;
47699 remap[i + nelt] = i + nelt2;
47700 dremap.perm[i] = i + nelt2;
47701 dremap.perm[i + nelt2] = i + nelt;
47702 }
47703 if (nelt != 4)
47704 {
47705 /* shufpd */
47706 dremap.vmode = V2DImode;
47707 dremap.nelt = 2;
47708 dremap.perm[0] = 1;
47709 dremap.perm[1] = 2;
47710 }
47711 }
47712 else
47713 return false;
47714 }
47715 else
47716 {
47717 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47718 unsigned HOST_WIDE_INT q[8];
47719 unsigned int nonzero_halves[4];
47720
47721 /* Split the two input vectors into 8 quarters. */
47722 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47723 for (i = 1; i < 8; ++i)
47724 q[i] = q[0] << (nelt4 * i);
47725 for (i = 0; i < 4; ++i)
47726 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47727 {
47728 nonzero_halves[nzcnt] = i;
47729 ++nzcnt;
47730 }
47731
47732 if (nzcnt == 1)
47733 {
47734 gcc_assert (d->one_operand_p);
47735 nonzero_halves[1] = nonzero_halves[0];
47736 same_halves = true;
47737 }
47738 else if (d->one_operand_p)
47739 {
47740 gcc_assert (nonzero_halves[0] == 0);
47741 gcc_assert (nonzero_halves[1] == 1);
47742 }
47743
47744 if (nzcnt <= 2)
47745 {
47746 if (d->perm[0] / nelt2 == nonzero_halves[1])
47747 {
47748 /* Attempt to increase the likelihood that dfinal
47749 shuffle will be intra-lane. */
47750 char tmph = nonzero_halves[0];
47751 nonzero_halves[0] = nonzero_halves[1];
47752 nonzero_halves[1] = tmph;
47753 }
47754
47755 /* vperm2f128 or vperm2i128. */
47756 for (i = 0; i < nelt2; ++i)
47757 {
47758 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47759 remap[i + nonzero_halves[0] * nelt2] = i;
47760 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47761 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47762 }
47763
47764 if (d->vmode != V8SFmode
47765 && d->vmode != V4DFmode
47766 && d->vmode != V8SImode)
47767 {
47768 dremap.vmode = V8SImode;
47769 dremap.nelt = 8;
47770 for (i = 0; i < 4; ++i)
47771 {
47772 dremap.perm[i] = i + nonzero_halves[0] * 4;
47773 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47774 }
47775 }
47776 }
47777 else if (d->one_operand_p)
47778 return false;
47779 else if (TARGET_AVX2
47780 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47781 {
47782 /* vpunpckl* */
47783 for (i = 0; i < nelt4; ++i)
47784 {
47785 remap[i] = i * 2;
47786 remap[i + nelt] = i * 2 + 1;
47787 remap[i + nelt2] = i * 2 + nelt2;
47788 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47789 dremap.perm[i * 2] = i;
47790 dremap.perm[i * 2 + 1] = i + nelt;
47791 dremap.perm[i * 2 + nelt2] = i + nelt2;
47792 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47793 }
47794 }
47795 else if (TARGET_AVX2
47796 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47797 {
47798 /* vpunpckh* */
47799 for (i = 0; i < nelt4; ++i)
47800 {
47801 remap[i + nelt4] = i * 2;
47802 remap[i + nelt + nelt4] = i * 2 + 1;
47803 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47804 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47805 dremap.perm[i * 2] = i + nelt4;
47806 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47807 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47808 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47809 }
47810 }
47811 else
47812 return false;
47813 }
47814
47815 /* Use the remapping array set up above to move the elements from their
47816 swizzled locations into their final destinations. */
47817 dfinal = *d;
47818 for (i = 0; i < nelt; ++i)
47819 {
47820 unsigned e = remap[d->perm[i]];
47821 gcc_assert (e < nelt);
47822 /* If same_halves is true, both halves of the remapped vector are the
47823 same. Avoid cross-lane accesses if possible. */
47824 if (same_halves && i >= nelt2)
47825 {
47826 gcc_assert (e < nelt2);
47827 dfinal.perm[i] = e + nelt2;
47828 }
47829 else
47830 dfinal.perm[i] = e;
47831 }
47832 if (!d->testing_p)
47833 {
47834 dremap.target = gen_reg_rtx (dremap.vmode);
47835 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47836 }
47837 dfinal.op1 = dfinal.op0;
47838 dfinal.one_operand_p = true;
47839
47840 /* Test if the final remap can be done with a single insn. For V4SFmode or
47841 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47842 start_sequence ();
47843 ok = expand_vec_perm_1 (&dfinal);
47844 seq = get_insns ();
47845 end_sequence ();
47846
47847 if (!ok)
47848 return false;
47849
47850 if (d->testing_p)
47851 return true;
47852
47853 if (dremap.vmode != dfinal.vmode)
47854 {
47855 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47856 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47857 }
47858
47859 ok = expand_vec_perm_1 (&dremap);
47860 gcc_assert (ok);
47861
47862 emit_insn (seq);
47863 return true;
47864 }
47865
47866 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47867 a single vector cross-lane permutation into vpermq followed
47868 by any of the single insn permutations. */
47869
47870 static bool
47871 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47872 {
47873 struct expand_vec_perm_d dremap, dfinal;
47874 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47875 unsigned contents[2];
47876 bool ok;
47877
47878 if (!(TARGET_AVX2
47879 && (d->vmode == V32QImode || d->vmode == V16HImode)
47880 && d->one_operand_p))
47881 return false;
47882
47883 contents[0] = 0;
47884 contents[1] = 0;
47885 for (i = 0; i < nelt2; ++i)
47886 {
47887 contents[0] |= 1u << (d->perm[i] / nelt4);
47888 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47889 }
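/* contents[0] and contents[1] now record which of the four 64-bit
   quarters of the input feed the low and the high half of the result.
   Each half may draw from at most two quarters, checked below, since a
   vpermq can place only two source quarters into each destination
   half.  */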
47890
47891 for (i = 0; i < 2; ++i)
47892 {
47893 unsigned int cnt = 0;
47894 for (j = 0; j < 4; ++j)
47895 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47896 return false;
47897 }
47898
47899 if (d->testing_p)
47900 return true;
47901
47902 dremap = *d;
47903 dremap.vmode = V4DImode;
47904 dremap.nelt = 4;
47905 dremap.target = gen_reg_rtx (V4DImode);
47906 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47907 dremap.op1 = dremap.op0;
47908 dremap.one_operand_p = true;
47909 for (i = 0; i < 2; ++i)
47910 {
47911 unsigned int cnt = 0;
47912 for (j = 0; j < 4; ++j)
47913 if ((contents[i] & (1u << j)) != 0)
47914 dremap.perm[2 * i + cnt++] = j;
47915 for (; cnt < 2; ++cnt)
47916 dremap.perm[2 * i + cnt] = 0;
47917 }
47918
47919 dfinal = *d;
47920 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47921 dfinal.op1 = dfinal.op0;
47922 dfinal.one_operand_p = true;
47923 for (i = 0, j = 0; i < nelt; ++i)
47924 {
47925 if (i == nelt2)
47926 j = 2;
47927 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47928 if ((d->perm[i] / nelt4) == dremap.perm[j])
47929 ;
47930 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47931 dfinal.perm[i] |= nelt4;
47932 else
47933 gcc_unreachable ();
47934 }
47935
47936 ok = expand_vec_perm_1 (&dremap);
47937 gcc_assert (ok);
47938
47939 ok = expand_vec_perm_1 (&dfinal);
47940 gcc_assert (ok);
47941
47942 return true;
47943 }
47944
47945 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
47946 a vector permutation using two instructions, vperm2f128 resp.
47947 vperm2i128 followed by any single in-lane permutation. */
47948
47949 static bool
47950 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47951 {
47952 struct expand_vec_perm_d dfirst, dsecond;
47953 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47954 bool ok;
47955
47956 if (!TARGET_AVX
47957 || GET_MODE_SIZE (d->vmode) != 32
47958 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47959 return false;
47960
47961 dsecond = *d;
47962 dsecond.one_operand_p = false;
47963 dsecond.testing_p = true;
47964
47965 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47966 immediate. For perm < 16 the second permutation uses
47967 d->op0 as first operand, for perm >= 16 it uses d->op1
47968 as first operand. The second operand is the result of
47969 vperm2[fi]128. */
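/* For example, perm == 0x6 asks for the low destination lane from
   lane 2 (op1's low lane) and the high destination lane from lane 1
   (op0's high lane); ((0x6 << 2) | 0x6) & 0x33 == 0x12, which is the
   corresponding vperm2[fi]128 immediate.  */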
47970 for (perm = 0; perm < 32; perm++)
47971 {
47972 /* Ignore permutations which do not move anything cross-lane. */
47973 if (perm < 16)
47974 {
47975 /* The second shuffle for e.g. V4DFmode has
47976 0123 and ABCD operands.
47977 Ignore AB23, as 23 is already in the second lane
47978 of the first operand. */
47979 if ((perm & 0xc) == (1 << 2)) continue;
47980 /* And 01CD, as 01 is in the first lane of the first
47981 operand. */
47982 if ((perm & 3) == 0) continue;
47983 /* And 4567, as then the vperm2[fi]128 doesn't change
47984 anything on the original 4567 second operand. */
47985 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47986 }
47987 else
47988 {
47989 /* The second shuffle for e.g. V4DFmode has
47990 4567 and ABCD operands.
47991 Ignore AB67, as 67 is already in the second lane
47992 of the first operand. */
47993 if ((perm & 0xc) == (3 << 2)) continue;
47994 /* And 45CD, as 45 is in the first lane of the first
47995 operand. */
47996 if ((perm & 3) == 2) continue;
47997 /* And 0123, as then the vperm2[fi]128 doesn't change
47998 anything on the original 0123 first operand. */
47999 if ((perm & 0xf) == (1 << 2)) continue;
48000 }
48001
48002 for (i = 0; i < nelt; i++)
48003 {
48004 j = d->perm[i] / nelt2;
48005 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48006 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48007 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48008 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48009 else
48010 break;
48011 }
48012
48013 if (i == nelt)
48014 {
48015 start_sequence ();
48016 ok = expand_vec_perm_1 (&dsecond);
48017 end_sequence ();
48018 }
48019 else
48020 ok = false;
48021
48022 if (ok)
48023 {
48024 if (d->testing_p)
48025 return true;
48026
48027 /* Found a usable second shuffle. dfirst will be
48028 vperm2f128 on d->op0 and d->op1. */
48029 dsecond.testing_p = false;
48030 dfirst = *d;
48031 dfirst.target = gen_reg_rtx (d->vmode);
48032 for (i = 0; i < nelt; i++)
48033 dfirst.perm[i] = (i & (nelt2 - 1))
48034 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48035
48036 canonicalize_perm (&dfirst);
48037 ok = expand_vec_perm_1 (&dfirst);
48038 gcc_assert (ok);
48039
48040 /* And dsecond is some single insn shuffle, taking
48041 d->op0 and result of vperm2f128 (if perm < 16) or
48042 d->op1 and result of vperm2f128 (otherwise). */
48043 if (perm >= 16)
48044 dsecond.op0 = dsecond.op1;
48045 dsecond.op1 = dfirst.target;
48046
48047 ok = expand_vec_perm_1 (&dsecond);
48048 gcc_assert (ok);
48049
48050 return true;
48051 }
48052
48053 /* For one operand, the only useful vperm2f128 permutation is 0x01
48054 aka lanes swap. */
48055 if (d->one_operand_p)
48056 return false;
48057 }
48058
48059 return false;
48060 }
48061
48062 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48063 a two vector permutation using 2 intra-lane interleave insns
48064 and cross-lane shuffle for 32-byte vectors. */
48065
48066 static bool
48067 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48068 {
48069 unsigned i, nelt;
48070 rtx (*gen) (rtx, rtx, rtx);
48071
48072 if (d->one_operand_p)
48073 return false;
48074 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48075 ;
48076 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48077 ;
48078 else
48079 return false;
48080
48081 nelt = d->nelt;
48082 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48083 return false;
48084 for (i = 0; i < nelt; i += 2)
48085 if (d->perm[i] != d->perm[0] + i / 2
48086 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48087 return false;
48088
48089 if (d->testing_p)
48090 return true;
48091
48092 switch (d->vmode)
48093 {
48094 case V32QImode:
48095 if (d->perm[0])
48096 gen = gen_vec_interleave_highv32qi;
48097 else
48098 gen = gen_vec_interleave_lowv32qi;
48099 break;
48100 case V16HImode:
48101 if (d->perm[0])
48102 gen = gen_vec_interleave_highv16hi;
48103 else
48104 gen = gen_vec_interleave_lowv16hi;
48105 break;
48106 case V8SImode:
48107 if (d->perm[0])
48108 gen = gen_vec_interleave_highv8si;
48109 else
48110 gen = gen_vec_interleave_lowv8si;
48111 break;
48112 case V4DImode:
48113 if (d->perm[0])
48114 gen = gen_vec_interleave_highv4di;
48115 else
48116 gen = gen_vec_interleave_lowv4di;
48117 break;
48118 case V8SFmode:
48119 if (d->perm[0])
48120 gen = gen_vec_interleave_highv8sf;
48121 else
48122 gen = gen_vec_interleave_lowv8sf;
48123 break;
48124 case V4DFmode:
48125 if (d->perm[0])
48126 gen = gen_vec_interleave_highv4df;
48127 else
48128 gen = gen_vec_interleave_lowv4df;
48129 break;
48130 default:
48131 gcc_unreachable ();
48132 }
48133
48134 emit_insn (gen (d->target, d->op0, d->op1));
48135 return true;
48136 }
48137
48138 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48139 a single vector permutation using a single intra-lane vector
48140 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48141 the non-swapped and swapped vectors together. */
48142
48143 static bool
48144 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48145 {
48146 struct expand_vec_perm_d dfirst, dsecond;
48147 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48148 rtx_insn *seq;
48149 bool ok;
48150 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48151
48152 if (!TARGET_AVX
48153 || TARGET_AVX2
48154 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48155 || !d->one_operand_p)
48156 return false;
48157
48158 dfirst = *d;
48159 for (i = 0; i < nelt; i++)
48160 dfirst.perm[i] = 0xff;
48161 for (i = 0, msk = 0; i < nelt; i++)
48162 {
48163 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48164 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48165 return false;
48166 dfirst.perm[j] = d->perm[i];
48167 if (j != i)
48168 msk |= (1 << i);
48169 }
48170 for (i = 0; i < nelt; i++)
48171 if (dfirst.perm[i] == 0xff)
48172 dfirst.perm[i] = i;
48173
48174 if (!d->testing_p)
48175 dfirst.target = gen_reg_rtx (dfirst.vmode);
48176
48177 start_sequence ();
48178 ok = expand_vec_perm_1 (&dfirst);
48179 seq = get_insns ();
48180 end_sequence ();
48181
48182 if (!ok)
48183 return false;
48184
48185 if (d->testing_p)
48186 return true;
48187
48188 emit_insn (seq);
48189
48190 dsecond = *d;
48191 dsecond.op0 = dfirst.target;
48192 dsecond.op1 = dfirst.target;
48193 dsecond.one_operand_p = true;
48194 dsecond.target = gen_reg_rtx (dsecond.vmode);
48195 for (i = 0; i < nelt; i++)
48196 dsecond.perm[i] = i ^ nelt2;
48197
48198 ok = expand_vec_perm_1 (&dsecond);
48199 gcc_assert (ok);
48200
48201 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48202 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48203 return true;
48204 }
48205
48206 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48207 permutation using two vperm2f128, followed by a vshufpd insn blending
48208 the two vectors together. */
48209
48210 static bool
48211 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48212 {
48213 struct expand_vec_perm_d dfirst, dsecond, dthird;
48214 bool ok;
48215
48216 if (!TARGET_AVX || (d->vmode != V4DFmode))
48217 return false;
48218
48219 if (d->testing_p)
48220 return true;
48221
48222 dfirst = *d;
48223 dsecond = *d;
48224 dthird = *d;
48225
48226 dfirst.perm[0] = (d->perm[0] & ~1);
48227 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48228 dfirst.perm[2] = (d->perm[2] & ~1);
48229 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48230 dsecond.perm[0] = (d->perm[1] & ~1);
48231 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48232 dsecond.perm[2] = (d->perm[3] & ~1);
48233 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48234 dthird.perm[0] = (d->perm[0] % 2);
48235 dthird.perm[1] = (d->perm[1] % 2) + 4;
48236 dthird.perm[2] = (d->perm[2] % 2) + 2;
48237 dthird.perm[3] = (d->perm[3] % 2) + 6;
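/* dfirst moves the lane containing d->perm[0] into the low lane and the
   lane containing d->perm[2] into the high lane; dsecond does the same
   for d->perm[1] and d->perm[3].  dthird then picks the required element
   of each pair, which is the final vshufpd-style blend of the two
   intermediate results.  */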
48238
48239 dfirst.target = gen_reg_rtx (dfirst.vmode);
48240 dsecond.target = gen_reg_rtx (dsecond.vmode);
48241 dthird.op0 = dfirst.target;
48242 dthird.op1 = dsecond.target;
48243 dthird.one_operand_p = false;
48244
48245 canonicalize_perm (&dfirst);
48246 canonicalize_perm (&dsecond);
48247
48248 ok = expand_vec_perm_1 (&dfirst)
48249 && expand_vec_perm_1 (&dsecond)
48250 && expand_vec_perm_1 (&dthird);
48251
48252 gcc_assert (ok);
48253
48254 return true;
48255 }
48256
48257 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48258 permutation with two pshufb insns and an ior. We should have already
48259 failed all two instruction sequences. */
48260
48261 static bool
48262 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48263 {
48264 rtx rperm[2][16], vperm, l, h, op, m128;
48265 unsigned int i, nelt, eltsz;
48266
48267 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48268 return false;
48269 gcc_assert (!d->one_operand_p);
48270
48271 if (d->testing_p)
48272 return true;
48273
48274 nelt = d->nelt;
48275 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48276
48277 /* Generate two permutation masks. If the required element is within
48278 the given vector it is shuffled into the proper lane. If the required
48279 element is in the other vector, force a zero into the lane by setting
48280 bit 7 in the permutation mask. */
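/* For instance, a V16QImode permutation beginning { 0, 16, 1, 17, ... }
   would yield the masks { 0, -128, 1, -128, ... } for op0 and
   { -128, 0, -128, 1, ... } for op1; the two pshufb results are then
   combined with por below.  */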
48281 m128 = GEN_INT (-128);
48282 for (i = 0; i < nelt; ++i)
48283 {
48284 unsigned j, e = d->perm[i];
48285 unsigned which = (e >= nelt);
48286 if (e >= nelt)
48287 e -= nelt;
48288
48289 for (j = 0; j < eltsz; ++j)
48290 {
48291 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48292 rperm[1-which][i*eltsz + j] = m128;
48293 }
48294 }
48295
48296 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48297 vperm = force_reg (V16QImode, vperm);
48298
48299 l = gen_reg_rtx (V16QImode);
48300 op = gen_lowpart (V16QImode, d->op0);
48301 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48302
48303 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48304 vperm = force_reg (V16QImode, vperm);
48305
48306 h = gen_reg_rtx (V16QImode);
48307 op = gen_lowpart (V16QImode, d->op1);
48308 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48309
48310 op = d->target;
48311 if (d->vmode != V16QImode)
48312 op = gen_reg_rtx (V16QImode);
48313 emit_insn (gen_iorv16qi3 (op, l, h));
48314 if (op != d->target)
48315 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48316
48317 return true;
48318 }
48319
48320 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
48321 with two vpshufb insns, vpermq and vpor. We should have already failed
48322 all two or three instruction sequences. */
48323
48324 static bool
48325 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48326 {
48327 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48328 unsigned int i, nelt, eltsz;
48329
48330 if (!TARGET_AVX2
48331 || !d->one_operand_p
48332 || (d->vmode != V32QImode && d->vmode != V16HImode))
48333 return false;
48334
48335 if (d->testing_p)
48336 return true;
48337
48338 nelt = d->nelt;
48339 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48340
48341 /* Generate two permutation masks. If the required element is within
48342 the same lane, it is shuffled in. If the required element is from
48343 the other lane, force a zero by setting bit 7 in the permutation mask.
48344 The other mask has a non-negative element wherever the required
48345 element comes from the other lane, but that element is also moved to
48346 the other lane, so that the two V2TImode halves of the vpshufb
48347 result can simply be swapped. */
48348 m128 = GEN_INT (-128);
48349 for (i = 0; i < nelt; ++i)
48350 {
48351 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48352 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48353
48354 for (j = 0; j < eltsz; ++j)
48355 {
48356 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48357 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48358 }
48359 }
48360
48361 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48362 vperm = force_reg (V32QImode, vperm);
48363
48364 h = gen_reg_rtx (V32QImode);
48365 op = gen_lowpart (V32QImode, d->op0);
48366 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48367
48368 /* Swap the 128-bit lanes of h into hp. */
48369 hp = gen_reg_rtx (V4DImode);
48370 op = gen_lowpart (V4DImode, h);
48371 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48372 const1_rtx));
48373
48374 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48375 vperm = force_reg (V32QImode, vperm);
48376
48377 l = gen_reg_rtx (V32QImode);
48378 op = gen_lowpart (V32QImode, d->op0);
48379 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48380
48381 op = d->target;
48382 if (d->vmode != V32QImode)
48383 op = gen_reg_rtx (V32QImode);
48384 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48385 if (op != d->target)
48386 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48387
48388 return true;
48389 }
48390
48391 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48392 and extract-odd permutations of two V32QImode or V16HImode operands
48393 with two vpshufb insns, vpor and vpermq. We should have already
48394 failed all two or three instruction sequences. */
48395
48396 static bool
48397 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48398 {
48399 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48400 unsigned int i, nelt, eltsz;
48401
48402 if (!TARGET_AVX2
48403 || d->one_operand_p
48404 || (d->vmode != V32QImode && d->vmode != V16HImode))
48405 return false;
48406
48407 for (i = 0; i < d->nelt; ++i)
48408 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48409 return false;
48410
48411 if (d->testing_p)
48412 return true;
48413
48414 nelt = d->nelt;
48415 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48416
48417 /* Generate two permutation masks. In the first mask the first
48418 quarter contains indexes into the first half of op0, the second
48419 quarter has bit 7 set, the third quarter contains indexes into
48420 the second half of op0, and the last quarter has bit 7 set.
48421 In the second mask the first quarter has bit 7 set, the second
48422 quarter contains indexes into the first half of op1, the third
48423 quarter has bit 7 set, and the last quarter contains indexes
48424 into the second half of op1.
48425 I.e. the first mask for a V32QImode extract-even will be:
48426 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48427 (all values masked with 0xf except for -128) and the second mask
48428 for extract-even will be:
48429 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48430 m128 = GEN_INT (-128);
48431 for (i = 0; i < nelt; ++i)
48432 {
48433 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48434 unsigned which = d->perm[i] >= nelt;
48435 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48436
48437 for (j = 0; j < eltsz; ++j)
48438 {
48439 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48440 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48441 }
48442 }
48443
48444 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48445 vperm = force_reg (V32QImode, vperm);
48446
48447 l = gen_reg_rtx (V32QImode);
48448 op = gen_lowpart (V32QImode, d->op0);
48449 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48450
48451 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48452 vperm = force_reg (V32QImode, vperm);
48453
48454 h = gen_reg_rtx (V32QImode);
48455 op = gen_lowpart (V32QImode, d->op1);
48456 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48457
48458 ior = gen_reg_rtx (V32QImode);
48459 emit_insn (gen_iorv32qi3 (ior, l, h));
48460
48461 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48462 op = gen_reg_rtx (V4DImode);
48463 ior = gen_lowpart (V4DImode, ior);
48464 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48465 const1_rtx, GEN_INT (3)));
48466 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48467
48468 return true;
48469 }
48470
48471 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48472 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48473 with two "and" and "pack" or two "shift" and "pack" insns. We should
48474 have already failed all two instruction sequences. */
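/* For example, extracting the even bytes of two V16QImode operands masks
   every 16-bit word of both inputs with 0x00ff and then uses packuswb to
   concatenate the surviving low bytes; the odd variant instead shifts
   each word right by 8 first, so that the odd byte becomes the low byte
   of its word.  */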
48475
48476 static bool
48477 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48478 {
48479 rtx op, dop0, dop1, t, rperm[16];
48480 unsigned i, odd, c, s, nelt = d->nelt;
48481 bool end_perm = false;
48482 machine_mode half_mode;
48483 rtx (*gen_and) (rtx, rtx, rtx);
48484 rtx (*gen_pack) (rtx, rtx, rtx);
48485 rtx (*gen_shift) (rtx, rtx, rtx);
48486
48487 if (d->one_operand_p)
48488 return false;
48489
48490 switch (d->vmode)
48491 {
48492 case V8HImode:
48493 /* Required for "pack". */
48494 if (!TARGET_SSE4_1)
48495 return false;
48496 c = 0xffff;
48497 s = 16;
48498 half_mode = V4SImode;
48499 gen_and = gen_andv4si3;
48500 gen_pack = gen_sse4_1_packusdw;
48501 gen_shift = gen_lshrv4si3;
48502 break;
48503 case V16QImode:
48504 /* No check as all instructions are SSE2. */
48505 c = 0xff;
48506 s = 8;
48507 half_mode = V8HImode;
48508 gen_and = gen_andv8hi3;
48509 gen_pack = gen_sse2_packuswb;
48510 gen_shift = gen_lshrv8hi3;
48511 break;
48512 case V16HImode:
48513 if (!TARGET_AVX2)
48514 return false;
48515 c = 0xffff;
48516 s = 16;
48517 half_mode = V8SImode;
48518 gen_and = gen_andv8si3;
48519 gen_pack = gen_avx2_packusdw;
48520 gen_shift = gen_lshrv8si3;
48521 end_perm = true;
48522 break;
48523 case V32QImode:
48524 if (!TARGET_AVX2)
48525 return false;
48526 c = 0xff;
48527 s = 8;
48528 half_mode = V16HImode;
48529 gen_and = gen_andv16hi3;
48530 gen_pack = gen_avx2_packuswb;
48531 gen_shift = gen_lshrv16hi3;
48532 end_perm = true;
48533 break;
48534 default:
48535 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48536 general shuffles. */
48537 return false;
48538 }
48539
48540 /* Check that permutation is even or odd. */
48541 odd = d->perm[0];
48542 if (odd > 1)
48543 return false;
48544
48545 for (i = 1; i < nelt; ++i)
48546 if (d->perm[i] != 2 * i + odd)
48547 return false;
48548
48549 if (d->testing_p)
48550 return true;
48551
48552 dop0 = gen_reg_rtx (half_mode);
48553 dop1 = gen_reg_rtx (half_mode);
48554 if (odd == 0)
48555 {
48556 for (i = 0; i < nelt / 2; i++)
48557 rperm[i] = GEN_INT (c);
48558 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48559 t = force_reg (half_mode, t);
48560 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48561 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48562 }
48563 else
48564 {
48565 emit_insn (gen_shift (dop0,
48566 gen_lowpart (half_mode, d->op0),
48567 GEN_INT (s)));
48568 emit_insn (gen_shift (dop1,
48569 gen_lowpart (half_mode, d->op1),
48570 GEN_INT (s)));
48571 }
48572 /* In AVX2 for 256 bit case we need to permute pack result. */
48573 if (TARGET_AVX2 && end_perm)
48574 {
48575 op = gen_reg_rtx (d->vmode);
48576 t = gen_reg_rtx (V4DImode);
48577 emit_insn (gen_pack (op, dop0, dop1));
48578 emit_insn (gen_avx2_permv4di_1 (t,
48579 gen_lowpart (V4DImode, op),
48580 const0_rtx,
48581 const2_rtx,
48582 const1_rtx,
48583 GEN_INT (3)));
48584 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48585 }
48586 else
48587 emit_insn (gen_pack (d->target, dop0, dop1));
48588
48589 return true;
48590 }
48591
48592 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
48593 and extract-odd permutations. */
48594
48595 static bool
48596 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48597 {
48598 rtx t1, t2, t3, t4, t5;
48599
48600 switch (d->vmode)
48601 {
48602 case V4DFmode:
48603 if (d->testing_p)
48604 break;
48605 t1 = gen_reg_rtx (V4DFmode);
48606 t2 = gen_reg_rtx (V4DFmode);
48607
48608 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
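/* Immediate 0x20 selects { op0 low lane, op1 low lane }, and 0x31
   selects { op0 high lane, op1 high lane }.  */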
48609 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48610 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48611
48612 /* Now an unpck[lh]pd will produce the result required. */
48613 if (odd)
48614 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48615 else
48616 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48617 emit_insn (t3);
48618 break;
48619
48620 case V8SFmode:
48621 {
48622 int mask = odd ? 0xdd : 0x88;
48623
48624 if (d->testing_p)
48625 break;
48626 t1 = gen_reg_rtx (V8SFmode);
48627 t2 = gen_reg_rtx (V8SFmode);
48628 t3 = gen_reg_rtx (V8SFmode);
48629
48630 /* Shuffle within the 128-bit lanes to produce:
48631 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48632 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48633 GEN_INT (mask)));
48634
48635 /* Shuffle the lanes around to produce:
48636 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48637 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48638 GEN_INT (0x3)));
48639
48640 /* Shuffle within the 128-bit lanes to produce:
48641 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48642 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48643
48644 /* Shuffle within the 128-bit lanes to produce:
48645 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48646 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48647
48648 /* Shuffle the lanes around to produce:
48649 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48650 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48651 GEN_INT (0x20)));
48652 }
48653 break;
48654
48655 case V2DFmode:
48656 case V4SFmode:
48657 case V2DImode:
48658 case V4SImode:
48659 /* These are always directly implementable by expand_vec_perm_1. */
48660 gcc_unreachable ();
48661
48662 case V8HImode:
48663 if (TARGET_SSE4_1)
48664 return expand_vec_perm_even_odd_pack (d);
48665 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48666 return expand_vec_perm_pshufb2 (d);
48667 else
48668 {
48669 if (d->testing_p)
48670 break;
48671 /* We need 2*log2(N)-1 operations to achieve odd/even
48672 with interleave. */
48673 t1 = gen_reg_rtx (V8HImode);
48674 t2 = gen_reg_rtx (V8HImode);
48675 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48676 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48677 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48678 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48679 if (odd)
48680 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48681 else
48682 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48683 emit_insn (t3);
48684 }
48685 break;
48686
48687 case V16QImode:
48688 return expand_vec_perm_even_odd_pack (d);
48689
48690 case V16HImode:
48691 case V32QImode:
48692 return expand_vec_perm_even_odd_pack (d);
48693
48694 case V4DImode:
48695 if (!TARGET_AVX2)
48696 {
48697 struct expand_vec_perm_d d_copy = *d;
48698 d_copy.vmode = V4DFmode;
48699 if (d->testing_p)
48700 d_copy.target = gen_lowpart (V4DFmode, d->target);
48701 else
48702 d_copy.target = gen_reg_rtx (V4DFmode);
48703 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48704 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48705 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48706 {
48707 if (!d->testing_p)
48708 emit_move_insn (d->target,
48709 gen_lowpart (V4DImode, d_copy.target));
48710 return true;
48711 }
48712 return false;
48713 }
48714
48715 if (d->testing_p)
48716 break;
48717
48718 t1 = gen_reg_rtx (V4DImode);
48719 t2 = gen_reg_rtx (V4DImode);
48720
48721 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48722 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48723 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48724
48725 /* Now an vpunpck[lh]qdq will produce the result required. */
48726 if (odd)
48727 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48728 else
48729 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48730 emit_insn (t3);
48731 break;
48732
48733 case V8SImode:
48734 if (!TARGET_AVX2)
48735 {
48736 struct expand_vec_perm_d d_copy = *d;
48737 d_copy.vmode = V8SFmode;
48738 if (d->testing_p)
48739 d_copy.target = gen_lowpart (V8SFmode, d->target);
48740 else
48741 d_copy.target = gen_reg_rtx (V8SFmode);
48742 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48743 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48744 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48745 {
48746 if (!d->testing_p)
48747 emit_move_insn (d->target,
48748 gen_lowpart (V8SImode, d_copy.target));
48749 return true;
48750 }
48751 return false;
48752 }
48753
48754 if (d->testing_p)
48755 break;
48756
48757 t1 = gen_reg_rtx (V8SImode);
48758 t2 = gen_reg_rtx (V8SImode);
48759 t3 = gen_reg_rtx (V4DImode);
48760 t4 = gen_reg_rtx (V4DImode);
48761 t5 = gen_reg_rtx (V4DImode);
48762
48763 /* Shuffle the lanes around into
48764 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48765 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48766 gen_lowpart (V4DImode, d->op1),
48767 GEN_INT (0x20)));
48768 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48769 gen_lowpart (V4DImode, d->op1),
48770 GEN_INT (0x31)));
48771
48772 /* Swap the 2nd and 3rd position in each lane into
48773 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48774 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48775 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48776 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48777 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48778
48779 /* Now an vpunpck[lh]qdq will produce
48780 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48781 if (odd)
48782 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48783 gen_lowpart (V4DImode, t2));
48784 else
48785 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48786 gen_lowpart (V4DImode, t2));
48787 emit_insn (t3);
48788 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48789 break;
48790
48791 default:
48792 gcc_unreachable ();
48793 }
48794
48795 return true;
48796 }
48797
48798 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48799 extract-even and extract-odd permutations. */
48800
48801 static bool
48802 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48803 {
48804 unsigned i, odd, nelt = d->nelt;
48805
48806 odd = d->perm[0];
48807 if (odd != 0 && odd != 1)
48808 return false;
48809
48810 for (i = 1; i < nelt; ++i)
48811 if (d->perm[i] != 2 * i + odd)
48812 return false;
48813
48814 return expand_vec_perm_even_odd_1 (d, odd);
48815 }
48816
48817 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
48818 permutations. We assume that expand_vec_perm_1 has already failed. */
48819
48820 static bool
48821 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48822 {
48823 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48824 machine_mode vmode = d->vmode;
48825 unsigned char perm2[4];
48826 rtx op0 = d->op0, dest;
48827 bool ok;
48828
48829 switch (vmode)
48830 {
48831 case V4DFmode:
48832 case V8SFmode:
48833 /* These are special-cased in sse.md so that we can optionally
48834 use the vbroadcast instruction. They expand to two insns
48835 if the input happens to be in a register. */
48836 gcc_unreachable ();
48837
48838 case V2DFmode:
48839 case V2DImode:
48840 case V4SFmode:
48841 case V4SImode:
48842 /* These are always implementable using standard shuffle patterns. */
48843 gcc_unreachable ();
48844
48845 case V8HImode:
48846 case V16QImode:
48847 /* These can be implemented via interleave. We save one insn by
48848 stopping once we have promoted to V4SImode and then use pshufd. */
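/* E.g. broadcasting element 5 of a V8HImode vector: one interleave-high
   of the operand with itself yields { e4 e4 e5 e5 e6 e6 e7 e7 }, the
   wanted value then sits in SImode element 1, and a single pshufd with
   { 1, 1, 1, 1 } finishes the broadcast.  */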
48849 if (d->testing_p)
48850 return true;
48851 do
48852 {
48853 rtx dest;
48854 rtx (*gen) (rtx, rtx, rtx)
48855 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48856 : gen_vec_interleave_lowv8hi;
48857
48858 if (elt >= nelt2)
48859 {
48860 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48861 : gen_vec_interleave_highv8hi;
48862 elt -= nelt2;
48863 }
48864 nelt2 /= 2;
48865
48866 dest = gen_reg_rtx (vmode);
48867 emit_insn (gen (dest, op0, op0));
48868 vmode = get_mode_wider_vector (vmode);
48869 op0 = gen_lowpart (vmode, dest);
48870 }
48871 while (vmode != V4SImode);
48872
48873 memset (perm2, elt, 4);
48874 dest = gen_reg_rtx (V4SImode);
48875 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48876 gcc_assert (ok);
48877 if (!d->testing_p)
48878 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48879 return true;
48880
48881 case V32QImode:
48882 case V16HImode:
48883 case V8SImode:
48884 case V4DImode:
48885 /* For AVX2 broadcasts of the first element vpbroadcast* or
48886 vpermq should be used by expand_vec_perm_1. */
48887 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48888 return false;
48889
48890 default:
48891 gcc_unreachable ();
48892 }
48893 }
48894
48895 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48896 broadcast permutations. */
48897
48898 static bool
48899 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48900 {
48901 unsigned i, elt, nelt = d->nelt;
48902
48903 if (!d->one_operand_p)
48904 return false;
48905
48906 elt = d->perm[0];
48907 for (i = 1; i < nelt; ++i)
48908 if (d->perm[i] != elt)
48909 return false;
48910
48911 return expand_vec_perm_broadcast_1 (d);
48912 }
48913
48914 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
48915 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48916 all the shorter instruction sequences. */
48917
48918 static bool
48919 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48920 {
48921 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48922 unsigned int i, nelt, eltsz;
48923 bool used[4];
48924
48925 if (!TARGET_AVX2
48926 || d->one_operand_p
48927 || (d->vmode != V32QImode && d->vmode != V16HImode))
48928 return false;
48929
48930 if (d->testing_p)
48931 return true;
48932
48933 nelt = d->nelt;
48934 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48935
48936 /* Generate 4 permutation masks. If the required element is within
48937 the same lane, it is shuffled in. If the required element is from
48938 the other lane, force a zero by setting bit 7 in the permutation mask.
48939 The cross-lane masks have non-negative elements wherever the required
48940 element comes from the other lane, but those elements are also moved
48941 to the other lane, so that the two V2TImode halves of the vpshufb
48942 result can simply be swapped. */
48943 m128 = GEN_INT (-128);
48944 for (i = 0; i < 32; ++i)
48945 {
48946 rperm[0][i] = m128;
48947 rperm[1][i] = m128;
48948 rperm[2][i] = m128;
48949 rperm[3][i] = m128;
48950 }
48951 used[0] = false;
48952 used[1] = false;
48953 used[2] = false;
48954 used[3] = false;
48955 for (i = 0; i < nelt; ++i)
48956 {
48957 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48958 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48959 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48960
48961 for (j = 0; j < eltsz; ++j)
48962 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48963 used[which] = true;
48964 }
48965
48966 for (i = 0; i < 2; ++i)
48967 {
48968 if (!used[2 * i + 1])
48969 {
48970 h[i] = NULL_RTX;
48971 continue;
48972 }
48973 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48974 gen_rtvec_v (32, rperm[2 * i + 1]));
48975 vperm = force_reg (V32QImode, vperm);
48976 h[i] = gen_reg_rtx (V32QImode);
48977 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48978 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48979 }
48980
48981 /* Swap the 128-bit lanes of h[X]. */
48982 for (i = 0; i < 2; ++i)
48983 {
48984 if (h[i] == NULL_RTX)
48985 continue;
48986 op = gen_reg_rtx (V4DImode);
48987 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48988 const2_rtx, GEN_INT (3), const0_rtx,
48989 const1_rtx));
48990 h[i] = gen_lowpart (V32QImode, op);
48991 }
48992
48993 for (i = 0; i < 2; ++i)
48994 {
48995 if (!used[2 * i])
48996 {
48997 l[i] = NULL_RTX;
48998 continue;
48999 }
49000 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49001 vperm = force_reg (V32QImode, vperm);
49002 l[i] = gen_reg_rtx (V32QImode);
49003 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49004 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49005 }
49006
49007 for (i = 0; i < 2; ++i)
49008 {
49009 if (h[i] && l[i])
49010 {
49011 op = gen_reg_rtx (V32QImode);
49012 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49013 l[i] = op;
49014 }
49015 else if (h[i])
49016 l[i] = h[i];
49017 }
49018
49019 gcc_assert (l[0] && l[1]);
49020 op = d->target;
49021 if (d->vmode != V32QImode)
49022 op = gen_reg_rtx (V32QImode);
49023 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49024 if (op != d->target)
49025 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49026 return true;
49027 }
49028
49029 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49030 With all of the interface bits taken care of, perform the expansion
49031 in D and return true on success. */
49032
49033 static bool
49034 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49035 {
49036 /* Try a single instruction expansion. */
49037 if (expand_vec_perm_1 (d))
49038 return true;
49039
49040 /* Try sequences of two instructions. */
49041
49042 if (expand_vec_perm_pshuflw_pshufhw (d))
49043 return true;
49044
49045 if (expand_vec_perm_palignr (d, false))
49046 return true;
49047
49048 if (expand_vec_perm_interleave2 (d))
49049 return true;
49050
49051 if (expand_vec_perm_broadcast (d))
49052 return true;
49053
49054 if (expand_vec_perm_vpermq_perm_1 (d))
49055 return true;
49056
49057 if (expand_vec_perm_vperm2f128 (d))
49058 return true;
49059
49060 if (expand_vec_perm_pblendv (d))
49061 return true;
49062
49063 /* Try sequences of three instructions. */
49064
49065 if (expand_vec_perm_even_odd_pack (d))
49066 return true;
49067
49068 if (expand_vec_perm_2vperm2f128_vshuf (d))
49069 return true;
49070
49071 if (expand_vec_perm_pshufb2 (d))
49072 return true;
49073
49074 if (expand_vec_perm_interleave3 (d))
49075 return true;
49076
49077 if (expand_vec_perm_vperm2f128_vblend (d))
49078 return true;
49079
49080 /* Try sequences of four instructions. */
49081
49082 if (expand_vec_perm_vpshufb2_vpermq (d))
49083 return true;
49084
49085 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49086 return true;
49087
49088 /* ??? Look for narrow permutations whose element orderings would
49089 allow the promotion to a wider mode. */
49090
49091 /* ??? Look for sequences of interleave or a wider permute that place
49092 the data into the correct lanes for a half-vector shuffle like
49093 pshuf[lh]w or vpermilps. */
49094
49095 /* ??? Look for sequences of interleave that produce the desired results.
49096 The combinatorics of punpck[lh] get pretty ugly... */
49097
49098 if (expand_vec_perm_even_odd (d))
49099 return true;
49100
49101 /* Even longer sequences. */
49102 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49103 return true;
49104
49105 return false;
49106 }
49107
49108 /* If a permutation only uses one operand, make it clear. Returns true
49109 if the permutation references both operands. */
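/* For instance, with nelt == 4 and identical operands, a selector of
   { 0, 5, 2, 7 } nominally references both inputs but is folded below
   into the one-operand form { 0, 1, 2, 3 }.  */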
49110
49111 static bool
49112 canonicalize_perm (struct expand_vec_perm_d *d)
49113 {
49114 int i, which, nelt = d->nelt;
49115
49116 for (i = which = 0; i < nelt; ++i)
49117 which |= (d->perm[i] < nelt ? 1 : 2);
49118
49119 d->one_operand_p = true;
49120 switch (which)
49121 {
49122 default:
49123 gcc_unreachable();
49124
49125 case 3:
49126 if (!rtx_equal_p (d->op0, d->op1))
49127 {
49128 d->one_operand_p = false;
49129 break;
49130 }
49131 /* The elements of PERM do not suggest that only the first operand
49132 is used, but both operands are identical. Allow easier matching
49133 of the permutation by folding the permutation into the single
49134 input vector. */
49135 /* FALLTHRU */
49136
49137 case 2:
49138 for (i = 0; i < nelt; ++i)
49139 d->perm[i] &= nelt - 1;
49140 d->op0 = d->op1;
49141 break;
49142
49143 case 1:
49144 d->op1 = d->op0;
49145 break;
49146 }
49147
49148 return (which == 3);
49149 }
49150
49151 bool
49152 ix86_expand_vec_perm_const (rtx operands[4])
49153 {
49154 struct expand_vec_perm_d d;
49155 unsigned char perm[MAX_VECT_LEN];
49156 int i, nelt;
49157 bool two_args;
49158 rtx sel;
49159
49160 d.target = operands[0];
49161 d.op0 = operands[1];
49162 d.op1 = operands[2];
49163 sel = operands[3];
49164
49165 d.vmode = GET_MODE (d.target);
49166 gcc_assert (VECTOR_MODE_P (d.vmode));
49167 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49168 d.testing_p = false;
49169
49170 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49171 gcc_assert (XVECLEN (sel, 0) == nelt);
49172 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49173
49174 for (i = 0; i < nelt; ++i)
49175 {
49176 rtx e = XVECEXP (sel, 0, i);
49177 int ei = INTVAL (e) & (2 * nelt - 1);
49178 d.perm[i] = ei;
49179 perm[i] = ei;
49180 }
49181
49182 two_args = canonicalize_perm (&d);
49183
49184 if (ix86_expand_vec_perm_const_1 (&d))
49185 return true;
49186
49187 /* If the selector says both arguments are needed, but the operands are the
49188 same, the above tried to expand with one_operand_p and flattened selector.
49189 If that didn't work, retry without one_operand_p; we succeeded with that
49190 during testing. */
49191 if (two_args && d.one_operand_p)
49192 {
49193 d.one_operand_p = false;
49194 memcpy (d.perm, perm, sizeof (perm));
49195 return ix86_expand_vec_perm_const_1 (&d);
49196 }
49197
49198 return false;
49199 }
49200
49201 /* Implement targetm.vectorize.vec_perm_const_ok. */
49202
49203 static bool
49204 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49205 const unsigned char *sel)
49206 {
49207 struct expand_vec_perm_d d;
49208 unsigned int i, nelt, which;
49209 bool ret;
49210
49211 d.vmode = vmode;
49212 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49213 d.testing_p = true;
49214
49215 /* Given sufficient ISA support we can just return true here
49216 for selected vector modes. */
49217 switch (d.vmode)
49218 {
49219 case V16SFmode:
49220 case V16SImode:
49221 case V8DImode:
49222 case V8DFmode:
49223 if (TARGET_AVX512F)
49224 /* All implementable with a single vpermi2 insn. */
49225 return true;
49226 break;
49227 case V32HImode:
49228 if (TARGET_AVX512BW)
49229 /* All implementable with a single vpermi2 insn. */
49230 return true;
49231 break;
49232 case V8SImode:
49233 case V8SFmode:
49234 case V4DFmode:
49235 case V4DImode:
49236 if (TARGET_AVX512VL)
49237 /* All implementable with a single vpermi2 insn. */
49238 return true;
49239 break;
49240 case V16HImode:
49241 if (TARGET_AVX2)
49242 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49243 return true;
49244 break;
49245 case V32QImode:
49246 if (TARGET_AVX2)
49247 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49248 return true;
49249 break;
49250 case V4SImode:
49251 case V4SFmode:
49252 case V8HImode:
49253 case V16QImode:
49254 /* All implementable with a single vpperm insn. */
49255 if (TARGET_XOP)
49256 return true;
49257 /* All implementable with 2 pshufb + 1 ior. */
49258 if (TARGET_SSSE3)
49259 return true;
49260 break;
49261 case V2DImode:
49262 case V2DFmode:
49263 /* All implementable with shufpd or unpck[lh]pd. */
49264 return true;
49265 default:
49266 return false;
49267 }
49268
49269 /* Copy the selector values into the permutation array in D and
49270 record which operands they reference. */
49271 memcpy (d.perm, sel, nelt);
49272 for (i = which = 0; i < nelt; ++i)
49273 {
49274 unsigned char e = d.perm[i];
49275 gcc_assert (e < 2 * nelt);
49276 which |= (e < nelt ? 1 : 2);
49277 }
49278
49279 /* For all elements from second vector, fold the elements to first. */
49280 if (which == 2)
49281 for (i = 0; i < nelt; ++i)
49282 d.perm[i] -= nelt;
49283
49284 /* Check whether the mask can be applied to the vector type. */
49285 d.one_operand_p = (which != 3);
49286
49287 /* Implementable with shufps or pshufd. */
49288 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49289 return true;
49290
49291 /* Otherwise we have to go through the motions and see if we can
49292 figure out how to generate the requested permutation. */
49293 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49294 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49295 if (!d.one_operand_p)
49296 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49297
49298 start_sequence ();
49299 ret = ix86_expand_vec_perm_const_1 (&d);
49300 end_sequence ();
49301
49302 return ret;
49303 }
49304
49305 void
49306 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49307 {
49308 struct expand_vec_perm_d d;
49309 unsigned i, nelt;
49310
49311 d.target = targ;
49312 d.op0 = op0;
49313 d.op1 = op1;
49314 d.vmode = GET_MODE (targ);
49315 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49316 d.one_operand_p = false;
49317 d.testing_p = false;
49318
49319 for (i = 0; i < nelt; ++i)
49320 d.perm[i] = i * 2 + odd;
49321
49322 /* We'll either be able to implement the permutation directly... */
49323 if (expand_vec_perm_1 (&d))
49324 return;
49325
49326 /* ... or we use the special-case patterns. */
49327 expand_vec_perm_even_odd_1 (&d, odd);
49328 }
49329
49330 static void
49331 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49332 {
49333 struct expand_vec_perm_d d;
49334 unsigned i, nelt, base;
49335 bool ok;
49336
49337 d.target = targ;
49338 d.op0 = op0;
49339 d.op1 = op1;
49340 d.vmode = GET_MODE (targ);
49341 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49342 d.one_operand_p = false;
49343 d.testing_p = false;
49344
49345 base = high_p ? nelt / 2 : 0;
49346 for (i = 0; i < nelt / 2; ++i)
49347 {
49348 d.perm[i * 2] = i + base;
49349 d.perm[i * 2 + 1] = i + base + nelt;
49350 }
49351
49352 /* Note that for AVX this isn't one instruction. */
49353 ok = ix86_expand_vec_perm_const_1 (&d);
49354 gcc_assert (ok);
49355 }
49356
49357
49358 /* Expand a vector operation CODE for a V*QImode in terms of the
49359 same operation on V*HImode. */
49360
49361 void
49362 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49363 {
49364 machine_mode qimode = GET_MODE (dest);
49365 machine_mode himode;
49366 rtx (*gen_il) (rtx, rtx, rtx);
49367 rtx (*gen_ih) (rtx, rtx, rtx);
49368 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49369 struct expand_vec_perm_d d;
49370 bool ok, full_interleave;
49371 bool uns_p = false;
49372 int i;
49373
49374 switch (qimode)
49375 {
49376 case V16QImode:
49377 himode = V8HImode;
49378 gen_il = gen_vec_interleave_lowv16qi;
49379 gen_ih = gen_vec_interleave_highv16qi;
49380 break;
49381 case V32QImode:
49382 himode = V16HImode;
49383 gen_il = gen_avx2_interleave_lowv32qi;
49384 gen_ih = gen_avx2_interleave_highv32qi;
49385 break;
49386 case V64QImode:
49387 himode = V32HImode;
49388 gen_il = gen_avx512bw_interleave_lowv64qi;
49389 gen_ih = gen_avx512bw_interleave_highv64qi;
49390 break;
49391 default:
49392 gcc_unreachable ();
49393 }
49394
49395 op2_l = op2_h = op2;
49396 switch (code)
49397 {
49398 case MULT:
49399 /* Unpack data such that we've got a source byte in each low byte of
49400 each word. We don't care what goes into the high byte of each word.
49401 Rather than trying to get zero in there, most convenient is to let
49402 it be a copy of the low byte. */
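/* With this unpacking every 16-bit word holds the same byte in both of
   its halves, so the low byte of each word-wise product equals the low
   byte of the corresponding byte product; whatever lands in the high
   byte is discarded by the final even-byte extraction.  */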
49403 op2_l = gen_reg_rtx (qimode);
49404 op2_h = gen_reg_rtx (qimode);
49405 emit_insn (gen_il (op2_l, op2, op2));
49406 emit_insn (gen_ih (op2_h, op2, op2));
49407 /* FALLTHRU */
49408
49409 op1_l = gen_reg_rtx (qimode);
49410 op1_h = gen_reg_rtx (qimode);
49411 emit_insn (gen_il (op1_l, op1, op1));
49412 emit_insn (gen_ih (op1_h, op1, op1));
49413 full_interleave = qimode == V16QImode;
49414 break;
49415
49416 case ASHIFT:
49417 case LSHIFTRT:
49418 uns_p = true;
49419 /* FALLTHRU */
49420 case ASHIFTRT:
49421 op1_l = gen_reg_rtx (himode);
49422 op1_h = gen_reg_rtx (himode);
49423 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49424 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49425 full_interleave = true;
49426 break;
49427 default:
49428 gcc_unreachable ();
49429 }
49430
49431 /* Perform the operation. */
49432 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49433 1, OPTAB_DIRECT);
49434 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49435 1, OPTAB_DIRECT);
49436 gcc_assert (res_l && res_h);
49437
49438 /* Merge the data back into the right place. */
49439 d.target = dest;
49440 d.op0 = gen_lowpart (qimode, res_l);
49441 d.op1 = gen_lowpart (qimode, res_h);
49442 d.vmode = qimode;
49443 d.nelt = GET_MODE_NUNITS (qimode);
49444 d.one_operand_p = false;
49445 d.testing_p = false;
49446
49447 if (full_interleave)
49448 {
49449 /* For SSE2, we used a full interleave, so the desired
49450 results are in the even elements. */
49451 for (i = 0; i < 64; ++i)
49452 d.perm[i] = i * 2;
49453 }
49454 else
49455 {
49456 /* For AVX, the interleave used above was not cross-lane, so the
49457 extraction takes the even elements but with the second and third
49458 quarters swapped. Happily, that is even one insn shorter than a plain even extraction. */
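/* For the V32QImode case, for example, this reads the four 16-byte
   quarters of the concatenated { res_l, res_h } pair in the order
   0, 2, 1, 3.  */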
49459 for (i = 0; i < 64; ++i)
49460 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49461 }
49462
49463 ok = ix86_expand_vec_perm_const_1 (&d);
49464 gcc_assert (ok);
49465
49466 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49467 gen_rtx_fmt_ee (code, qimode, op1, op2));
49468 }
49469
49470 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49471 if op is CONST_VECTOR with all odd elements equal to their
49472 preceding element. */
49473
49474 static bool
49475 const_vector_equal_evenodd_p (rtx op)
49476 {
49477 machine_mode mode = GET_MODE (op);
49478 int i, nunits = GET_MODE_NUNITS (mode);
49479 if (GET_CODE (op) != CONST_VECTOR
49480 || nunits != CONST_VECTOR_NUNITS (op))
49481 return false;
49482 for (i = 0; i < nunits; i += 2)
49483 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49484 return false;
49485 return true;
49486 }
49487
49488 void
49489 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49490 bool uns_p, bool odd_p)
49491 {
49492 machine_mode mode = GET_MODE (op1);
49493 machine_mode wmode = GET_MODE (dest);
49494 rtx x;
49495 rtx orig_op1 = op1, orig_op2 = op2;
49496
49497 if (!nonimmediate_operand (op1, mode))
49498 op1 = force_reg (mode, op1);
49499 if (!nonimmediate_operand (op2, mode))
49500 op2 = force_reg (mode, op2);
49501
49502 /* We only play even/odd games with vectors of SImode. */
49503 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49504
49505 /* If we're looking for the odd results, shift those members down to
49506 the even slots. For some cpus this is faster than a PSHUFD. */
49507 if (odd_p)
49508 {
49509 /* For XOP use vpmacsdqh, but only for smult, as it is only
49510 signed. */
49511 if (TARGET_XOP && mode == V4SImode && !uns_p)
49512 {
49513 x = force_reg (wmode, CONST0_RTX (wmode));
49514 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49515 return;
49516 }
49517
49518 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
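/* Each WMODE element holds an even/odd pair of MODE elements; a logical
shift right by the unit size moves the odd element of each pair down
into the even slot. */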
49519 if (!const_vector_equal_evenodd_p (orig_op1))
49520 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49521 x, NULL, 1, OPTAB_DIRECT);
49522 if (!const_vector_equal_evenodd_p (orig_op2))
49523 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49524 x, NULL, 1, OPTAB_DIRECT);
49525 op1 = gen_lowpart (mode, op1);
49526 op2 = gen_lowpart (mode, op2);
49527 }
49528
49529 if (mode == V16SImode)
49530 {
49531 if (uns_p)
49532 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49533 else
49534 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49535 }
49536 else if (mode == V8SImode)
49537 {
49538 if (uns_p)
49539 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49540 else
49541 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49542 }
49543 else if (uns_p)
49544 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49545 else if (TARGET_SSE4_1)
49546 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49547 else
49548 {
49549 rtx s1, s2, t0, t1, t2;
49550
49551 /* The easiest way to implement this without PMULDQ is to go through
49552 the motions as if we are performing a full 64-bit multiply, with
49553 the exception that we need to do less shuffling of the elements. */
49554
49555 /* Compute the sign-extension, aka highparts, of the two operands. */
49556 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49557 op1, pc_rtx, pc_rtx);
49558 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49559 op2, pc_rtx, pc_rtx);
49560
49561 /* Multiply LO(A) * HI(B), and vice-versa. */
49562 t1 = gen_reg_rtx (wmode);
49563 t2 = gen_reg_rtx (wmode);
49564 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49565 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49566
49567 /* Multiply LO(A) * LO(B). */
49568 t0 = gen_reg_rtx (wmode);
49569 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49570
49571 /* Combine and shift the highparts into place. */
49572 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49573 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49574 1, OPTAB_DIRECT);
49575
49576 /* Combine high and low parts. */
49577 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49578 return;
49579 }
49580 emit_insn (x);
49581 }
49582
49583 void
49584 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49585 bool uns_p, bool high_p)
49586 {
49587 machine_mode wmode = GET_MODE (dest);
49588 machine_mode mode = GET_MODE (op1);
49589 rtx t1, t2, t3, t4, mask;
49590
49591 switch (mode)
49592 {
49593 case V4SImode:
49594 t1 = gen_reg_rtx (mode);
49595 t2 = gen_reg_rtx (mode);
49596 if (TARGET_XOP && !uns_p)
49597 {
49598 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49599 shuffle the elements once so that all elements are in the right
49600 place for immediate use: { A C B D }. */
49601 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49602 const1_rtx, GEN_INT (3)));
49603 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49604 const1_rtx, GEN_INT (3)));
49605 }
49606 else
49607 {
49608 /* Put the elements into place for the multiply. */
49609 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49610 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49611 high_p = false;
49612 }
49613 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49614 break;
49615
49616 case V8SImode:
49617 /* Shuffle the elements between the lanes. After this we
49618 have { A B E F | C D G H } for each operand. */
49619 t1 = gen_reg_rtx (V4DImode);
49620 t2 = gen_reg_rtx (V4DImode);
49621 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49622 const0_rtx, const2_rtx,
49623 const1_rtx, GEN_INT (3)));
49624 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49625 const0_rtx, const2_rtx,
49626 const1_rtx, GEN_INT (3)));
49627
49628 /* Shuffle the elements within the lanes. After this we
49629 have { A A B B | C C D D } or { E E F F | G G H H }. */
49630 t3 = gen_reg_rtx (V8SImode);
49631 t4 = gen_reg_rtx (V8SImode);
49632 mask = GEN_INT (high_p
49633 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49634 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
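/* The shuffle immediate is 0xfa (elements 2,2,3,3) for the high half
and 0x50 (elements 0,0,1,1) for the low half. */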
49635 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49636 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49637
49638 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49639 break;
49640
49641 case V8HImode:
49642 case V16HImode:
49643 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49644 uns_p, OPTAB_DIRECT);
49645 t2 = expand_binop (mode,
49646 uns_p ? umul_highpart_optab : smul_highpart_optab,
49647 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49648 gcc_assert (t1 && t2);
49649
49650 t3 = gen_reg_rtx (mode);
49651 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49652 emit_move_insn (dest, gen_lowpart (wmode, t3));
49653 break;
49654
49655 case V16QImode:
49656 case V32QImode:
49657 case V32HImode:
49658 case V16SImode:
49659 case V64QImode:
49660 t1 = gen_reg_rtx (wmode);
49661 t2 = gen_reg_rtx (wmode);
49662 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49663 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49664
49665 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49666 break;
49667
49668 default:
49669 gcc_unreachable ();
49670 }
49671 }
49672
49673 void
49674 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49675 {
49676 rtx res_1, res_2, res_3, res_4;
49677
49678 res_1 = gen_reg_rtx (V4SImode);
49679 res_2 = gen_reg_rtx (V4SImode);
49680 res_3 = gen_reg_rtx (V2DImode);
49681 res_4 = gen_reg_rtx (V2DImode);
49682 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49683 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49684
49685 /* Move the results in element 2 down to element 1; we don't care
49686 what goes in elements 2 and 3. Then we can merge the parts
49687 back together with an interleave.
49688
49689 Note that two other sequences were tried:
49690 (1) Use interleaves at the start instead of psrldq, which allows
49691 us to use a single shufps to merge things back at the end.
49692 (2) Use shufps here to combine the two vectors, then pshufd to
49693 put the elements in the correct order.
49694 In both cases the cost of the reformatting stall was too high
49695 and the overall sequence slower. */
49696
49697 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49698 const0_rtx, const2_rtx,
49699 const0_rtx, const0_rtx));
49700 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49701 const0_rtx, const2_rtx,
49702 const0_rtx, const0_rtx));
49703 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49704
49705 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49706 }
49707
49708 void
49709 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49710 {
49711 machine_mode mode = GET_MODE (op0);
49712 rtx t1, t2, t3, t4, t5, t6;
49713
49714 if (TARGET_AVX512DQ && mode == V8DImode)
49715 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49716 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49717 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49718 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49719 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49720 else if (TARGET_XOP && mode == V2DImode)
49721 {
49722 /* op1: A,B,C,D, op2: E,F,G,H */
49723 op1 = gen_lowpart (V4SImode, op1);
49724 op2 = gen_lowpart (V4SImode, op2);
49725
49726 t1 = gen_reg_rtx (V4SImode);
49727 t2 = gen_reg_rtx (V4SImode);
49728 t3 = gen_reg_rtx (V2DImode);
49729 t4 = gen_reg_rtx (V2DImode);
49730
49731 /* t1: B,A,D,C */
49732 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49733 GEN_INT (1),
49734 GEN_INT (0),
49735 GEN_INT (3),
49736 GEN_INT (2)));
49737
49738 /* t2: (B*E),(A*F),(D*G),(C*H) */
49739 emit_insn (gen_mulv4si3 (t2, t1, op2));
49740
49741 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49742 emit_insn (gen_xop_phadddq (t3, t2));
49743
49744 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49745 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49746
49747 /* Multiply lower parts and add all */
49748 t5 = gen_reg_rtx (V2DImode);
49749 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49750 gen_lowpart (V4SImode, op1),
49751 gen_lowpart (V4SImode, op2)));
49752 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49753
49754 }
49755 else
49756 {
49757 machine_mode nmode;
49758 rtx (*umul) (rtx, rtx, rtx);
49759
49760 if (mode == V2DImode)
49761 {
49762 umul = gen_vec_widen_umult_even_v4si;
49763 nmode = V4SImode;
49764 }
49765 else if (mode == V4DImode)
49766 {
49767 umul = gen_vec_widen_umult_even_v8si;
49768 nmode = V8SImode;
49769 }
49770 else if (mode == V8DImode)
49771 {
49772 umul = gen_vec_widen_umult_even_v16si;
49773 nmode = V16SImode;
49774 }
49775 else
49776 gcc_unreachable ();
49777
49778
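/* Full 64x64->64 multiply via 32-bit widening multiplies:
a*b = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32); the
hi(a)*hi(b) term is dropped since it lies entirely above bit 63. */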
49779 /* Multiply low parts. */
49780 t1 = gen_reg_rtx (mode);
49781 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49782
49783 /* Shift input vectors right 32 bits so we can multiply high parts. */
49784 t6 = GEN_INT (32);
49785 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49786 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49787
49788 /* Multiply high parts by low parts. */
49789 t4 = gen_reg_rtx (mode);
49790 t5 = gen_reg_rtx (mode);
49791 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49792 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49793
49794 /* Combine and shift the highparts back. */
49795 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49796 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49797
49798 /* Combine high and low parts. */
49799 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49800 }
49801
49802 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49803 gen_rtx_MULT (mode, op1, op2));
49804 }
49805
49806 /* Return 1 if control transfer instruction INSN
49807 should be encoded with bnd prefix.
49808 If insn is NULL then return 1 when control
49809 transfer instructions should be prefixed with
49810 bnd by default for current function. */
49811
49812 bool
49813 ix86_bnd_prefixed_insn_p (rtx insn)
49814 {
49815 /* For call insns check special flag. */
49816 if (insn && CALL_P (insn))
49817 {
49818 rtx call = get_call_rtx_from (insn);
49819 if (call)
49820 return CALL_EXPR_WITH_BOUNDS_P (call);
49821 }
49822
49823 /* All other insns are prefixed only if function is instrumented. */
49824 return chkp_function_instrumented_p (current_function_decl);
49825 }
49826
49827 /* Calculate integer abs() using only SSE2 instructions. */
49828
49829 void
49830 ix86_expand_sse2_abs (rtx target, rtx input)
49831 {
49832 machine_mode mode = GET_MODE (target);
49833 rtx tmp0, tmp1, x;
49834
49835 switch (mode)
49836 {
49837 /* For 32-bit signed integer X, the best way to calculate the absolute
49838 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
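/* E.g. for X = -5 and W = 32: X >> 31 = -1, (-1 ^ -5) = 4,
and 4 - (-1) = 5. */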
49839 case V4SImode:
49840 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49841 GEN_INT (GET_MODE_BITSIZE
49842 (GET_MODE_INNER (mode)) - 1),
49843 NULL, 0, OPTAB_DIRECT);
49844 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49845 NULL, 0, OPTAB_DIRECT);
49846 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49847 target, 0, OPTAB_DIRECT);
49848 break;
49849
49850 /* For 16-bit signed integer X, the best way to calculate the absolute
49851 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49852 case V8HImode:
49853 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49854
49855 x = expand_simple_binop (mode, SMAX, tmp0, input,
49856 target, 0, OPTAB_DIRECT);
49857 break;
49858
49859 /* For 8-bit signed integer X, the best way to calculate the absolute
49860 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49861 as SSE2 provides the PMINUB insn. */
49862 case V16QImode:
49863 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49864
49865 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49866 target, 0, OPTAB_DIRECT);
49867 break;
49868
49869 default:
49870 gcc_unreachable ();
49871 }
49872
49873 if (x != target)
49874 emit_move_insn (target, x);
49875 }
49876
49877 /* Expand an insert into a vector register through pinsr insn.
49878 Return true if successful. */
49879
49880 bool
49881 ix86_expand_pinsr (rtx *operands)
49882 {
49883 rtx dst = operands[0];
49884 rtx src = operands[3];
49885
49886 unsigned int size = INTVAL (operands[1]);
49887 unsigned int pos = INTVAL (operands[2]);
49888
49889 if (GET_CODE (dst) == SUBREG)
49890 {
49891 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49892 dst = SUBREG_REG (dst);
49893 }
49894
49895 if (GET_CODE (src) == SUBREG)
49896 src = SUBREG_REG (src);
49897
49898 switch (GET_MODE (dst))
49899 {
49900 case V16QImode:
49901 case V8HImode:
49902 case V4SImode:
49903 case V2DImode:
49904 {
49905 machine_mode srcmode, dstmode;
49906 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49907
49908 srcmode = mode_for_size (size, MODE_INT, 0);
49909
49910 switch (srcmode)
49911 {
49912 case QImode:
49913 if (!TARGET_SSE4_1)
49914 return false;
49915 dstmode = V16QImode;
49916 pinsr = gen_sse4_1_pinsrb;
49917 break;
49918
49919 case HImode:
49920 if (!TARGET_SSE2)
49921 return false;
49922 dstmode = V8HImode;
49923 pinsr = gen_sse2_pinsrw;
49924 break;
49925
49926 case SImode:
49927 if (!TARGET_SSE4_1)
49928 return false;
49929 dstmode = V4SImode;
49930 pinsr = gen_sse4_1_pinsrd;
49931 break;
49932
49933 case DImode:
49934 gcc_assert (TARGET_64BIT);
49935 if (!TARGET_SSE4_1)
49936 return false;
49937 dstmode = V2DImode;
49938 pinsr = gen_sse4_1_pinsrq;
49939 break;
49940
49941 default:
49942 return false;
49943 }
49944
49945 rtx d = dst;
49946 if (GET_MODE (dst) != dstmode)
49947 d = gen_reg_rtx (dstmode);
49948 src = gen_lowpart (srcmode, src);
49949
49950 pos /= size;
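/* POS is now an element index; the (1 << pos) immediate below selects
that element for the pinsr pattern. */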
49951
49952 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
49953 GEN_INT (1 << pos)));
49954 if (d != dst)
49955 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49956 return true;
49957 }
49958
49959 default:
49960 return false;
49961 }
49962 }
49963 \f
49964 /* This function returns the calling abi specific va_list type node.
49965 It returns the FNDECL specific va_list type. */
49966
49967 static tree
49968 ix86_fn_abi_va_list (tree fndecl)
49969 {
49970 if (!TARGET_64BIT)
49971 return va_list_type_node;
49972 gcc_assert (fndecl != NULL_TREE);
49973
49974 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49975 return ms_va_list_type_node;
49976 else
49977 return sysv_va_list_type_node;
49978 }
49979
49980 /* Returns the canonical va_list type specified by TYPE. If there
49981 is no valid TYPE provided, it returns NULL_TREE. */
49982
49983 static tree
49984 ix86_canonical_va_list_type (tree type)
49985 {
49986 tree wtype, htype;
49987
49988 /* Resolve references and pointers to va_list type. */
49989 if (TREE_CODE (type) == MEM_REF)
49990 type = TREE_TYPE (type);
49991 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
49992 type = TREE_TYPE (type);
49993 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
49994 type = TREE_TYPE (type);
49995
49996 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
49997 {
49998 wtype = va_list_type_node;
49999 gcc_assert (wtype != NULL_TREE);
50000 htype = type;
50001 if (TREE_CODE (wtype) == ARRAY_TYPE)
50002 {
50003 /* If va_list is an array type, the argument may have decayed
50004 to a pointer type, e.g. by being passed to another function.
50005 In that case, unwrap both types so that we can compare the
50006 underlying records. */
50007 if (TREE_CODE (htype) == ARRAY_TYPE
50008 || POINTER_TYPE_P (htype))
50009 {
50010 wtype = TREE_TYPE (wtype);
50011 htype = TREE_TYPE (htype);
50012 }
50013 }
50014 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50015 return va_list_type_node;
50016 wtype = sysv_va_list_type_node;
50017 gcc_assert (wtype != NULL_TREE);
50018 htype = type;
50019 if (TREE_CODE (wtype) == ARRAY_TYPE)
50020 {
50021 /* If va_list is an array type, the argument may have decayed
50022 to a pointer type, e.g. by being passed to another function.
50023 In that case, unwrap both types so that we can compare the
50024 underlying records. */
50025 if (TREE_CODE (htype) == ARRAY_TYPE
50026 || POINTER_TYPE_P (htype))
50027 {
50028 wtype = TREE_TYPE (wtype);
50029 htype = TREE_TYPE (htype);
50030 }
50031 }
50032 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50033 return sysv_va_list_type_node;
50034 wtype = ms_va_list_type_node;
50035 gcc_assert (wtype != NULL_TREE);
50036 htype = type;
50037 if (TREE_CODE (wtype) == ARRAY_TYPE)
50038 {
50039 /* If va_list is an array type, the argument may have decayed
50040 to a pointer type, e.g. by being passed to another function.
50041 In that case, unwrap both types so that we can compare the
50042 underlying records. */
50043 if (TREE_CODE (htype) == ARRAY_TYPE
50044 || POINTER_TYPE_P (htype))
50045 {
50046 wtype = TREE_TYPE (wtype);
50047 htype = TREE_TYPE (htype);
50048 }
50049 }
50050 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50051 return ms_va_list_type_node;
50052 return NULL_TREE;
50053 }
50054 return std_canonical_va_list_type (type);
50055 }
50056
50057 /* Iterate through the target-specific builtin types for va_list.
50058 IDX denotes the iterator, *PTREE is set to the result type of
50059 the va_list builtin, and *PNAME to its internal type.
50060 Returns zero if there is no element for this index, otherwise
50061 IDX should be increased upon the next call.
50062 Note, do not iterate a base builtin's name like __builtin_va_list.
50063 Used from c_common_nodes_and_builtins. */
50064
50065 static int
50066 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50067 {
50068 if (TARGET_64BIT)
50069 {
50070 switch (idx)
50071 {
50072 default:
50073 break;
50074
50075 case 0:
50076 *ptree = ms_va_list_type_node;
50077 *pname = "__builtin_ms_va_list";
50078 return 1;
50079
50080 case 1:
50081 *ptree = sysv_va_list_type_node;
50082 *pname = "__builtin_sysv_va_list";
50083 return 1;
50084 }
50085 }
50086
50087 return 0;
50088 }
50089
50090 #undef TARGET_SCHED_DISPATCH
50091 #define TARGET_SCHED_DISPATCH has_dispatch
50092 #undef TARGET_SCHED_DISPATCH_DO
50093 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50094 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50095 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50096 #undef TARGET_SCHED_REORDER
50097 #define TARGET_SCHED_REORDER ix86_sched_reorder
50098 #undef TARGET_SCHED_ADJUST_PRIORITY
50099 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50100 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50101 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50102 ix86_dependencies_evaluation_hook
50103
50104 /* The size of the dispatch window is the total number of bytes of
50105 object code allowed in a window. */
50106 #define DISPATCH_WINDOW_SIZE 16
50107
50108 /* Number of dispatch windows considered for scheduling. */
50109 #define MAX_DISPATCH_WINDOWS 3
50110
50111 /* Maximum number of instructions in a window. */
50112 #define MAX_INSN 4
50113
50114 /* Maximum number of immediate operands in a window. */
50115 #define MAX_IMM 4
50116
50117 /* Maximum number of immediate bits allowed in a window. */
50118 #define MAX_IMM_SIZE 128
50119
50120 /* Maximum number of 32 bit immediates allowed in a window. */
50121 #define MAX_IMM_32 4
50122
50123 /* Maximum number of 64 bit immediates allowed in a window. */
50124 #define MAX_IMM_64 2
50125
50126 /* Maximum total of loads or prefetches allowed in a window. */
50127 #define MAX_LOAD 2
50128
50129 /* Maximum total of stores allowed in a window. */
50130 #define MAX_STORE 1
50131
50132 #undef BIG
50133 #define BIG 100
50134
50135
50136 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50137 enum dispatch_group {
50138 disp_no_group = 0,
50139 disp_load,
50140 disp_store,
50141 disp_load_store,
50142 disp_prefetch,
50143 disp_imm,
50144 disp_imm_32,
50145 disp_imm_64,
50146 disp_branch,
50147 disp_cmp,
50148 disp_jcc,
50149 disp_last
50150 };
50151
50152 /* Number of allowable groups in a dispatch window. It is an array
50153 indexed by the dispatch_group enum. 100 is used as a big number,
50154 because the number of these kinds of operations does not have any
50155 effect in the dispatch window, but we need them for other reasons in
50156 the table. */
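/* In enum order: no_group 0, load 2, store 1, load_store 1, prefetch 2,
imm 4, imm_32 4, imm_64 2, branch 1, cmp BIG, jcc BIG. */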
50157 static unsigned int num_allowable_groups[disp_last] = {
50158 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50159 };
50160
50161 char group_name[disp_last + 1][16] = {
50162 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50163 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50164 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50165 };
50166
50167 /* Instruction path. */
50168 enum insn_path {
50169 no_path = 0,
50170 path_single, /* Single micro op. */
50171 path_double, /* Double micro op. */
50172 path_multi, /* Instructions with more than 2 micro ops. */
50173 last_path
50174 };
50175
50176 /* sched_insn_info defines a window to the instructions scheduled in
50177 the basic block. It contains a pointer to the insn_info table and
50178 the instruction scheduled.
50179
50180 Windows are allocated for each basic block and are linked
50181 together. */
50182 typedef struct sched_insn_info_s {
50183 rtx insn;
50184 enum dispatch_group group;
50185 enum insn_path path;
50186 int byte_len;
50187 int imm_bytes;
50188 } sched_insn_info;
50189
50190 /* Linked list of dispatch windows. This is a two way list of
50191 dispatch windows of a basic block. It contains information about
50192 the number of uops in the window and the total number of
50193 instructions and of bytes in the object code for this dispatch
50194 window. */
50195 typedef struct dispatch_windows_s {
50196 int num_insn; /* Number of insn in the window. */
50197 int num_uops; /* Number of uops in the window. */
50198 int window_size; /* Number of bytes in the window. */
50199 int window_num; /* Window number, either 0 or 1. */
50200 int num_imm; /* Number of immediates in an insn. */
50201 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50202 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50203 int imm_size; /* Total immediates in the window. */
50204 int num_loads; /* Total memory loads in the window. */
50205 int num_stores; /* Total memory stores in the window. */
50206 int violation; /* Violation exists in window. */
50207 sched_insn_info *window; /* Pointer to the window. */
50208 struct dispatch_windows_s *next;
50209 struct dispatch_windows_s *prev;
50210 } dispatch_windows;
50211
50212 /* Immediate values used in an insn. */
50213 typedef struct imm_info_s
50214 {
50215 int imm;
50216 int imm32;
50217 int imm64;
50218 } imm_info;
50219
50220 static dispatch_windows *dispatch_window_list;
50221 static dispatch_windows *dispatch_window_list1;
50222
50223 /* Get dispatch group of insn. */
50224
50225 static enum dispatch_group
50226 get_mem_group (rtx_insn *insn)
50227 {
50228 enum attr_memory memory;
50229
50230 if (INSN_CODE (insn) < 0)
50231 return disp_no_group;
50232 memory = get_attr_memory (insn);
50233 if (memory == MEMORY_STORE)
50234 return disp_store;
50235
50236 if (memory == MEMORY_LOAD)
50237 return disp_load;
50238
50239 if (memory == MEMORY_BOTH)
50240 return disp_load_store;
50241
50242 return disp_no_group;
50243 }
50244
50245 /* Return true if insn is a compare instruction. */
50246
50247 static bool
50248 is_cmp (rtx_insn *insn)
50249 {
50250 enum attr_type type;
50251
50252 type = get_attr_type (insn);
50253 return (type == TYPE_TEST
50254 || type == TYPE_ICMP
50255 || type == TYPE_FCMP
50256 || GET_CODE (PATTERN (insn)) == COMPARE);
50257 }
50258
50259 /* Return true if a dispatch violation encountered. */
50260
50261 static bool
50262 dispatch_violation (void)
50263 {
50264 if (dispatch_window_list->next)
50265 return dispatch_window_list->next->violation;
50266 return dispatch_window_list->violation;
50267 }
50268
50269 /* Return true if insn is a branch instruction. */
50270
50271 static bool
50272 is_branch (rtx insn)
50273 {
50274 return (CALL_P (insn) || JUMP_P (insn));
50275 }
50276
50277 /* Return true if insn is a prefetch instruction. */
50278
50279 static bool
50280 is_prefetch (rtx insn)
50281 {
50282 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50283 }
50284
50285 /* This function initializes a dispatch window and the list container holding a
50286 pointer to the window. */
50287
50288 static void
50289 init_window (int window_num)
50290 {
50291 int i;
50292 dispatch_windows *new_list;
50293
50294 if (window_num == 0)
50295 new_list = dispatch_window_list;
50296 else
50297 new_list = dispatch_window_list1;
50298
50299 new_list->num_insn = 0;
50300 new_list->num_uops = 0;
50301 new_list->window_size = 0;
50302 new_list->next = NULL;
50303 new_list->prev = NULL;
50304 new_list->window_num = window_num;
50305 new_list->num_imm = 0;
50306 new_list->num_imm_32 = 0;
50307 new_list->num_imm_64 = 0;
50308 new_list->imm_size = 0;
50309 new_list->num_loads = 0;
50310 new_list->num_stores = 0;
50311 new_list->violation = false;
50312
50313 for (i = 0; i < MAX_INSN; i++)
50314 {
50315 new_list->window[i].insn = NULL;
50316 new_list->window[i].group = disp_no_group;
50317 new_list->window[i].path = no_path;
50318 new_list->window[i].byte_len = 0;
50319 new_list->window[i].imm_bytes = 0;
50320 }
50321 return;
50322 }
50323
50324 /* This function allocates and initializes a dispatch window and the
50325 list container holding a pointer to the window. */
50326
50327 static dispatch_windows *
50328 allocate_window (void)
50329 {
50330 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50331 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50332
50333 return new_list;
50334 }
50335
50336 /* This routine initializes the dispatch scheduling information. It
50337 initiates building dispatch scheduler tables and constructs the
50338 first dispatch window. */
50339
50340 static void
50341 init_dispatch_sched (void)
50342 {
50343 /* Allocate a dispatch list and a window. */
50344 dispatch_window_list = allocate_window ();
50345 dispatch_window_list1 = allocate_window ();
50346 init_window (0);
50347 init_window (1);
50348 }
50349
50350 /* This function returns true if a branch is detected. End of a basic block
50351 does not have to be a branch, but here we assume only branches end a
50352 window. */
50353
50354 static bool
50355 is_end_basic_block (enum dispatch_group group)
50356 {
50357 return group == disp_branch;
50358 }
50359
50360 /* This function is called when the end of a window processing is reached. */
50361
50362 static void
50363 process_end_window (void)
50364 {
50365 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50366 if (dispatch_window_list->next)
50367 {
50368 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50369 gcc_assert (dispatch_window_list->window_size
50370 + dispatch_window_list1->window_size <= 48);
50371 init_window (1);
50372 }
50373 init_window (0);
50374 }
50375
50376 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50377 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50378 for 48 bytes of instructions. Note that these windows are not dispatch
50379 windows whose size is DISPATCH_WINDOW_SIZE. */
50380
50381 static dispatch_windows *
50382 allocate_next_window (int window_num)
50383 {
50384 if (window_num == 0)
50385 {
50386 if (dispatch_window_list->next)
50387 init_window (1);
50388 init_window (0);
50389 return dispatch_window_list;
50390 }
50391
50392 dispatch_window_list->next = dispatch_window_list1;
50393 dispatch_window_list1->prev = dispatch_window_list;
50394
50395 return dispatch_window_list1;
50396 }
50397
50398 /* Compute number of immediate operands of an instruction. */
50399
50400 static void
50401 find_constant (rtx in_rtx, imm_info *imm_values)
50402 {
50403 if (INSN_P (in_rtx))
50404 in_rtx = PATTERN (in_rtx);
50405 subrtx_iterator::array_type array;
50406 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50407 if (const_rtx x = *iter)
50408 switch (GET_CODE (x))
50409 {
50410 case CONST:
50411 case SYMBOL_REF:
50412 case CONST_INT:
50413 (imm_values->imm)++;
50414 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50415 (imm_values->imm32)++;
50416 else
50417 (imm_values->imm64)++;
50418 break;
50419
50420 case CONST_DOUBLE:
50421 (imm_values->imm)++;
50422 (imm_values->imm64)++;
50423 break;
50424
50425 case CODE_LABEL:
50426 if (LABEL_KIND (x) == LABEL_NORMAL)
50427 {
50428 (imm_values->imm)++;
50429 (imm_values->imm32)++;
50430 }
50431 break;
50432
50433 default:
50434 break;
50435 }
50436 }
50437
50438 /* Return total size of immediate operands of an instruction along with number
50439 of corresponding immediate-operands. It initializes its parameters to zero
50440 before calling FIND_CONSTANT.
50441 INSN is the input instruction. IMM is the total of immediates.
50442 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50443 bit immediates. */
50444
50445 static int
50446 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50447 {
50448 imm_info imm_values = {0, 0, 0};
50449
50450 find_constant (insn, &imm_values);
50451 *imm = imm_values.imm;
50452 *imm32 = imm_values.imm32;
50453 *imm64 = imm_values.imm64;
50454 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50455 }
50456
50457 /* This function indicates if an operand of an instruction is an
50458 immediate. */
50459
50460 static bool
50461 has_immediate (rtx insn)
50462 {
50463 int num_imm_operand;
50464 int num_imm32_operand;
50465 int num_imm64_operand;
50466
50467 if (insn)
50468 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50469 &num_imm64_operand);
50470 return false;
50471 }
50472
50473 /* Return single or double path for instructions. */
50474
50475 static enum insn_path
50476 get_insn_path (rtx_insn *insn)
50477 {
50478 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50479
50480 if ((int)path == 0)
50481 return path_single;
50482
50483 if ((int)path == 1)
50484 return path_double;
50485
50486 return path_multi;
50487 }
50488
50489 /* Return insn dispatch group. */
50490
50491 static enum dispatch_group
50492 get_insn_group (rtx_insn *insn)
50493 {
50494 enum dispatch_group group = get_mem_group (insn);
50495 if (group)
50496 return group;
50497
50498 if (is_branch (insn))
50499 return disp_branch;
50500
50501 if (is_cmp (insn))
50502 return disp_cmp;
50503
50504 if (has_immediate (insn))
50505 return disp_imm;
50506
50507 if (is_prefetch (insn))
50508 return disp_prefetch;
50509
50510 return disp_no_group;
50511 }
50512
50513 /* Count number of GROUP restricted instructions in a dispatch
50514 window WINDOW_LIST. */
50515
50516 static int
50517 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50518 {
50519 enum dispatch_group group = get_insn_group (insn);
50520 int imm_size;
50521 int num_imm_operand;
50522 int num_imm32_operand;
50523 int num_imm64_operand;
50524
50525 if (group == disp_no_group)
50526 return 0;
50527
50528 if (group == disp_imm)
50529 {
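/* Check the window's immediate budget: at most MAX_IMM operands,
MAX_IMM_SIZE bits in total, counted as up to MAX_IMM_32 32-bit or
MAX_IMM_64 64-bit slots. */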
50530 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50531 &num_imm64_operand);
50532 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50533 || num_imm_operand + window_list->num_imm > MAX_IMM
50534 || (num_imm32_operand > 0
50535 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50536 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50537 || (num_imm64_operand > 0
50538 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50539 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50540 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50541 && num_imm64_operand > 0
50542 && ((window_list->num_imm_64 > 0
50543 && window_list->num_insn >= 2)
50544 || window_list->num_insn >= 3)))
50545 return BIG;
50546
50547 return 1;
50548 }
50549
50550 if ((group == disp_load_store
50551 && (window_list->num_loads >= MAX_LOAD
50552 || window_list->num_stores >= MAX_STORE))
50553 || ((group == disp_load
50554 || group == disp_prefetch)
50555 && window_list->num_loads >= MAX_LOAD)
50556 || (group == disp_store
50557 && window_list->num_stores >= MAX_STORE))
50558 return BIG;
50559
50560 return 1;
50561 }
50562
50563 /* This function returns true if insn satisfies dispatch rules on the
50564 last window scheduled. */
50565
50566 static bool
50567 fits_dispatch_window (rtx_insn *insn)
50568 {
50569 dispatch_windows *window_list = dispatch_window_list;
50570 dispatch_windows *window_list_next = dispatch_window_list->next;
50571 unsigned int num_restrict;
50572 enum dispatch_group group = get_insn_group (insn);
50573 enum insn_path path = get_insn_path (insn);
50574 int sum;
50575
50576 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50577 instructions should be given the lowest priority in the
50578 scheduling process in Haifa scheduler to make sure they will be
50579 scheduled in the same dispatch window as the reference to them. */
50580 if (group == disp_jcc || group == disp_cmp)
50581 return false;
50582
50583 /* Check nonrestricted. */
50584 if (group == disp_no_group || group == disp_branch)
50585 return true;
50586
50587 /* Get last dispatch window. */
50588 if (window_list_next)
50589 window_list = window_list_next;
50590
50591 if (window_list->window_num == 1)
50592 {
50593 sum = window_list->prev->window_size + window_list->window_size;
50594
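/* Here 32 is 2 * DISPATCH_WINDOW_SIZE and 48 is
MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE bytes. */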
50595 if (sum == 32
50596 || (min_insn_size (insn) + sum) >= 48)
50597 /* Window 1 is full. Go for next window. */
50598 return true;
50599 }
50600
50601 num_restrict = count_num_restricted (insn, window_list);
50602
50603 if (num_restrict > num_allowable_groups[group])
50604 return false;
50605
50606 /* See if it fits in the first window. */
50607 if (window_list->window_num == 0)
50608 {
50609 /* The first window should have only single and double path
50610 uops. */
50611 if (path == path_double
50612 && (window_list->num_uops + 2) > MAX_INSN)
50613 return false;
50614 else if (path != path_single)
50615 return false;
50616 }
50617 return true;
50618 }
50619
50620 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50621 dispatch window WINDOW_LIST. */
50622
50623 static void
50624 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50625 {
50626 int byte_len = min_insn_size (insn);
50627 int num_insn = window_list->num_insn;
50628 int imm_size;
50629 sched_insn_info *window = window_list->window;
50630 enum dispatch_group group = get_insn_group (insn);
50631 enum insn_path path = get_insn_path (insn);
50632 int num_imm_operand;
50633 int num_imm32_operand;
50634 int num_imm64_operand;
50635
50636 if (!window_list->violation && group != disp_cmp
50637 && !fits_dispatch_window (insn))
50638 window_list->violation = true;
50639
50640 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50641 &num_imm64_operand);
50642
50643 /* Initialize window with new instruction. */
50644 window[num_insn].insn = insn;
50645 window[num_insn].byte_len = byte_len;
50646 window[num_insn].group = group;
50647 window[num_insn].path = path;
50648 window[num_insn].imm_bytes = imm_size;
50649
50650 window_list->window_size += byte_len;
50651 window_list->num_insn = num_insn + 1;
50652 window_list->num_uops = window_list->num_uops + num_uops;
50653 window_list->imm_size += imm_size;
50654 window_list->num_imm += num_imm_operand;
50655 window_list->num_imm_32 += num_imm32_operand;
50656 window_list->num_imm_64 += num_imm64_operand;
50657
50658 if (group == disp_store)
50659 window_list->num_stores += 1;
50660 else if (group == disp_load
50661 || group == disp_prefetch)
50662 window_list->num_loads += 1;
50663 else if (group == disp_load_store)
50664 {
50665 window_list->num_stores += 1;
50666 window_list->num_loads += 1;
50667 }
50668 }
50669
50670 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50671 If the total bytes of instructions or the number of instructions in
50672 the window exceed the allowable limits, it allocates a new window. */
50673
50674 static void
50675 add_to_dispatch_window (rtx_insn *insn)
50676 {
50677 int byte_len;
50678 dispatch_windows *window_list;
50679 dispatch_windows *next_list;
50680 dispatch_windows *window0_list;
50681 enum insn_path path;
50682 enum dispatch_group insn_group;
50683 bool insn_fits;
50684 int num_insn;
50685 int num_uops;
50686 int window_num;
50687 int insn_num_uops;
50688 int sum;
50689
50690 if (INSN_CODE (insn) < 0)
50691 return;
50692
50693 byte_len = min_insn_size (insn);
50694 window_list = dispatch_window_list;
50695 next_list = window_list->next;
50696 path = get_insn_path (insn);
50697 insn_group = get_insn_group (insn);
50698
50699 /* Get the last dispatch window. */
50700 if (next_list)
50701 window_list = dispatch_window_list->next;
50702
50703 if (path == path_single)
50704 insn_num_uops = 1;
50705 else if (path == path_double)
50706 insn_num_uops = 2;
50707 else
50708 insn_num_uops = (int) path;
50709
50710 /* If the current window is full, get a new window.
50711 Window number zero is full if MAX_INSN uops are scheduled in it.
50712 Window number one is full if window zero's bytes plus window
50713 one's bytes is 32, or if adding the bytes of the new instruction
50714 makes the total reach 48 or more, or if it already has MAX_INSN
50715 instructions in it. */
50716 num_insn = window_list->num_insn;
50717 num_uops = window_list->num_uops;
50718 window_num = window_list->window_num;
50719 insn_fits = fits_dispatch_window (insn);
50720
50721 if (num_insn >= MAX_INSN
50722 || num_uops + insn_num_uops > MAX_INSN
50723 || !(insn_fits))
50724 {
50725 window_num = ~window_num & 1;
50726 window_list = allocate_next_window (window_num);
50727 }
50728
50729 if (window_num == 0)
50730 {
50731 add_insn_window (insn, window_list, insn_num_uops);
50732 if (window_list->num_insn >= MAX_INSN
50733 && insn_group == disp_branch)
50734 {
50735 process_end_window ();
50736 return;
50737 }
50738 }
50739 else if (window_num == 1)
50740 {
50741 window0_list = window_list->prev;
50742 sum = window0_list->window_size + window_list->window_size;
50743 if (sum == 32
50744 || (byte_len + sum) >= 48)
50745 {
50746 process_end_window ();
50747 window_list = dispatch_window_list;
50748 }
50749
50750 add_insn_window (insn, window_list, insn_num_uops);
50751 }
50752 else
50753 gcc_unreachable ();
50754
50755 if (is_end_basic_block (insn_group))
50756 {
50757 /* End of basic block is reached; do end-of-basic-block processing. */
50758 process_end_window ();
50759 return;
50760 }
50761 }
50762
50763 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50764
50765 DEBUG_FUNCTION static void
50766 debug_dispatch_window_file (FILE *file, int window_num)
50767 {
50768 dispatch_windows *list;
50769 int i;
50770
50771 if (window_num == 0)
50772 list = dispatch_window_list;
50773 else
50774 list = dispatch_window_list1;
50775
50776 fprintf (file, "Window #%d:\n", list->window_num);
50777 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50778 list->num_insn, list->num_uops, list->window_size);
50779 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50780 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50781
50782 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50783 list->num_stores);
50784 fprintf (file, " insn info:\n");
50785
50786 for (i = 0; i < MAX_INSN; i++)
50787 {
50788 if (!list->window[i].insn)
50789 break;
50790 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50791 i, group_name[list->window[i].group],
50792 i, (void *)list->window[i].insn,
50793 i, list->window[i].path,
50794 i, list->window[i].byte_len,
50795 i, list->window[i].imm_bytes);
50796 }
50797 }
50798
50799 /* Print to stdout a dispatch window. */
50800
50801 DEBUG_FUNCTION void
50802 debug_dispatch_window (int window_num)
50803 {
50804 debug_dispatch_window_file (stdout, window_num);
50805 }
50806
50807 /* Print INSN dispatch information to FILE. */
50808
50809 DEBUG_FUNCTION static void
50810 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50811 {
50812 int byte_len;
50813 enum insn_path path;
50814 enum dispatch_group group;
50815 int imm_size;
50816 int num_imm_operand;
50817 int num_imm32_operand;
50818 int num_imm64_operand;
50819
50820 if (INSN_CODE (insn) < 0)
50821 return;
50822
50823 byte_len = min_insn_size (insn);
50824 path = get_insn_path (insn);
50825 group = get_insn_group (insn);
50826 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50827 &num_imm64_operand);
50828
50829 fprintf (file, " insn info:\n");
50830 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50831 group_name[group], path, byte_len);
50832 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50833 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50834 }
50835
50836 /* Print to STDERR the status of the ready list with respect to
50837 dispatch windows. */
50838
50839 DEBUG_FUNCTION void
50840 debug_ready_dispatch (void)
50841 {
50842 int i;
50843 int no_ready = number_in_ready ();
50844
50845 fprintf (stdout, "Number of ready: %d\n", no_ready);
50846
50847 for (i = 0; i < no_ready; i++)
50848 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50849 }
50850
50851 /* This routine is the driver of the dispatch scheduler. */
50852
50853 static void
50854 do_dispatch (rtx_insn *insn, int mode)
50855 {
50856 if (mode == DISPATCH_INIT)
50857 init_dispatch_sched ();
50858 else if (mode == ADD_TO_DISPATCH_WINDOW)
50859 add_to_dispatch_window (insn);
50860 }
50861
50862 /* Return TRUE if Dispatch Scheduling is supported. */
50863
50864 static bool
50865 has_dispatch (rtx_insn *insn, int action)
50866 {
50867 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50868 && flag_dispatch_scheduler)
50869 switch (action)
50870 {
50871 default:
50872 return false;
50873
50874 case IS_DISPATCH_ON:
50875 return true;
50876 break;
50877
50878 case IS_CMP:
50879 return is_cmp (insn);
50880
50881 case DISPATCH_VIOLATION:
50882 return dispatch_violation ();
50883
50884 case FITS_DISPATCH_WINDOW:
50885 return fits_dispatch_window (insn);
50886 }
50887
50888 return false;
50889 }
50890
50891 /* Implementation of reassociation_width target hook used by
50892 reassoc phase to identify parallelism level in reassociated
50893 tree. Statements tree_code is passed in OPC. Arguments type
50894 is passed in MODE.
50895
50896 Currently parallel reassociation is enabled for Atom
50897 processors only and we set reassociation width to be 2
50898 because Atom may issue up to 2 instructions per cycle.
50899
50900 Return value should be fixed if parallel reassociation is
50901 enabled for other processors. */
50902
50903 static int
50904 ix86_reassociation_width (unsigned int, machine_mode mode)
50905 {
50906 int res = 1;
50907
50908 /* Vector part. */
50909 if (VECTOR_MODE_P (mode))
50910 {
50911 if (TARGET_VECTOR_PARALLEL_EXECUTION)
50912 return 2;
50913 else
50914 return 1;
50915 }
50916
50917 /* Scalar part. */
50918 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
50919 res = 2;
50920 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
50921 res = 2;
50922
50923 return res;
50924 }
50925
50926 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50927 place emms and femms instructions. */
50928
50929 static machine_mode
50930 ix86_preferred_simd_mode (machine_mode mode)
50931 {
50932 if (!TARGET_SSE)
50933 return word_mode;
50934
50935 switch (mode)
50936 {
50937 case QImode:
50938 return TARGET_AVX512BW ? V64QImode :
50939 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
50940 case HImode:
50941 return TARGET_AVX512BW ? V32HImode :
50942 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
50943 case SImode:
50944 return TARGET_AVX512F ? V16SImode :
50945 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
50946 case DImode:
50947 return TARGET_AVX512F ? V8DImode :
50948 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
50949
50950 case SFmode:
50951 if (TARGET_AVX512F)
50952 return V16SFmode;
50953 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50954 return V8SFmode;
50955 else
50956 return V4SFmode;
50957
50958 case DFmode:
50959 if (!TARGET_VECTORIZE_DOUBLE)
50960 return word_mode;
50961 else if (TARGET_AVX512F)
50962 return V8DFmode;
50963 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50964 return V4DFmode;
50965 else if (TARGET_SSE2)
50966 return V2DFmode;
50967 /* FALLTHRU */
50968
50969 default:
50970 return word_mode;
50971 }
50972 }
50973
50974 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50975 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50976 256bit and 128bit vectors. */
50977
50978 static unsigned int
50979 ix86_autovectorize_vector_sizes (void)
50980 {
50981 return TARGET_AVX512F ? 64 | 32 | 16 :
50982 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
50983 }
50984
50985 \f
50986
50987 /* Return class of registers which could be used for pseudo of MODE
50988 and of class RCLASS for spilling instead of memory. Return NO_REGS
50989 if it is not possible or non-profitable. */
50990 static reg_class_t
50991 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50992 {
50993 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
50994 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50995 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
50996 return ALL_SSE_REGS;
50997 return NO_REGS;
50998 }
50999
51000 /* Implement targetm.vectorize.init_cost. */
51001
51002 static void *
51003 ix86_init_cost (struct loop *)
51004 {
51005 unsigned *cost = XNEWVEC (unsigned, 3);
51006 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51007 return cost;
51008 }
51009
51010 /* Implement targetm.vectorize.add_stmt_cost. */
51011
51012 static unsigned
51013 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51014 struct _stmt_vec_info *stmt_info, int misalign,
51015 enum vect_cost_model_location where)
51016 {
51017 unsigned *cost = (unsigned *) data;
51018 unsigned retval = 0;
51019
51020 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51021 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51022
51023 /* Statements in an inner loop relative to the loop being
51024 vectorized are weighted more heavily. The value here is
51025 arbitrary and could potentially be improved with analysis. */
51026 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51027 count *= 50; /* FIXME. */
51028
51029 retval = (unsigned) (count * stmt_cost);
51030
51031 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51032 for Silvermont, as it has an out-of-order integer pipeline and can execute
51033 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51034 if (TARGET_SILVERMONT || TARGET_INTEL)
51035 if (stmt_info && stmt_info->stmt)
51036 {
51037 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51038 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51039 retval = (retval * 17) / 10;
51040 }
51041
51042 cost[where] += retval;
51043
51044 return retval;
51045 }
51046
51047 /* Implement targetm.vectorize.finish_cost. */
51048
51049 static void
51050 ix86_finish_cost (void *data, unsigned *prologue_cost,
51051 unsigned *body_cost, unsigned *epilogue_cost)
51052 {
51053 unsigned *cost = (unsigned *) data;
51054 *prologue_cost = cost[vect_prologue];
51055 *body_cost = cost[vect_body];
51056 *epilogue_cost = cost[vect_epilogue];
51057 }
51058
51059 /* Implement targetm.vectorize.destroy_cost_data. */
51060
51061 static void
51062 ix86_destroy_cost_data (void *data)
51063 {
51064 free (data);
51065 }
51066
51067 /* Validate target specific memory model bits in VAL. */
51068
51069 static unsigned HOST_WIDE_INT
51070 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51071 {
51072 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51073 bool strong;
51074
51075 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51076 |MEMMODEL_MASK)
51077 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51078 {
51079 warning (OPT_Winvalid_memory_model,
51080 "Unknown architecture specific memory model");
51081 return MEMMODEL_SEQ_CST;
51082 }
51083 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51084 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51085 {
51086 warning (OPT_Winvalid_memory_model,
51087 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51088 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51089 }
51090 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51091 {
51092 warning (OPT_Winvalid_memory_model,
51093 "HLE_RELEASE not used with RELEASE or stronger memory model");
51094 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51095 }
51096 return val;
51097 }
51098
51099 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51100 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51101 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51102 or number of vecsize_mangle variants that should be emitted. */
51103
51104 static int
51105 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51106 struct cgraph_simd_clone *clonei,
51107 tree base_type, int num)
51108 {
51109 int ret = 1;
51110
51111 if (clonei->simdlen
51112 && (clonei->simdlen < 2
51113 || clonei->simdlen > 16
51114 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51115 {
51116 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51117 "unsupported simdlen %d", clonei->simdlen);
51118 return 0;
51119 }
51120
51121 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51122 if (TREE_CODE (ret_type) != VOID_TYPE)
51123 switch (TYPE_MODE (ret_type))
51124 {
51125 case QImode:
51126 case HImode:
51127 case SImode:
51128 case DImode:
51129 case SFmode:
51130 case DFmode:
51131 /* case SCmode: */
51132 /* case DCmode: */
51133 break;
51134 default:
51135 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51136 "unsupported return type %qT for simd\n", ret_type);
51137 return 0;
51138 }
51139
51140 tree t;
51141 int i;
51142
51143 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51144 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51145 switch (TYPE_MODE (TREE_TYPE (t)))
51146 {
51147 case QImode:
51148 case HImode:
51149 case SImode:
51150 case DImode:
51151 case SFmode:
51152 case DFmode:
51153 /* case SCmode: */
51154 /* case DCmode: */
51155 break;
51156 default:
51157 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51158 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51159 return 0;
51160 }
51161
51162 if (clonei->cilk_elemental)
51163 {
51164 /* Parse the processor clause here. If not present, default to 'b'. */
51165 clonei->vecsize_mangle = 'b';
51166 }
51167 else if (!TREE_PUBLIC (node->decl))
51168 {
51169 /* If the function isn't exported, we can pick up just one ISA
51170 for the clones. */
51171 if (TARGET_AVX2)
51172 clonei->vecsize_mangle = 'd';
51173 else if (TARGET_AVX)
51174 clonei->vecsize_mangle = 'c';
51175 else
51176 clonei->vecsize_mangle = 'b';
51177 ret = 1;
51178 }
51179 else
51180 {
51181 clonei->vecsize_mangle = "bcd"[num];
51182 ret = 3;
51183 }
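/* The mangle letters select the ISA of the clone: 'b' is the SSE2/xmm
variant, 'c' the AVX variant and 'd' the AVX2 variant; see the vector
sizes set below and the ISA selection in ix86_simd_clone_adjust. */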
51184 switch (clonei->vecsize_mangle)
51185 {
51186 case 'b':
51187 clonei->vecsize_int = 128;
51188 clonei->vecsize_float = 128;
51189 break;
51190 case 'c':
51191 clonei->vecsize_int = 128;
51192 clonei->vecsize_float = 256;
51193 break;
51194 case 'd':
51195 clonei->vecsize_int = 256;
51196 clonei->vecsize_float = 256;
51197 break;
51198 }
51199 if (clonei->simdlen == 0)
51200 {
51201 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51202 clonei->simdlen = clonei->vecsize_int;
51203 else
51204 clonei->simdlen = clonei->vecsize_float;
51205 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51206 if (clonei->simdlen > 16)
51207 clonei->simdlen = 16;
51208 }
51209 return ret;
51210 }
51211
51212 /* Add target attribute to SIMD clone NODE if needed. */
51213
51214 static void
51215 ix86_simd_clone_adjust (struct cgraph_node *node)
51216 {
51217 const char *str = NULL;
51218 gcc_assert (node->decl == cfun->decl);
51219 switch (node->simdclone->vecsize_mangle)
51220 {
51221 case 'b':
51222 if (!TARGET_SSE2)
51223 str = "sse2";
51224 break;
51225 case 'c':
51226 if (!TARGET_AVX)
51227 str = "avx";
51228 break;
51229 case 'd':
51230 if (!TARGET_AVX2)
51231 str = "avx2";
51232 break;
51233 default:
51234 gcc_unreachable ();
51235 }
51236 if (str == NULL)
51237 return;
51238 push_cfun (NULL);
51239 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51240 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51241 gcc_assert (ok);
51242 pop_cfun ();
51243 ix86_reset_previous_fndecl ();
51244 ix86_set_current_function (node->decl);
51245 }
51246
51247 /* If SIMD clone NODE can't be used in a vectorized loop
51248 in current function, return -1, otherwise return a badness of using it
51249 (0 if it is most desirable from vecsize_mangle point of view, 1
51250 slightly less desirable, etc.). */
51251
51252 static int
51253 ix86_simd_clone_usable (struct cgraph_node *node)
51254 {
51255 switch (node->simdclone->vecsize_mangle)
51256 {
51257 case 'b':
51258 if (!TARGET_SSE2)
51259 return -1;
51260 if (!TARGET_AVX)
51261 return 0;
51262 return TARGET_AVX2 ? 2 : 1;
51263 case 'c':
51264 if (!TARGET_AVX)
51265 return -1;
51266 return TARGET_AVX2 ? 1 : 0;
51267 break;
51268 case 'd':
51269 if (!TARGET_AVX2)
51270 return -1;
51271 return 0;
51272 default:
51273 gcc_unreachable ();
51274 }
51275 }
51276
51277 /* This function adjusts the unroll factor based on
51278 the hardware capabilities. For example, bdver3 has
51279 a loop buffer which makes unrolling of smaller
51280 loops less important. This function decides the
51281 unroll factor using the number of memory references
51282 (value 32 is used) as a heuristic. */
51283
51284 static unsigned
51285 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51286 {
51287 basic_block *bbs;
51288 rtx_insn *insn;
51289 unsigned i;
51290 unsigned mem_count = 0;
51291
51292 if (!TARGET_ADJUST_UNROLL)
51293 return nunroll;
51294
51295 /* Count the number of memory references within the loop body.
51296 This value determines the unrolling factor for bdver3 and bdver4
51297 architectures. */
51298 subrtx_iterator::array_type array;
51299 bbs = get_loop_body (loop);
51300 for (i = 0; i < loop->num_nodes; i++)
51301 FOR_BB_INSNS (bbs[i], insn)
51302 if (NONDEBUG_INSN_P (insn))
51303 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51304 if (const_rtx x = *iter)
51305 if (MEM_P (x))
51306 {
51307 machine_mode mode = GET_MODE (x);
51308 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51309 if (n_words > 4)
51310 mem_count += 2;
51311 else
51312 mem_count += 1;
51313 }
51314 free (bbs);
51315
51316 if (mem_count && mem_count <= 32)
51317 return 32 / mem_count;
51318
51319 return nunroll;
51320 }
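/* Worked example (hypothetical numbers): a bdver3/bdver4 loop containing 8
   word-sized memory references gets an unroll factor of 32 / 8 = 4, while a
   loop with more than 32 counted references (or none at all) keeps the factor
   NUNROLL chosen by the generic heuristics.  References wider than four words
   are counted twice in the code above.  */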
51321
51322
51323 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51324
51325 static bool
51326 ix86_float_exceptions_rounding_supported_p (void)
51327 {
51328 /* For x87 floating point with standard excess precision handling,
51329 there is no adddf3 pattern (since x87 floating point only has
51330 XFmode operations), so the default hook implementation gets this
51331 wrong. */
51332 return TARGET_80387 || TARGET_SSE_MATH;
51333 }
51334
51335 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51336
51337 static void
51338 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51339 {
51340 if (!TARGET_80387 && !TARGET_SSE_MATH)
51341 return;
51342 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
51343 if (TARGET_80387)
51344 {
51345 tree fenv_index_type = build_index_type (size_int (6));
51346 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51347 tree fenv_var = create_tmp_var (fenv_type, NULL);
51348 mark_addressable (fenv_var);
51349 tree fenv_ptr = build_pointer_type (fenv_type);
51350 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51351 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51352 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51353 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51354 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51355 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51356 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51357 tree hold_fnclex = build_call_expr (fnclex, 0);
51358 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51359 hold_fnclex);
51360 *clear = build_call_expr (fnclex, 0);
51361 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
51362 tree fnstsw_call = build_call_expr (fnstsw, 0);
51363 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51364 sw_var, fnstsw_call);
51365 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51366 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51367 exceptions_var, exceptions_x87);
51368 *update = build2 (COMPOUND_EXPR, integer_type_node,
51369 sw_mod, update_mod);
51370 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51371 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51372 }
51373 if (TARGET_SSE_MATH)
51374 {
51375 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
51376 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
51377 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51378 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51379 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51380 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51381 mxcsr_orig_var, stmxcsr_hold_call);
51382 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51383 mxcsr_orig_var,
51384 build_int_cst (unsigned_type_node, 0x1f80));
51385 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51386 build_int_cst (unsigned_type_node, 0xffffffc0));
51387 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51388 mxcsr_mod_var, hold_mod_val);
51389 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51390 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51391 hold_assign_orig, hold_assign_mod);
51392 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51393 ldmxcsr_hold_call);
51394 if (*hold)
51395 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51396 else
51397 *hold = hold_all;
51398 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51399 if (*clear)
51400 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51401 ldmxcsr_clear_call);
51402 else
51403 *clear = ldmxcsr_clear_call;
51404 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
51405 tree exceptions_sse = fold_convert (integer_type_node,
51406 stmxcsr_update_call);
51407 if (*update)
51408 {
51409 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51410 exceptions_var, exceptions_sse);
51411 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51412 exceptions_var, exceptions_mod);
51413 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51414 exceptions_assign);
51415 }
51416 else
51417 *update = build2 (MODIFY_EXPR, integer_type_node,
51418 exceptions_var, exceptions_sse);
51419 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51420 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51421 ldmxcsr_update_call);
51422 }
51423 tree atomic_feraiseexcept
51424 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51425 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51426 1, exceptions_var);
51427 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51428 atomic_feraiseexcept_call);
51429 }
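/* Summary comment (added for exposition; see the hook's documentation in
   target.def for the authoritative contract): *HOLD saves the x87 environment
   with fnstenv and/or MXCSR with stmxcsr and then masks exceptions, *CLEAR
   re-clears the exception flags between retries of the atomic compound
   assignment, and *UPDATE collects the exceptions raised by the successful
   iteration, restores the saved environment and re-raises them through
   __atomic_feraiseexcept.  The MXCSR constants used above are 0x1f80 (all
   exception mask bits set) and 0xffffffc0 (clears the six exception flag
   bits).  */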
51430
51431 /* Return the mode to be used for bounds, or VOIDmode
51432 if bounds are not supported. */
51433
51434 static enum machine_mode
51435 ix86_mpx_bound_mode ()
51436 {
51437 /* Do not support pointer checker if MPX
51438 is not enabled. */
51439 if (!TARGET_MPX)
51440 {
51441 if (flag_check_pointer_bounds)
51442 warning (0, "Pointer Checker requires MPX support on this target."
51443 " Use -mmpx options to enable MPX.");
51444 return VOIDmode;
51445 }
51446
51447 return BNDmode;
51448 }
51449
51450 /* Return the constant used to statically initialize constant bounds.
51451
51452 This function is used to create special bound values. For now
51453 only INIT bounds and NONE bounds are expected. More special
51454 values may be added later. */
51455
51456 static tree
51457 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51458 {
51459 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51460 : build_zero_cst (pointer_sized_int_node);
51461 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51462 : build_minus_one_cst (pointer_sized_int_node);
51463
51464 /* This function is supposed to be used to create INIT and
51465 NONE bounds only. */
51466 gcc_assert ((lb == 0 && ub == -1)
51467 || (lb == -1 && ub == 0));
51468
51469 return build_complex (NULL, low, high);
51470 }
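/* Note added for clarity: the upper bound is kept in complemented form, which
   is why the constants above look inverted: INIT bounds (lb 0, ub -1, i.e. the
   whole address space) are encoded as the pair {0, 0} and NONE bounds
   (lb -1, ub 0) as {-1, -1}.  ix86_initialize_bounds below applies the same
   convention via BIT_NOT_EXPR.  */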
51471
51472 /* Generate a list of statements STMTS to initialize pointer bounds
51473 variable VAR with bounds LB and UB. Return the number of generated
51474 statements. */
51475
51476 static int
51477 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51478 {
51479 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51480 tree lhs, modify, var_p;
51481
51482 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51483 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51484
51485 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51486 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51487 append_to_statement_list (modify, stmts);
51488
51489 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51490 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51491 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51492 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51493 append_to_statement_list (modify, stmts);
51494
51495 return 2;
51496 }
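/* Illustration (hypothetical variable name): for a bounds variable __bnd the
   two MODIFY_EXPRs appended above are roughly equivalent to
     *(uintptr_t *) &__bnd       = lb;
     *((uintptr_t *) &__bnd + 1) = ~ub;
   hence the returned statement count of 2.  */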
51497
51498 /* Initialize the GCC target structure. */
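/* Explanatory note (not in the original file): each #undef/#define pair below
   overrides one default hook from target-def.h with the i386-specific
   implementation defined earlier in this file; TARGET_INITIALIZER at the end
   then expands to an aggregate initializer that collects every hook, default
   or overridden, into the single `targetm' object the middle end consults.  */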
51499 #undef TARGET_RETURN_IN_MEMORY
51500 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51501
51502 #undef TARGET_LEGITIMIZE_ADDRESS
51503 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51504
51505 #undef TARGET_ATTRIBUTE_TABLE
51506 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51507 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51508 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51509 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51510 # undef TARGET_MERGE_DECL_ATTRIBUTES
51511 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51512 #endif
51513
51514 #undef TARGET_COMP_TYPE_ATTRIBUTES
51515 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51516
51517 #undef TARGET_INIT_BUILTINS
51518 #define TARGET_INIT_BUILTINS ix86_init_builtins
51519 #undef TARGET_BUILTIN_DECL
51520 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51521 #undef TARGET_EXPAND_BUILTIN
51522 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51523
51524 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51525 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51526 ix86_builtin_vectorized_function
51527
51528 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51529 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51530
51531 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51532 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51533
51534 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51535 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51536
51537 #undef TARGET_BUILTIN_RECIPROCAL
51538 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51539
51540 #undef TARGET_ASM_FUNCTION_EPILOGUE
51541 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51542
51543 #undef TARGET_ENCODE_SECTION_INFO
51544 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51545 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51546 #else
51547 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51548 #endif
51549
51550 #undef TARGET_ASM_OPEN_PAREN
51551 #define TARGET_ASM_OPEN_PAREN ""
51552 #undef TARGET_ASM_CLOSE_PAREN
51553 #define TARGET_ASM_CLOSE_PAREN ""
51554
51555 #undef TARGET_ASM_BYTE_OP
51556 #define TARGET_ASM_BYTE_OP ASM_BYTE
51557
51558 #undef TARGET_ASM_ALIGNED_HI_OP
51559 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51560 #undef TARGET_ASM_ALIGNED_SI_OP
51561 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51562 #ifdef ASM_QUAD
51563 #undef TARGET_ASM_ALIGNED_DI_OP
51564 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51565 #endif
51566
51567 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51568 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51569
51570 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51571 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51572
51573 #undef TARGET_ASM_UNALIGNED_HI_OP
51574 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51575 #undef TARGET_ASM_UNALIGNED_SI_OP
51576 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51577 #undef TARGET_ASM_UNALIGNED_DI_OP
51578 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51579
51580 #undef TARGET_PRINT_OPERAND
51581 #define TARGET_PRINT_OPERAND ix86_print_operand
51582 #undef TARGET_PRINT_OPERAND_ADDRESS
51583 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51584 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51585 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51586 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51587 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51588
51589 #undef TARGET_SCHED_INIT_GLOBAL
51590 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51591 #undef TARGET_SCHED_ADJUST_COST
51592 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51593 #undef TARGET_SCHED_ISSUE_RATE
51594 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51595 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51596 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51597 ia32_multipass_dfa_lookahead
51598 #undef TARGET_SCHED_MACRO_FUSION_P
51599 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51600 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51601 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51602
51603 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51604 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51605
51606 #undef TARGET_MEMMODEL_CHECK
51607 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51608
51609 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51610 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51611
51612 #ifdef HAVE_AS_TLS
51613 #undef TARGET_HAVE_TLS
51614 #define TARGET_HAVE_TLS true
51615 #endif
51616 #undef TARGET_CANNOT_FORCE_CONST_MEM
51617 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51618 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51619 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51620
51621 #undef TARGET_DELEGITIMIZE_ADDRESS
51622 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51623
51624 #undef TARGET_MS_BITFIELD_LAYOUT_P
51625 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51626
51627 #if TARGET_MACHO
51628 #undef TARGET_BINDS_LOCAL_P
51629 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51630 #endif
51631 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51632 #undef TARGET_BINDS_LOCAL_P
51633 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51634 #endif
51635
51636 #undef TARGET_ASM_OUTPUT_MI_THUNK
51637 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51638 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51639 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51640
51641 #undef TARGET_ASM_FILE_START
51642 #define TARGET_ASM_FILE_START x86_file_start
51643
51644 #undef TARGET_OPTION_OVERRIDE
51645 #define TARGET_OPTION_OVERRIDE ix86_option_override
51646
51647 #undef TARGET_REGISTER_MOVE_COST
51648 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51649 #undef TARGET_MEMORY_MOVE_COST
51650 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51651 #undef TARGET_RTX_COSTS
51652 #define TARGET_RTX_COSTS ix86_rtx_costs
51653 #undef TARGET_ADDRESS_COST
51654 #define TARGET_ADDRESS_COST ix86_address_cost
51655
51656 #undef TARGET_FIXED_CONDITION_CODE_REGS
51657 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51658 #undef TARGET_CC_MODES_COMPATIBLE
51659 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51660
51661 #undef TARGET_MACHINE_DEPENDENT_REORG
51662 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51663
51664 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51665 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51666
51667 #undef TARGET_BUILD_BUILTIN_VA_LIST
51668 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51669
51670 #undef TARGET_FOLD_BUILTIN
51671 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51672
51673 #undef TARGET_COMPARE_VERSION_PRIORITY
51674 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51675
51676 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51677 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51678 ix86_generate_version_dispatcher_body
51679
51680 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51681 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51682 ix86_get_function_versions_dispatcher
51683
51684 #undef TARGET_ENUM_VA_LIST_P
51685 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51686
51687 #undef TARGET_FN_ABI_VA_LIST
51688 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51689
51690 #undef TARGET_CANONICAL_VA_LIST_TYPE
51691 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51692
51693 #undef TARGET_EXPAND_BUILTIN_VA_START
51694 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51695
51696 #undef TARGET_MD_ASM_CLOBBERS
51697 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51698
51699 #undef TARGET_PROMOTE_PROTOTYPES
51700 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51701 #undef TARGET_SETUP_INCOMING_VARARGS
51702 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51703 #undef TARGET_MUST_PASS_IN_STACK
51704 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51705 #undef TARGET_FUNCTION_ARG_ADVANCE
51706 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51707 #undef TARGET_FUNCTION_ARG
51708 #define TARGET_FUNCTION_ARG ix86_function_arg
51709 #undef TARGET_INIT_PIC_REG
51710 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51711 #undef TARGET_USE_PSEUDO_PIC_REG
51712 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51713 #undef TARGET_FUNCTION_ARG_BOUNDARY
51714 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51715 #undef TARGET_PASS_BY_REFERENCE
51716 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51717 #undef TARGET_INTERNAL_ARG_POINTER
51718 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51719 #undef TARGET_UPDATE_STACK_BOUNDARY
51720 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51721 #undef TARGET_GET_DRAP_RTX
51722 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51723 #undef TARGET_STRICT_ARGUMENT_NAMING
51724 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51725 #undef TARGET_STATIC_CHAIN
51726 #define TARGET_STATIC_CHAIN ix86_static_chain
51727 #undef TARGET_TRAMPOLINE_INIT
51728 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51729 #undef TARGET_RETURN_POPS_ARGS
51730 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51731
51732 #undef TARGET_LEGITIMATE_COMBINED_INSN
51733 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51734
51735 #undef TARGET_ASAN_SHADOW_OFFSET
51736 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51737
51738 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51739 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51740
51741 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51742 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51743
51744 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51745 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51746
51747 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51748 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51749 ix86_libgcc_floating_mode_supported_p
51750
51751 #undef TARGET_C_MODE_FOR_SUFFIX
51752 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51753
51754 #ifdef HAVE_AS_TLS
51755 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51756 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51757 #endif
51758
51759 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51760 #undef TARGET_INSERT_ATTRIBUTES
51761 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51762 #endif
51763
51764 #undef TARGET_MANGLE_TYPE
51765 #define TARGET_MANGLE_TYPE ix86_mangle_type
51766
51767 #if !TARGET_MACHO
51768 #undef TARGET_STACK_PROTECT_FAIL
51769 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51770 #endif
51771
51772 #undef TARGET_FUNCTION_VALUE
51773 #define TARGET_FUNCTION_VALUE ix86_function_value
51774
51775 #undef TARGET_FUNCTION_VALUE_REGNO_P
51776 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51777
51778 #undef TARGET_PROMOTE_FUNCTION_MODE
51779 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51780
51781 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51782 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51783
51784 #undef TARGET_INSTANTIATE_DECLS
51785 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51786
51787 #undef TARGET_SECONDARY_RELOAD
51788 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51789
51790 #undef TARGET_CLASS_MAX_NREGS
51791 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51792
51793 #undef TARGET_PREFERRED_RELOAD_CLASS
51794 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51795 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51796 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51797 #undef TARGET_CLASS_LIKELY_SPILLED_P
51798 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51799
51800 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51801 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51802 ix86_builtin_vectorization_cost
51803 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51804 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51805 ix86_vectorize_vec_perm_const_ok
51806 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51807 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51808 ix86_preferred_simd_mode
51809 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51810 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51811 ix86_autovectorize_vector_sizes
51812 #undef TARGET_VECTORIZE_INIT_COST
51813 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51814 #undef TARGET_VECTORIZE_ADD_STMT_COST
51815 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51816 #undef TARGET_VECTORIZE_FINISH_COST
51817 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51818 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51819 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51820
51821 #undef TARGET_SET_CURRENT_FUNCTION
51822 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51823
51824 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51825 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51826
51827 #undef TARGET_OPTION_SAVE
51828 #define TARGET_OPTION_SAVE ix86_function_specific_save
51829
51830 #undef TARGET_OPTION_RESTORE
51831 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51832
51833 #undef TARGET_OPTION_PRINT
51834 #define TARGET_OPTION_PRINT ix86_function_specific_print
51835
51836 #undef TARGET_OPTION_FUNCTION_VERSIONS
51837 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51838
51839 #undef TARGET_CAN_INLINE_P
51840 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51841
51842 #undef TARGET_EXPAND_TO_RTL_HOOK
51843 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
51844
51845 #undef TARGET_LEGITIMATE_ADDRESS_P
51846 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51847
51848 #undef TARGET_LRA_P
51849 #define TARGET_LRA_P hook_bool_void_true
51850
51851 #undef TARGET_REGISTER_PRIORITY
51852 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51853
51854 #undef TARGET_REGISTER_USAGE_LEVELING_P
51855 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51856
51857 #undef TARGET_LEGITIMATE_CONSTANT_P
51858 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51859
51860 #undef TARGET_FRAME_POINTER_REQUIRED
51861 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51862
51863 #undef TARGET_CAN_ELIMINATE
51864 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51865
51866 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51867 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51868
51869 #undef TARGET_ASM_CODE_END
51870 #define TARGET_ASM_CODE_END ix86_code_end
51871
51872 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51873 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51874
51875 #if TARGET_MACHO
51876 #undef TARGET_INIT_LIBFUNCS
51877 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
51878 #endif
51879
51880 #undef TARGET_LOOP_UNROLL_ADJUST
51881 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51882
51883 #undef TARGET_SPILL_CLASS
51884 #define TARGET_SPILL_CLASS ix86_spill_class
51885
51886 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51887 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51888 ix86_simd_clone_compute_vecsize_and_simdlen
51889
51890 #undef TARGET_SIMD_CLONE_ADJUST
51891 #define TARGET_SIMD_CLONE_ADJUST \
51892 ix86_simd_clone_adjust
51893
51894 #undef TARGET_SIMD_CLONE_USABLE
51895 #define TARGET_SIMD_CLONE_USABLE \
51896 ix86_simd_clone_usable
51897
51898 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51899 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51900 ix86_float_exceptions_rounding_supported_p
51901
51902 #undef TARGET_MODE_EMIT
51903 #define TARGET_MODE_EMIT ix86_emit_mode_set
51904
51905 #undef TARGET_MODE_NEEDED
51906 #define TARGET_MODE_NEEDED ix86_mode_needed
51907
51908 #undef TARGET_MODE_AFTER
51909 #define TARGET_MODE_AFTER ix86_mode_after
51910
51911 #undef TARGET_MODE_ENTRY
51912 #define TARGET_MODE_ENTRY ix86_mode_entry
51913
51914 #undef TARGET_MODE_EXIT
51915 #define TARGET_MODE_EXIT ix86_mode_exit
51916
51917 #undef TARGET_MODE_PRIORITY
51918 #define TARGET_MODE_PRIORITY ix86_mode_priority
51919
51920 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51921 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
51922
51923 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51924 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51925
51926 #undef TARGET_STORE_BOUNDS_FOR_ARG
51927 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51928
51929 #undef TARGET_LOAD_RETURNED_BOUNDS
51930 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51931
51932 #undef TARGET_STORE_RETURNED_BOUNDS
51933 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51934
51935 #undef TARGET_CHKP_BOUND_MODE
51936 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51937
51938 #undef TARGET_BUILTIN_CHKP_FUNCTION
51939 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51940
51941 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51942 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51943
51944 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51945 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51946
51947 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51948 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51949
51950 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51951 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51952
51953 #undef TARGET_OFFLOAD_OPTIONS
51954 #define TARGET_OFFLOAD_OPTIONS \
51955 ix86_offload_options
51956
51957 struct gcc_target targetm = TARGET_INITIALIZER;
51958 \f
51959 #include "gt-i386.h"