1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "stringpool.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "cgraph.h"
38 #include "diagnostic.h"
39 #include "cfgbuild.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "attribs.h"
43 #include "calls.h"
44 #include "stor-layout.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "except.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "gimplify.h"
57 #include "dwarf2.h"
58 #include "tm-constrs.h"
59 #include "params.h"
60 #include "cselib.h"
61 #include "sched-int.h"
62 #include "opts.h"
63 #include "tree-pass.h"
64 #include "context.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
70 #include "builtins.h"
71 #include "rtl-iter.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
74 #include "rtl-chkp.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
78 #include "dojump.h"
79
80 /* This file should be included last. */
81 #include "target-def.h"
82
83 static rtx legitimize_dllimport_symbol (rtx, bool);
84 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
85 static rtx legitimize_pe_coff_symbol (rtx, bool);
86 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
87
88 #ifndef CHECK_STACK_LIMIT
89 #define CHECK_STACK_LIMIT (-1)
90 #endif
91
92 /* Return index of given mode in mult and division cost tables. */
93 #define MODE_INDEX(mode) \
94 ((mode) == QImode ? 0 \
95 : (mode) == HImode ? 1 \
96 : (mode) == SImode ? 2 \
97 : (mode) == DImode ? 3 \
98 : 4)
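/* Editor's note (added): a minimal, self-contained sketch -- not part of GCC --
   of how a MODE_INDEX-style lookup picks the per-mode row out of the five-entry
   cost arrays below (QI, HI, SI, DI, other).  The enum, macro and table here are
   hypothetical stand-ins for the real machine modes and cost fields; the block
   is kept under #if 0 so it does not affect the build.  */
#if 0
#include <stdio.h>

enum demo_mode { DEMO_QImode, DEMO_HImode, DEMO_SImode, DEMO_DImode, DEMO_OTHER };

#define DEMO_MODE_INDEX(mode) \
  ((mode) == DEMO_QImode ? 0  \
   : (mode) == DEMO_HImode ? 1 \
   : (mode) == DEMO_SImode ? 2 \
   : (mode) == DEMO_DImode ? 3 \
   : 4)

/* Shaped like the "cost of starting multiply" rows in the tables below.  */
static const int demo_mult_init[5] = { 3, 4, 3, 4, 5 };

int
main (void)
{
  /* A DImode multiply start cost comes from row 3 of the table.  */
  printf ("DImode mult cost: %d\n", demo_mult_init[DEMO_MODE_INDEX (DEMO_DImode)]);
  return 0;
}
#endif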
99
100 /* Processor costs (relative to an add) */
101 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
102 #define COSTS_N_BYTES(N) ((N) * 2)
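/* Editor's note (added): worked arithmetic for the scaling above.  With
   COSTS_N_INSNS (N) == (N) * 4 and a typical addition taking 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): when tuning for size, a 2-byte
   add is charged the same number of cost units as one "average" instruction
   is in the speed-tuned tables.  */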
103
104 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
105
106 static stringop_algs ix86_size_memcpy[2] = {
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
109 static stringop_algs ix86_size_memset[2] = {
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
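/* Editor's note (added): each of these stringop tables has two entries; as far
   as I can tell element [0] is consulted when generating 32-bit code and
   element [1] when generating 64-bit code, and DUMMY_STRINGOP_ALGS (an
   unconditional libcall) fills the 64-bit slot for processors that never run
   64-bit code.  Each entry pairs an algorithm for sizes unknown at compile time
   with a list of {max size, algorithm, flag} steps, where a max of -1 covers
   every remaining size.  */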
112
113 const
114 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
115 COSTS_N_BYTES (2), /* cost of an add instruction */
116 COSTS_N_BYTES (3), /* cost of a lea instruction */
117 COSTS_N_BYTES (2), /* variable shift costs */
118 COSTS_N_BYTES (3), /* constant shift costs */
119 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
120 COSTS_N_BYTES (3), /* HI */
121 COSTS_N_BYTES (3), /* SI */
122 COSTS_N_BYTES (3), /* DI */
123 COSTS_N_BYTES (5)}, /* other */
124 0, /* cost of multiply per each bit set */
125 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
126 COSTS_N_BYTES (3), /* HI */
127 COSTS_N_BYTES (3), /* SI */
128 COSTS_N_BYTES (3), /* DI */
129 COSTS_N_BYTES (5)}, /* other */
130 COSTS_N_BYTES (3), /* cost of movsx */
131 COSTS_N_BYTES (3), /* cost of movzx */
132 0, /* "large" insn */
133 2, /* MOVE_RATIO */
134 2, /* cost for loading QImode using movzbl */
135 {2, 2, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 2, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {2, 2, 2}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {2, 2, 2}, /* cost of storing fp registers
143 in SFmode, DFmode and XFmode */
144 3, /* cost of moving MMX register */
145 {3, 3}, /* cost of loading MMX registers
146 in SImode and DImode */
147 {3, 3}, /* cost of storing MMX registers
148 in SImode and DImode */
149 3, /* cost of moving SSE register */
150 {3, 3, 3}, /* cost of loading SSE registers
151 in SImode, DImode and TImode */
152 {3, 3, 3}, /* cost of storing SSE registers
153 in SImode, DImode and TImode */
154 3, /* MMX or SSE register to integer */
155 0, /* size of l1 cache */
156 0, /* size of l2 cache */
157 0, /* size of prefetch block */
158 0, /* number of parallel prefetches */
159 2, /* Branch cost */
160 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
161 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
162 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
163 COSTS_N_BYTES (2), /* cost of FABS instruction. */
164 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
165 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
166 ix86_size_memcpy,
167 ix86_size_memset,
168 1, /* scalar_stmt_cost. */
169 1, /* scalar load_cost. */
170 1, /* scalar_store_cost. */
171 1, /* vec_stmt_cost. */
172 1, /* vec_to_scalar_cost. */
173 1, /* scalar_to_vec_cost. */
174 1, /* vec_align_load_cost. */
175 1, /* vec_unalign_load_cost. */
176 1, /* vec_store_cost. */
177 1, /* cond_taken_branch_cost. */
178 1, /* cond_not_taken_branch_cost. */
179 };
180
181 /* Processor costs (relative to an add) */
182 static stringop_algs i386_memcpy[2] = {
183 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
184 DUMMY_STRINGOP_ALGS};
185 static stringop_algs i386_memset[2] = {
186 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
187 DUMMY_STRINGOP_ALGS};
188
189 static const
190 struct processor_costs i386_cost = { /* 386 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (6), /* HI */
197 COSTS_N_INSNS (6), /* SI */
198 COSTS_N_INSNS (6), /* DI */
199 COSTS_N_INSNS (6)}, /* other */
200 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (23), /* HI */
203 COSTS_N_INSNS (23), /* SI */
204 COSTS_N_INSNS (23), /* DI */
205 COSTS_N_INSNS (23)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
209 3, /* MOVE_RATIO */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of l1 cache */
232 0, /* size of l2 cache */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
235 1, /* Branch cost */
236 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
237 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
238 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
239 COSTS_N_INSNS (22), /* cost of FABS instruction. */
240 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
241 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
242 i386_memcpy,
243 i386_memset,
244 1, /* scalar_stmt_cost. */
245 1, /* scalar load_cost. */
246 1, /* scalar_store_cost. */
247 1, /* vec_stmt_cost. */
248 1, /* vec_to_scalar_cost. */
249 1, /* scalar_to_vec_cost. */
250 1, /* vec_align_load_cost. */
251 2, /* vec_unalign_load_cost. */
252 1, /* vec_store_cost. */
253 3, /* cond_taken_branch_cost. */
254 1, /* cond_not_taken_branch_cost. */
255 };
256
257 static stringop_algs i486_memcpy[2] = {
258 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
259 DUMMY_STRINGOP_ALGS};
260 static stringop_algs i486_memset[2] = {
261 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
262 DUMMY_STRINGOP_ALGS};
263
264 static const
265 struct processor_costs i486_cost = { /* 486 specific costs */
266 COSTS_N_INSNS (1), /* cost of an add instruction */
267 COSTS_N_INSNS (1), /* cost of a lea instruction */
268 COSTS_N_INSNS (3), /* variable shift costs */
269 COSTS_N_INSNS (2), /* constant shift costs */
270 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
271 COSTS_N_INSNS (12), /* HI */
272 COSTS_N_INSNS (12), /* SI */
273 COSTS_N_INSNS (12), /* DI */
274 COSTS_N_INSNS (12)}, /* other */
275 1, /* cost of multiply per each bit set */
276 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
277 COSTS_N_INSNS (40), /* HI */
278 COSTS_N_INSNS (40), /* SI */
279 COSTS_N_INSNS (40), /* DI */
280 COSTS_N_INSNS (40)}, /* other */
281 COSTS_N_INSNS (3), /* cost of movsx */
282 COSTS_N_INSNS (2), /* cost of movzx */
283 15, /* "large" insn */
284 3, /* MOVE_RATIO */
285 4, /* cost for loading QImode using movzbl */
286 {2, 4, 2}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 4, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {8, 8, 8}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293 {8, 8, 8}, /* cost of storing fp registers
294 in SFmode, DFmode and XFmode */
295 2, /* cost of moving MMX register */
296 {4, 8}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {4, 8}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {4, 8, 16}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {4, 8, 16}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 3, /* MMX or SSE register to integer */
306 4, /* size of l1 cache. 486 has 8kB cache
307 shared for code and data, so 4kB is
308 not really precise. */
309 4, /* size of l2 cache */
310 0, /* size of prefetch block */
311 0, /* number of parallel prefetches */
312 1, /* Branch cost */
313 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
314 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
315 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
316 COSTS_N_INSNS (3), /* cost of FABS instruction. */
317 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
318 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
319 i486_memcpy,
320 i486_memset,
321 1, /* scalar_stmt_cost. */
322 1, /* scalar load_cost. */
323 1, /* scalar_store_cost. */
324 1, /* vec_stmt_cost. */
325 1, /* vec_to_scalar_cost. */
326 1, /* scalar_to_vec_cost. */
327 1, /* vec_align_load_cost. */
328 2, /* vec_unalign_load_cost. */
329 1, /* vec_store_cost. */
330 3, /* cond_taken_branch_cost. */
331 1, /* cond_not_taken_branch_cost. */
332 };
333
334 static stringop_algs pentium_memcpy[2] = {
335 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
336 DUMMY_STRINGOP_ALGS};
337 static stringop_algs pentium_memset[2] = {
338 {libcall, {{-1, rep_prefix_4_byte, false}}},
339 DUMMY_STRINGOP_ALGS};
340
341 static const
342 struct processor_costs pentium_cost = {
343 COSTS_N_INSNS (1), /* cost of an add instruction */
344 COSTS_N_INSNS (1), /* cost of a lea instruction */
345 COSTS_N_INSNS (4), /* variable shift costs */
346 COSTS_N_INSNS (1), /* constant shift costs */
347 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
348 COSTS_N_INSNS (11), /* HI */
349 COSTS_N_INSNS (11), /* SI */
350 COSTS_N_INSNS (11), /* DI */
351 COSTS_N_INSNS (11)}, /* other */
352 0, /* cost of multiply per each bit set */
353 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
354 COSTS_N_INSNS (25), /* HI */
355 COSTS_N_INSNS (25), /* SI */
356 COSTS_N_INSNS (25), /* DI */
357 COSTS_N_INSNS (25)}, /* other */
358 COSTS_N_INSNS (3), /* cost of movsx */
359 COSTS_N_INSNS (2), /* cost of movzx */
360 8, /* "large" insn */
361 6, /* MOVE_RATIO */
362 6, /* cost for loading QImode using movzbl */
363 {2, 4, 2}, /* cost of loading integer registers
364 in QImode, HImode and SImode.
365 Relative to reg-reg move (2). */
366 {2, 4, 2}, /* cost of storing integer registers */
367 2, /* cost of reg,reg fld/fst */
368 {2, 2, 6}, /* cost of loading fp registers
369 in SFmode, DFmode and XFmode */
370 {4, 4, 6}, /* cost of storing fp registers
371 in SFmode, DFmode and XFmode */
372 8, /* cost of moving MMX register */
373 {8, 8}, /* cost of loading MMX registers
374 in SImode and DImode */
375 {8, 8}, /* cost of storing MMX registers
376 in SImode and DImode */
377 2, /* cost of moving SSE register */
378 {4, 8, 16}, /* cost of loading SSE registers
379 in SImode, DImode and TImode */
380 {4, 8, 16}, /* cost of storing SSE registers
381 in SImode, DImode and TImode */
382 3, /* MMX or SSE register to integer */
383 8, /* size of l1 cache. */
384 8, /* size of l2 cache */
385 0, /* size of prefetch block */
386 0, /* number of parallel prefetches */
387 2, /* Branch cost */
388 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
389 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
390 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
391 COSTS_N_INSNS (1), /* cost of FABS instruction. */
392 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
393 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
394 pentium_memcpy,
395 pentium_memset,
396 1, /* scalar_stmt_cost. */
397 1, /* scalar load_cost. */
398 1, /* scalar_store_cost. */
399 1, /* vec_stmt_cost. */
400 1, /* vec_to_scalar_cost. */
401 1, /* scalar_to_vec_cost. */
402 1, /* vec_align_load_cost. */
403 2, /* vec_unalign_load_cost. */
404 1, /* vec_store_cost. */
405 3, /* cond_taken_branch_cost. */
406 1, /* cond_not_taken_branch_cost. */
407 };
408
409 static const
410 struct processor_costs lakemont_cost = {
411 COSTS_N_INSNS (1), /* cost of an add instruction */
412 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
413 COSTS_N_INSNS (1), /* variable shift costs */
414 COSTS_N_INSNS (1), /* constant shift costs */
415 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
416 COSTS_N_INSNS (11), /* HI */
417 COSTS_N_INSNS (11), /* SI */
418 COSTS_N_INSNS (11), /* DI */
419 COSTS_N_INSNS (11)}, /* other */
420 0, /* cost of multiply per each bit set */
421 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
422 COSTS_N_INSNS (25), /* HI */
423 COSTS_N_INSNS (25), /* SI */
424 COSTS_N_INSNS (25), /* DI */
425 COSTS_N_INSNS (25)}, /* other */
426 COSTS_N_INSNS (3), /* cost of movsx */
427 COSTS_N_INSNS (2), /* cost of movzx */
428 8, /* "large" insn */
429 9, /* MOVE_RATIO */
430 6, /* cost for loading QImode using movzbl */
431 {2, 4, 2}, /* cost of loading integer registers
432 in QImode, HImode and SImode.
433 Relative to reg-reg move (2). */
434 {2, 4, 2}, /* cost of storing integer registers */
435 2, /* cost of reg,reg fld/fst */
436 {2, 2, 6}, /* cost of loading fp registers
437 in SFmode, DFmode and XFmode */
438 {4, 4, 6}, /* cost of storing fp registers
439 in SFmode, DFmode and XFmode */
440 8, /* cost of moving MMX register */
441 {8, 8}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {8, 8}, /* cost of storing MMX registers
444 in SImode and DImode */
445 2, /* cost of moving SSE register */
446 {4, 8, 16}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {4, 8, 16}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 3, /* MMX or SSE register to integer */
451 8, /* size of l1 cache. */
452 8, /* size of l2 cache */
453 0, /* size of prefetch block */
454 0, /* number of parallel prefetches */
455 2, /* Branch cost */
456 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
457 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
458 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
459 COSTS_N_INSNS (1), /* cost of FABS instruction. */
460 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
461 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
462 pentium_memcpy,
463 pentium_memset,
464 1, /* scalar_stmt_cost. */
465 1, /* scalar load_cost. */
466 1, /* scalar_store_cost. */
467 1, /* vec_stmt_cost. */
468 1, /* vec_to_scalar_cost. */
469 1, /* scalar_to_vec_cost. */
470 1, /* vec_align_load_cost. */
471 2, /* vec_unalign_load_cost. */
472 1, /* vec_store_cost. */
473 3, /* cond_taken_branch_cost. */
474 1, /* cond_not_taken_branch_cost. */
475 };
476
477 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
478 (we ensure the alignment).  For small blocks an inline loop is still a
479 noticeable win; for bigger blocks either rep movsl or rep movsb is the way
480 to go.  Rep movsb apparently has a more expensive startup time in the CPU,
481 but past 4K the difference is down in the noise.  See the sketch below.  */
482 static stringop_algs pentiumpro_memcpy[2] = {
483 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
484 {8192, rep_prefix_4_byte, false},
485 {-1, rep_prefix_1_byte, false}}},
486 DUMMY_STRINGOP_ALGS};
487 static stringop_algs pentiumpro_memset[2] = {
488 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
489 {8192, rep_prefix_4_byte, false},
490 {-1, libcall, false}}},
491 DUMMY_STRINGOP_ALGS};
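/* Editor's note (added): a simplified, standalone sketch -- not GCC's actual
   decision code -- of how a size-threshold table like the two above can be
   scanned to pick a string-operation strategy: take the first step whose
   maximum size covers the request, with -1 acting as "everything else".  The
   types and names below are illustrative only; the real logic also weighs
   alignment, tuning flags and 32- vs 64-bit mode.  Kept under #if 0 so it does
   not affect the build.  */
#if 0
#include <stdio.h>

enum demo_alg { DEMO_LOOP, DEMO_UNROLLED_LOOP, DEMO_REP_4_BYTE,
                DEMO_REP_1_BYTE, DEMO_LIBCALL };

struct demo_step { long max; enum demo_alg alg; };

/* Mirrors the shape of pentiumpro_memcpy's known-size entries above.  */
static const struct demo_step demo_memcpy_steps[] = {
  { 128, DEMO_LOOP }, { 1024, DEMO_UNROLLED_LOOP },
  { 8192, DEMO_REP_4_BYTE }, { -1, DEMO_REP_1_BYTE },
};

static enum demo_alg
demo_pick_alg (long size)
{
  for (size_t i = 0; i < sizeof demo_memcpy_steps / sizeof demo_memcpy_steps[0]; i++)
    if (demo_memcpy_steps[i].max == -1 || size <= demo_memcpy_steps[i].max)
      return demo_memcpy_steps[i].alg;
  return DEMO_LIBCALL;
}

int
main (void)
{
  /* A 96-byte copy maps to the inline loop; a 64 KiB copy falls through to
     the -1 entry (rep movsb).  */
  printf ("%d %d\n", demo_pick_alg (96), demo_pick_alg (65536));
  return 0;
}
#endif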
492 static const
493 struct processor_costs pentiumpro_cost = {
494 COSTS_N_INSNS (1), /* cost of an add instruction */
495 COSTS_N_INSNS (1), /* cost of a lea instruction */
496 COSTS_N_INSNS (1), /* variable shift costs */
497 COSTS_N_INSNS (1), /* constant shift costs */
498 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
499 COSTS_N_INSNS (4), /* HI */
500 COSTS_N_INSNS (4), /* SI */
501 COSTS_N_INSNS (4), /* DI */
502 COSTS_N_INSNS (4)}, /* other */
503 0, /* cost of multiply per each bit set */
504 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
505 COSTS_N_INSNS (17), /* HI */
506 COSTS_N_INSNS (17), /* SI */
507 COSTS_N_INSNS (17), /* DI */
508 COSTS_N_INSNS (17)}, /* other */
509 COSTS_N_INSNS (1), /* cost of movsx */
510 COSTS_N_INSNS (1), /* cost of movzx */
511 8, /* "large" insn */
512 6, /* MOVE_RATIO */
513 2, /* cost for loading QImode using movzbl */
514 {4, 4, 4}, /* cost of loading integer registers
515 in QImode, HImode and SImode.
516 Relative to reg-reg move (2). */
517 {2, 2, 2}, /* cost of storing integer registers */
518 2, /* cost of reg,reg fld/fst */
519 {2, 2, 6}, /* cost of loading fp registers
520 in SFmode, DFmode and XFmode */
521 {4, 4, 6}, /* cost of storing fp registers
522 in SFmode, DFmode and XFmode */
523 2, /* cost of moving MMX register */
524 {2, 2}, /* cost of loading MMX registers
525 in SImode and DImode */
526 {2, 2}, /* cost of storing MMX registers
527 in SImode and DImode */
528 2, /* cost of moving SSE register */
529 {2, 2, 8}, /* cost of loading SSE registers
530 in SImode, DImode and TImode */
531 {2, 2, 8}, /* cost of storing SSE registers
532 in SImode, DImode and TImode */
533 3, /* MMX or SSE register to integer */
534 8, /* size of l1 cache. */
535 256, /* size of l2 cache */
536 32, /* size of prefetch block */
537 6, /* number of parallel prefetches */
538 2, /* Branch cost */
539 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
540 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
541 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
542 COSTS_N_INSNS (2), /* cost of FABS instruction. */
543 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
544 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
545 pentiumpro_memcpy,
546 pentiumpro_memset,
547 1, /* scalar_stmt_cost. */
548 1, /* scalar load_cost. */
549 1, /* scalar_store_cost. */
550 1, /* vec_stmt_cost. */
551 1, /* vec_to_scalar_cost. */
552 1, /* scalar_to_vec_cost. */
553 1, /* vec_align_load_cost. */
554 2, /* vec_unalign_load_cost. */
555 1, /* vec_store_cost. */
556 3, /* cond_taken_branch_cost. */
557 1, /* cond_not_taken_branch_cost. */
558 };
559
560 static stringop_algs geode_memcpy[2] = {
561 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
562 DUMMY_STRINGOP_ALGS};
563 static stringop_algs geode_memset[2] = {
564 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
565 DUMMY_STRINGOP_ALGS};
566 static const
567 struct processor_costs geode_cost = {
568 COSTS_N_INSNS (1), /* cost of an add instruction */
569 COSTS_N_INSNS (1), /* cost of a lea instruction */
570 COSTS_N_INSNS (2), /* variable shift costs */
571 COSTS_N_INSNS (1), /* constant shift costs */
572 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
573 COSTS_N_INSNS (4), /* HI */
574 COSTS_N_INSNS (7), /* SI */
575 COSTS_N_INSNS (7), /* DI */
576 COSTS_N_INSNS (7)}, /* other */
577 0, /* cost of multiply per each bit set */
578 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
579 COSTS_N_INSNS (23), /* HI */
580 COSTS_N_INSNS (39), /* SI */
581 COSTS_N_INSNS (39), /* DI */
582 COSTS_N_INSNS (39)}, /* other */
583 COSTS_N_INSNS (1), /* cost of movsx */
584 COSTS_N_INSNS (1), /* cost of movzx */
585 8, /* "large" insn */
586 4, /* MOVE_RATIO */
587 1, /* cost for loading QImode using movzbl */
588 {1, 1, 1}, /* cost of loading integer registers
589 in QImode, HImode and SImode.
590 Relative to reg-reg move (2). */
591 {1, 1, 1}, /* cost of storing integer registers */
592 1, /* cost of reg,reg fld/fst */
593 {1, 1, 1}, /* cost of loading fp registers
594 in SFmode, DFmode and XFmode */
595 {4, 6, 6}, /* cost of storing fp registers
596 in SFmode, DFmode and XFmode */
597
598 2, /* cost of moving MMX register */
599 {2, 2}, /* cost of loading MMX registers
600 in SImode and DImode */
601 {2, 2}, /* cost of storing MMX registers
602 in SImode and DImode */
603 2, /* cost of moving SSE register */
604 {2, 2, 8}, /* cost of loading SSE registers
605 in SImode, DImode and TImode */
606 {2, 2, 8}, /* cost of storing SSE registers
607 in SImode, DImode and TImode */
608 3, /* MMX or SSE register to integer */
609 64, /* size of l1 cache. */
610 128, /* size of l2 cache. */
611 32, /* size of prefetch block */
612 1, /* number of parallel prefetches */
613 1, /* Branch cost */
614 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (1), /* cost of FABS instruction. */
618 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
620 geode_memcpy,
621 geode_memset,
622 1, /* scalar_stmt_cost. */
623 1, /* scalar load_cost. */
624 1, /* scalar_store_cost. */
625 1, /* vec_stmt_cost. */
626 1, /* vec_to_scalar_cost. */
627 1, /* scalar_to_vec_cost. */
628 1, /* vec_align_load_cost. */
629 2, /* vec_unalign_load_cost. */
630 1, /* vec_store_cost. */
631 3, /* cond_taken_branch_cost. */
632 1, /* cond_not_taken_branch_cost. */
633 };
634
635 static stringop_algs k6_memcpy[2] = {
636 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
637 DUMMY_STRINGOP_ALGS};
638 static stringop_algs k6_memset[2] = {
639 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
640 DUMMY_STRINGOP_ALGS};
641 static const
642 struct processor_costs k6_cost = {
643 COSTS_N_INSNS (1), /* cost of an add instruction */
644 COSTS_N_INSNS (2), /* cost of a lea instruction */
645 COSTS_N_INSNS (1), /* variable shift costs */
646 COSTS_N_INSNS (1), /* constant shift costs */
647 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
648 COSTS_N_INSNS (3), /* HI */
649 COSTS_N_INSNS (3), /* SI */
650 COSTS_N_INSNS (3), /* DI */
651 COSTS_N_INSNS (3)}, /* other */
652 0, /* cost of multiply per each bit set */
653 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
654 COSTS_N_INSNS (18), /* HI */
655 COSTS_N_INSNS (18), /* SI */
656 COSTS_N_INSNS (18), /* DI */
657 COSTS_N_INSNS (18)}, /* other */
658 COSTS_N_INSNS (2), /* cost of movsx */
659 COSTS_N_INSNS (2), /* cost of movzx */
660 8, /* "large" insn */
661 4, /* MOVE_RATIO */
662 3, /* cost for loading QImode using movzbl */
663 {4, 5, 4}, /* cost of loading integer registers
664 in QImode, HImode and SImode.
665 Relative to reg-reg move (2). */
666 {2, 3, 2}, /* cost of storing integer registers */
667 4, /* cost of reg,reg fld/fst */
668 {6, 6, 6}, /* cost of loading fp registers
669 in SFmode, DFmode and XFmode */
670 {4, 4, 4}, /* cost of storing fp registers
671 in SFmode, DFmode and XFmode */
672 2, /* cost of moving MMX register */
673 {2, 2}, /* cost of loading MMX registers
674 in SImode and DImode */
675 {2, 2}, /* cost of storing MMX registers
676 in SImode and DImode */
677 2, /* cost of moving SSE register */
678 {2, 2, 8}, /* cost of loading SSE registers
679 in SImode, DImode and TImode */
680 {2, 2, 8}, /* cost of storing SSE registers
681 in SImode, DImode and TImode */
682 6, /* MMX or SSE register to integer */
683 32, /* size of l1 cache. */
684 32, /* size of l2 cache.  Some models
685 have an integrated l2 cache, but
686 optimizing for K6 is not important
687 enough to worry about that. */
688 32, /* size of prefetch block */
689 1, /* number of parallel prefetches */
690 1, /* Branch cost */
691 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
692 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
693 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
694 COSTS_N_INSNS (2), /* cost of FABS instruction. */
695 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
696 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
697 k6_memcpy,
698 k6_memset,
699 1, /* scalar_stmt_cost. */
700 1, /* scalar load_cost. */
701 1, /* scalar_store_cost. */
702 1, /* vec_stmt_cost. */
703 1, /* vec_to_scalar_cost. */
704 1, /* scalar_to_vec_cost. */
705 1, /* vec_align_load_cost. */
706 2, /* vec_unalign_load_cost. */
707 1, /* vec_store_cost. */
708 3, /* cond_taken_branch_cost. */
709 1, /* cond_not_taken_branch_cost. */
710 };
711
712 /* For some reason, Athlon handles the REP prefix better (relative to loops)
713 than K8 does.  Alignment becomes important after 8 bytes for memcpy and
714 128 bytes for memset.  */
715 static stringop_algs athlon_memcpy[2] = {
716 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
717 DUMMY_STRINGOP_ALGS};
718 static stringop_algs athlon_memset[2] = {
719 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
720 DUMMY_STRINGOP_ALGS};
721 static const
722 struct processor_costs athlon_cost = {
723 COSTS_N_INSNS (1), /* cost of an add instruction */
724 COSTS_N_INSNS (2), /* cost of a lea instruction */
725 COSTS_N_INSNS (1), /* variable shift costs */
726 COSTS_N_INSNS (1), /* constant shift costs */
727 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
728 COSTS_N_INSNS (5), /* HI */
729 COSTS_N_INSNS (5), /* SI */
730 COSTS_N_INSNS (5), /* DI */
731 COSTS_N_INSNS (5)}, /* other */
732 0, /* cost of multiply per each bit set */
733 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
734 COSTS_N_INSNS (26), /* HI */
735 COSTS_N_INSNS (42), /* SI */
736 COSTS_N_INSNS (74), /* DI */
737 COSTS_N_INSNS (74)}, /* other */
738 COSTS_N_INSNS (1), /* cost of movsx */
739 COSTS_N_INSNS (1), /* cost of movzx */
740 8, /* "large" insn */
741 9, /* MOVE_RATIO */
742 4, /* cost for loading QImode using movzbl */
743 {3, 4, 3}, /* cost of loading integer registers
744 in QImode, HImode and SImode.
745 Relative to reg-reg move (2). */
746 {3, 4, 3}, /* cost of storing integer registers */
747 4, /* cost of reg,reg fld/fst */
748 {4, 4, 12}, /* cost of loading fp registers
749 in SFmode, DFmode and XFmode */
750 {6, 6, 8}, /* cost of storing fp registers
751 in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {4, 4}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {4, 4, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 5}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 5, /* MMX or SSE register to integer */
763 64, /* size of l1 cache. */
764 256, /* size of l2 cache. */
765 64, /* size of prefetch block */
766 6, /* number of parallel prefetches */
767 5, /* Branch cost */
768 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
769 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
770 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
771 COSTS_N_INSNS (2), /* cost of FABS instruction. */
772 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
773 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
774 athlon_memcpy,
775 athlon_memset,
776 1, /* scalar_stmt_cost. */
777 1, /* scalar load_cost. */
778 1, /* scalar_store_cost. */
779 1, /* vec_stmt_cost. */
780 1, /* vec_to_scalar_cost. */
781 1, /* scalar_to_vec_cost. */
782 1, /* vec_align_load_cost. */
783 2, /* vec_unalign_load_cost. */
784 1, /* vec_store_cost. */
785 3, /* cond_taken_branch_cost. */
786 1, /* cond_not_taken_branch_cost. */
787 };
788
789 /* K8 has an optimized REP instruction for medium-sized blocks; for very
790 small blocks an inline loop is better, and for large blocks a libcall can
791 use non-temporal accesses and beat inline code considerably.  See below.  */
792 static stringop_algs k8_memcpy[2] = {
793 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
794 {-1, rep_prefix_4_byte, false}}},
795 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
796 {-1, libcall, false}}}};
797 static stringop_algs k8_memset[2] = {
798 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
799 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
800 {libcall, {{48, unrolled_loop, false},
801 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
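/* Editor's note (added): a worked reading of the two tables above (my
   interpretation of the thresholds).  k8_memcpy, first (32-bit) entry: blocks
   of at most 6 bytes use an inline loop, at most 14 an unrolled loop, anything
   larger rep movsl; second (64-bit) entry: at most 16 bytes a loop, up to 8192
   rep movsq, larger blocks a libcall.  k8_memset follows the same pattern with
   its own cut-offs (8 / 24 / 2048 for 32-bit, 48 / 8192 for 64-bit).  */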
802 static const
803 struct processor_costs k8_cost = {
804 COSTS_N_INSNS (1), /* cost of an add instruction */
805 COSTS_N_INSNS (2), /* cost of a lea instruction */
806 COSTS_N_INSNS (1), /* variable shift costs */
807 COSTS_N_INSNS (1), /* constant shift costs */
808 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
809 COSTS_N_INSNS (4), /* HI */
810 COSTS_N_INSNS (3), /* SI */
811 COSTS_N_INSNS (4), /* DI */
812 COSTS_N_INSNS (5)}, /* other */
813 0, /* cost of multiply per each bit set */
814 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
815 COSTS_N_INSNS (26), /* HI */
816 COSTS_N_INSNS (42), /* SI */
817 COSTS_N_INSNS (74), /* DI */
818 COSTS_N_INSNS (74)}, /* other */
819 COSTS_N_INSNS (1), /* cost of movsx */
820 COSTS_N_INSNS (1), /* cost of movzx */
821 8, /* "large" insn */
822 9, /* MOVE_RATIO */
823 4, /* cost for loading QImode using movzbl */
824 {3, 4, 3}, /* cost of loading integer registers
825 in QImode, HImode and SImode.
826 Relative to reg-reg move (2). */
827 {3, 4, 3}, /* cost of storing integer registers */
828 4, /* cost of reg,reg fld/fst */
829 {4, 4, 12}, /* cost of loading fp registers
830 in SFmode, DFmode and XFmode */
831 {6, 6, 8}, /* cost of storing fp registers
832 in SFmode, DFmode and XFmode */
833 2, /* cost of moving MMX register */
834 {3, 3}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {4, 4}, /* cost of storing MMX registers
837 in SImode and DImode */
838 2, /* cost of moving SSE register */
839 {4, 3, 6}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {4, 4, 5}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 5, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 512, /* size of l2 cache. */
846 64, /* size of prefetch block */
847 /* New AMD processors never drop prefetches; if they cannot be performed
848 immediately, they are queued.  We set the number of simultaneous prefetches
849 to a large constant to reflect this (it is probably not a good idea to
850 leave the number of prefetches entirely unlimited, as their execution also
851 takes some time).  */
852 100, /* number of parallel prefetches */
853 3, /* Branch cost */
854 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
855 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
856 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
857 COSTS_N_INSNS (2), /* cost of FABS instruction. */
858 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
859 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
860
861 k8_memcpy,
862 k8_memset,
863 4, /* scalar_stmt_cost. */
864 2, /* scalar load_cost. */
865 2, /* scalar_store_cost. */
866 5, /* vec_stmt_cost. */
867 0, /* vec_to_scalar_cost. */
868 2, /* scalar_to_vec_cost. */
869 2, /* vec_align_load_cost. */
870 3, /* vec_unalign_load_cost. */
871 3, /* vec_store_cost. */
872 3, /* cond_taken_branch_cost. */
873 2, /* cond_not_taken_branch_cost. */
874 };
875
876 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
877 very small blocks it is better to use a loop.  For large blocks, a libcall can
878 do non-temporal accesses and beat inline code considerably.  */
879 static stringop_algs amdfam10_memcpy[2] = {
880 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
881 {-1, rep_prefix_4_byte, false}}},
882 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
883 {-1, libcall, false}}}};
884 static stringop_algs amdfam10_memset[2] = {
885 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
886 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
887 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
888 {-1, libcall, false}}}};
889 struct processor_costs amdfam10_cost = {
890 COSTS_N_INSNS (1), /* cost of an add instruction */
891 COSTS_N_INSNS (2), /* cost of a lea instruction */
892 COSTS_N_INSNS (1), /* variable shift costs */
893 COSTS_N_INSNS (1), /* constant shift costs */
894 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
895 COSTS_N_INSNS (4), /* HI */
896 COSTS_N_INSNS (3), /* SI */
897 COSTS_N_INSNS (4), /* DI */
898 COSTS_N_INSNS (5)}, /* other */
899 0, /* cost of multiply per each bit set */
900 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
901 COSTS_N_INSNS (35), /* HI */
902 COSTS_N_INSNS (51), /* SI */
903 COSTS_N_INSNS (83), /* DI */
904 COSTS_N_INSNS (83)}, /* other */
905 COSTS_N_INSNS (1), /* cost of movsx */
906 COSTS_N_INSNS (1), /* cost of movzx */
907 8, /* "large" insn */
908 9, /* MOVE_RATIO */
909 4, /* cost for loading QImode using movzbl */
910 {3, 4, 3}, /* cost of loading integer registers
911 in QImode, HImode and SImode.
912 Relative to reg-reg move (2). */
913 {3, 4, 3}, /* cost of storing integer registers */
914 4, /* cost of reg,reg fld/fst */
915 {4, 4, 12}, /* cost of loading fp registers
916 in SFmode, DFmode and XFmode */
917 {6, 6, 8}, /* cost of storing fp registers
918 in SFmode, DFmode and XFmode */
919 2, /* cost of moving MMX register */
920 {3, 3}, /* cost of loading MMX registers
921 in SImode and DImode */
922 {4, 4}, /* cost of storing MMX registers
923 in SImode and DImode */
924 2, /* cost of moving SSE register */
925 {4, 4, 3}, /* cost of loading SSE registers
926 in SImode, DImode and TImode */
927 {4, 4, 5}, /* cost of storing SSE registers
928 in SImode, DImode and TImode */
929 3, /* MMX or SSE register to integer */
930 /* On K8:
931 MOVD reg64, xmmreg Double FSTORE 4
932 MOVD reg32, xmmreg Double FSTORE 4
933 On AMDFAM10:
934 MOVD reg64, xmmreg Double FADD 3
935 1/1 1/1
936 MOVD reg32, xmmreg Double FADD 3
937 1/1 1/1 */
938 64, /* size of l1 cache. */
939 512, /* size of l2 cache. */
940 64, /* size of prefetch block */
941 /* New AMD processors never drop prefetches; if they cannot be performed
942 immediately, they are queued.  We set the number of simultaneous prefetches
943 to a large constant to reflect this (it is probably not a good idea to
944 leave the number of prefetches entirely unlimited, as their execution also
945 takes some time).  */
946 100, /* number of parallel prefetches */
947 2, /* Branch cost */
948 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
949 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
950 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
951 COSTS_N_INSNS (2), /* cost of FABS instruction. */
952 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
953 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
954
955 amdfam10_memcpy,
956 amdfam10_memset,
957 4, /* scalar_stmt_cost. */
958 2, /* scalar load_cost. */
959 2, /* scalar_store_cost. */
960 6, /* vec_stmt_cost. */
961 0, /* vec_to_scalar_cost. */
962 2, /* scalar_to_vec_cost. */
963 2, /* vec_align_load_cost. */
964 2, /* vec_unalign_load_cost. */
965 2, /* vec_store_cost. */
966 2, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
968 };
969
970 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
971 very small blocks it is better to use a loop.  For large blocks, a libcall
972 can do non-temporal accesses and beat inline code considerably.  */
973 static stringop_algs bdver1_memcpy[2] = {
974 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
975 {-1, rep_prefix_4_byte, false}}},
976 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
977 {-1, libcall, false}}}};
978 static stringop_algs bdver1_memset[2] = {
979 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
980 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
981 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
982 {-1, libcall, false}}}};
983
984 const struct processor_costs bdver1_cost = {
985 COSTS_N_INSNS (1), /* cost of an add instruction */
986 COSTS_N_INSNS (1), /* cost of a lea instruction */
987 COSTS_N_INSNS (1), /* variable shift costs */
988 COSTS_N_INSNS (1), /* constant shift costs */
989 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
990 COSTS_N_INSNS (4), /* HI */
991 COSTS_N_INSNS (4), /* SI */
992 COSTS_N_INSNS (6), /* DI */
993 COSTS_N_INSNS (6)}, /* other */
994 0, /* cost of multiply per each bit set */
995 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
996 COSTS_N_INSNS (35), /* HI */
997 COSTS_N_INSNS (51), /* SI */
998 COSTS_N_INSNS (83), /* DI */
999 COSTS_N_INSNS (83)}, /* other */
1000 COSTS_N_INSNS (1), /* cost of movsx */
1001 COSTS_N_INSNS (1), /* cost of movzx */
1002 8, /* "large" insn */
1003 9, /* MOVE_RATIO */
1004 4, /* cost for loading QImode using movzbl */
1005 {5, 5, 4}, /* cost of loading integer registers
1006 in QImode, HImode and SImode.
1007 Relative to reg-reg move (2). */
1008 {4, 4, 4}, /* cost of storing integer registers */
1009 2, /* cost of reg,reg fld/fst */
1010 {5, 5, 12}, /* cost of loading fp registers
1011 in SFmode, DFmode and XFmode */
1012 {4, 4, 8}, /* cost of storing fp registers
1013 in SFmode, DFmode and XFmode */
1014 2, /* cost of moving MMX register */
1015 {4, 4}, /* cost of loading MMX registers
1016 in SImode and DImode */
1017 {4, 4}, /* cost of storing MMX registers
1018 in SImode and DImode */
1019 2, /* cost of moving SSE register */
1020 {4, 4, 4}, /* cost of loading SSE registers
1021 in SImode, DImode and TImode */
1022 {4, 4, 4}, /* cost of storing SSE registers
1023 in SImode, DImode and TImode */
1024 2, /* MMX or SSE register to integer */
1025 /* On K8:
1026 MOVD reg64, xmmreg Double FSTORE 4
1027 MOVD reg32, xmmreg Double FSTORE 4
1028 On AMDFAM10:
1029 MOVD reg64, xmmreg Double FADD 3
1030 1/1 1/1
1031 MOVD reg32, xmmreg Double FADD 3
1032 1/1 1/1 */
1033 16, /* size of l1 cache. */
1034 2048, /* size of l2 cache. */
1035 64, /* size of prefetch block */
1036 /* New AMD processors never drop prefetches; if they cannot be performed
1037 immediately, they are queued.  We set the number of simultaneous prefetches
1038 to a large constant to reflect this (it is probably not a good idea to
1039 leave the number of prefetches entirely unlimited, as their execution also
1040 takes some time).  */
1041 100, /* number of parallel prefetches */
1042 2, /* Branch cost */
1043 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1044 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1045 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1046 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1047 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1048 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1049
1050 bdver1_memcpy,
1051 bdver1_memset,
1052 6, /* scalar_stmt_cost. */
1053 4, /* scalar load_cost. */
1054 4, /* scalar_store_cost. */
1055 6, /* vec_stmt_cost. */
1056 0, /* vec_to_scalar_cost. */
1057 2, /* scalar_to_vec_cost. */
1058 4, /* vec_align_load_cost. */
1059 4, /* vec_unalign_load_cost. */
1060 4, /* vec_store_cost. */
1061 4, /* cond_taken_branch_cost. */
1062 2, /* cond_not_taken_branch_cost. */
1063 };
1064
1065 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1066 very small blocks it is better to use a loop.  For large blocks, a libcall
1067 can do non-temporal accesses and beat inline code considerably.  */
1068
1069 static stringop_algs bdver2_memcpy[2] = {
1070 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1071 {-1, rep_prefix_4_byte, false}}},
1072 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1073 {-1, libcall, false}}}};
1074 static stringop_algs bdver2_memset[2] = {
1075 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1076 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1077 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1078 {-1, libcall, false}}}};
1079
1080 const struct processor_costs bdver2_cost = {
1081 COSTS_N_INSNS (1), /* cost of an add instruction */
1082 COSTS_N_INSNS (1), /* cost of a lea instruction */
1083 COSTS_N_INSNS (1), /* variable shift costs */
1084 COSTS_N_INSNS (1), /* constant shift costs */
1085 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1086 COSTS_N_INSNS (4), /* HI */
1087 COSTS_N_INSNS (4), /* SI */
1088 COSTS_N_INSNS (6), /* DI */
1089 COSTS_N_INSNS (6)}, /* other */
1090 0, /* cost of multiply per each bit set */
1091 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1092 COSTS_N_INSNS (35), /* HI */
1093 COSTS_N_INSNS (51), /* SI */
1094 COSTS_N_INSNS (83), /* DI */
1095 COSTS_N_INSNS (83)}, /* other */
1096 COSTS_N_INSNS (1), /* cost of movsx */
1097 COSTS_N_INSNS (1), /* cost of movzx */
1098 8, /* "large" insn */
1099 9, /* MOVE_RATIO */
1100 4, /* cost for loading QImode using movzbl */
1101 {5, 5, 4}, /* cost of loading integer registers
1102 in QImode, HImode and SImode.
1103 Relative to reg-reg move (2). */
1104 {4, 4, 4}, /* cost of storing integer registers */
1105 2, /* cost of reg,reg fld/fst */
1106 {5, 5, 12}, /* cost of loading fp registers
1107 in SFmode, DFmode and XFmode */
1108 {4, 4, 8}, /* cost of storing fp registers
1109 in SFmode, DFmode and XFmode */
1110 2, /* cost of moving MMX register */
1111 {4, 4}, /* cost of loading MMX registers
1112 in SImode and DImode */
1113 {4, 4}, /* cost of storing MMX registers
1114 in SImode and DImode */
1115 2, /* cost of moving SSE register */
1116 {4, 4, 4}, /* cost of loading SSE registers
1117 in SImode, DImode and TImode */
1118 {4, 4, 4}, /* cost of storing SSE registers
1119 in SImode, DImode and TImode */
1120 2, /* MMX or SSE register to integer */
1121 /* On K8:
1122 MOVD reg64, xmmreg Double FSTORE 4
1123 MOVD reg32, xmmreg Double FSTORE 4
1124 On AMDFAM10:
1125 MOVD reg64, xmmreg Double FADD 3
1126 1/1 1/1
1127 MOVD reg32, xmmreg Double FADD 3
1128 1/1 1/1 */
1129 16, /* size of l1 cache. */
1130 2048, /* size of l2 cache. */
1131 64, /* size of prefetch block */
1132 /* New AMD processors never drop prefetches; if they cannot be performed
1133 immediately, they are queued.  We set the number of simultaneous prefetches
1134 to a large constant to reflect this (it is probably not a good idea to
1135 leave the number of prefetches entirely unlimited, as their execution also
1136 takes some time).  */
1137 100, /* number of parallel prefetches */
1138 2, /* Branch cost */
1139 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1140 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1141 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1142 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1143 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1144 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1145
1146 bdver2_memcpy,
1147 bdver2_memset,
1148 6, /* scalar_stmt_cost. */
1149 4, /* scalar load_cost. */
1150 4, /* scalar_store_cost. */
1151 6, /* vec_stmt_cost. */
1152 0, /* vec_to_scalar_cost. */
1153 2, /* scalar_to_vec_cost. */
1154 4, /* vec_align_load_cost. */
1155 4, /* vec_unalign_load_cost. */
1156 4, /* vec_store_cost. */
1157 4, /* cond_taken_branch_cost. */
1158 2, /* cond_not_taken_branch_cost. */
1159 };
1160
1161
1162 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1163 very small blocks it is better to use a loop.  For large blocks, a libcall
1164 can do non-temporal accesses and beat inline code considerably.  */
1165 static stringop_algs bdver3_memcpy[2] = {
1166 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1167 {-1, rep_prefix_4_byte, false}}},
1168 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1169 {-1, libcall, false}}}};
1170 static stringop_algs bdver3_memset[2] = {
1171 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1172 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1173 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1174 {-1, libcall, false}}}};
1175 struct processor_costs bdver3_cost = {
1176 COSTS_N_INSNS (1), /* cost of an add instruction */
1177 COSTS_N_INSNS (1), /* cost of a lea instruction */
1178 COSTS_N_INSNS (1), /* variable shift costs */
1179 COSTS_N_INSNS (1), /* constant shift costs */
1180 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1181 COSTS_N_INSNS (4), /* HI */
1182 COSTS_N_INSNS (4), /* SI */
1183 COSTS_N_INSNS (6), /* DI */
1184 COSTS_N_INSNS (6)}, /* other */
1185 0, /* cost of multiply per each bit set */
1186 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1187 COSTS_N_INSNS (35), /* HI */
1188 COSTS_N_INSNS (51), /* SI */
1189 COSTS_N_INSNS (83), /* DI */
1190 COSTS_N_INSNS (83)}, /* other */
1191 COSTS_N_INSNS (1), /* cost of movsx */
1192 COSTS_N_INSNS (1), /* cost of movzx */
1193 8, /* "large" insn */
1194 9, /* MOVE_RATIO */
1195 4, /* cost for loading QImode using movzbl */
1196 {5, 5, 4}, /* cost of loading integer registers
1197 in QImode, HImode and SImode.
1198 Relative to reg-reg move (2). */
1199 {4, 4, 4}, /* cost of storing integer registers */
1200 2, /* cost of reg,reg fld/fst */
1201 {5, 5, 12}, /* cost of loading fp registers
1202 in SFmode, DFmode and XFmode */
1203 {4, 4, 8}, /* cost of storing fp registers
1204 in SFmode, DFmode and XFmode */
1205 2, /* cost of moving MMX register */
1206 {4, 4}, /* cost of loading MMX registers
1207 in SImode and DImode */
1208 {4, 4}, /* cost of storing MMX registers
1209 in SImode and DImode */
1210 2, /* cost of moving SSE register */
1211 {4, 4, 4}, /* cost of loading SSE registers
1212 in SImode, DImode and TImode */
1213 {4, 4, 4}, /* cost of storing SSE registers
1214 in SImode, DImode and TImode */
1215 2, /* MMX or SSE register to integer */
1216 16, /* size of l1 cache. */
1217 2048, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued.  We set the number of simultaneous prefetches
1221 to a large constant to reflect this (it is probably not a good idea to
1222 leave the number of prefetches entirely unlimited, as their execution also
1223 takes some time).  */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1232
1233 bdver3_memcpy,
1234 bdver3_memset,
1235 6, /* scalar_stmt_cost. */
1236 4, /* scalar load_cost. */
1237 4, /* scalar_store_cost. */
1238 6, /* vec_stmt_cost. */
1239 0, /* vec_to_scalar_cost. */
1240 2, /* scalar_to_vec_cost. */
1241 4, /* vec_align_load_cost. */
1242 4, /* vec_unalign_load_cost. */
1243 4, /* vec_store_cost. */
1244 4, /* cond_taken_branch_cost. */
1245 2, /* cond_not_taken_branch_cost. */
1246 };
1247
1248 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1249 very small blocks it is better to use a loop.  For large blocks, a libcall
1250 can do non-temporal accesses and beat inline code considerably.  */
1251 static stringop_algs bdver4_memcpy[2] = {
1252 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1253 {-1, rep_prefix_4_byte, false}}},
1254 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1255 {-1, libcall, false}}}};
1256 static stringop_algs bdver4_memset[2] = {
1257 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1258 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1259 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1260 {-1, libcall, false}}}};
1261 struct processor_costs bdver4_cost = {
1262 COSTS_N_INSNS (1), /* cost of an add instruction */
1263 COSTS_N_INSNS (1), /* cost of a lea instruction */
1264 COSTS_N_INSNS (1), /* variable shift costs */
1265 COSTS_N_INSNS (1), /* constant shift costs */
1266 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1267 COSTS_N_INSNS (4), /* HI */
1268 COSTS_N_INSNS (4), /* SI */
1269 COSTS_N_INSNS (6), /* DI */
1270 COSTS_N_INSNS (6)}, /* other */
1271 0, /* cost of multiply per each bit set */
1272 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1273 COSTS_N_INSNS (35), /* HI */
1274 COSTS_N_INSNS (51), /* SI */
1275 COSTS_N_INSNS (83), /* DI */
1276 COSTS_N_INSNS (83)}, /* other */
1277 COSTS_N_INSNS (1), /* cost of movsx */
1278 COSTS_N_INSNS (1), /* cost of movzx */
1279 8, /* "large" insn */
1280 9, /* MOVE_RATIO */
1281 4, /* cost for loading QImode using movzbl */
1282 {5, 5, 4}, /* cost of loading integer registers
1283 in QImode, HImode and SImode.
1284 Relative to reg-reg move (2). */
1285 {4, 4, 4}, /* cost of storing integer registers */
1286 2, /* cost of reg,reg fld/fst */
1287 {5, 5, 12}, /* cost of loading fp registers
1288 in SFmode, DFmode and XFmode */
1289 {4, 4, 8}, /* cost of storing fp registers
1290 in SFmode, DFmode and XFmode */
1291 2, /* cost of moving MMX register */
1292 {4, 4}, /* cost of loading MMX registers
1293 in SImode and DImode */
1294 {4, 4}, /* cost of storing MMX registers
1295 in SImode and DImode */
1296 2, /* cost of moving SSE register */
1297 {4, 4, 4}, /* cost of loading SSE registers
1298 in SImode, DImode and TImode */
1299 {4, 4, 4}, /* cost of storing SSE registers
1300 in SImode, DImode and TImode */
1301 2, /* MMX or SSE register to integer */
1302 16, /* size of l1 cache. */
1303 2048, /* size of l2 cache. */
1304 64, /* size of prefetch block */
1305 /* New AMD processors never drop prefetches; if they cannot be performed
1306 immediately, they are queued.  We set the number of simultaneous prefetches
1307 to a large constant to reflect this (it is probably not a good idea to
1308 leave the number of prefetches entirely unlimited, as their execution also
1309 takes some time).  */
1310 100, /* number of parallel prefetches */
1311 2, /* Branch cost */
1312 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1313 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1314 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1315 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1316 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1317 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1318
1319 bdver4_memcpy,
1320 bdver4_memset,
1321 6, /* scalar_stmt_cost. */
1322 4, /* scalar load_cost. */
1323 4, /* scalar_store_cost. */
1324 6, /* vec_stmt_cost. */
1325 0, /* vec_to_scalar_cost. */
1326 2, /* scalar_to_vec_cost. */
1327 4, /* vec_align_load_cost. */
1328 4, /* vec_unalign_load_cost. */
1329 4, /* vec_store_cost. */
1330 4, /* cond_taken_branch_cost. */
1331 2, /* cond_not_taken_branch_cost. */
1332 };
1333
1334
1335 /* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
1336 very small blocks it is better to use a loop.  For large blocks, a libcall
1337 can do non-temporal accesses and beat inline code considerably.  */
1338 static stringop_algs znver1_memcpy[2] = {
1339 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1340 {-1, rep_prefix_4_byte, false}}},
1341 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1342 {-1, libcall, false}}}};
1343 static stringop_algs znver1_memset[2] = {
1344 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1345 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1346 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1347 {-1, libcall, false}}}};
1348 struct processor_costs znver1_cost = {
1349 COSTS_N_INSNS (1), /* cost of an add instruction. */
1350 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1351 COSTS_N_INSNS (1), /* variable shift costs. */
1352 COSTS_N_INSNS (1), /* constant shift costs. */
1353 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1354 COSTS_N_INSNS (3), /* HI. */
1355 COSTS_N_INSNS (3), /* SI. */
1356 COSTS_N_INSNS (4), /* DI. */
1357 COSTS_N_INSNS (4)}, /* other. */
1358 0, /* cost of multiply per each bit
1359 set. */
1360 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1361 COSTS_N_INSNS (35), /* HI. */
1362 COSTS_N_INSNS (51), /* SI. */
1363 COSTS_N_INSNS (83), /* DI. */
1364 COSTS_N_INSNS (83)}, /* other. */
1365 COSTS_N_INSNS (1), /* cost of movsx. */
1366 COSTS_N_INSNS (1), /* cost of movzx. */
1367 8, /* "large" insn. */
1368 9, /* MOVE_RATIO. */
1369 4, /* cost for loading QImode using
1370 movzbl. */
1371 {5, 5, 4}, /* cost of loading integer registers
1372 in QImode, HImode and SImode.
1373 Relative to reg-reg move (2). */
1374 {4, 4, 4}, /* cost of storing integer
1375 registers. */
1376 2, /* cost of reg,reg fld/fst. */
1377 {5, 5, 12}, /* cost of loading fp registers
1378 in SFmode, DFmode and XFmode. */
1379 {4, 4, 8}, /* cost of storing fp registers
1380 in SFmode, DFmode and XFmode. */
1381 2, /* cost of moving MMX register. */
1382 {4, 4}, /* cost of loading MMX registers
1383 in SImode and DImode. */
1384 {4, 4}, /* cost of storing MMX registers
1385 in SImode and DImode. */
1386 2, /* cost of moving SSE register. */
1387 {4, 4, 4}, /* cost of loading SSE registers
1388 in SImode, DImode and TImode. */
1389 {4, 4, 4}, /* cost of storing SSE registers
1390 in SImode, DImode and TImode. */
1391 2, /* MMX or SSE register to integer. */
1392 32, /* size of l1 cache. */
1393 512, /* size of l2 cache. */
1394 64, /* size of prefetch block. */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued.  We set the number of simultaneous prefetches
1397 to a large constant to reflect this (it is probably not a good idea to
1398 leave the number of prefetches entirely unlimited, as their execution also
1399 takes some time).  */
1400 100, /* number of parallel prefetches. */
1401 2, /* Branch cost. */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1408
1409 znver1_memcpy,
1410 znver1_memset,
1411 6, /* scalar_stmt_cost. */
1412 4, /* scalar load_cost. */
1413 4, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 4, /* vec_align_load_cost. */
1418 4, /* vec_unalign_load_cost. */
1419 4, /* vec_store_cost. */
1420 4, /* cond_taken_branch_cost. */
1421 2, /* cond_not_taken_branch_cost. */
1422 };
1423
1424 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1425 very small blocks it is better to use a loop.  For large blocks, a libcall
1426 can do nontemporal accesses and beat inline expansion considerably. */
1427 static stringop_algs btver1_memcpy[2] = {
1428 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1429 {-1, rep_prefix_4_byte, false}}},
1430 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1431 {-1, libcall, false}}}};
1432 static stringop_algs btver1_memset[2] = {
1433 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1434 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1435 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1436 {-1, libcall, false}}}};
1437 const struct processor_costs btver1_cost = {
1438 COSTS_N_INSNS (1), /* cost of an add instruction */
1439 COSTS_N_INSNS (2), /* cost of a lea instruction */
1440 COSTS_N_INSNS (1), /* variable shift costs */
1441 COSTS_N_INSNS (1), /* constant shift costs */
1442 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1443 COSTS_N_INSNS (4), /* HI */
1444 COSTS_N_INSNS (3), /* SI */
1445 COSTS_N_INSNS (4), /* DI */
1446 COSTS_N_INSNS (5)}, /* other */
1447 0, /* cost of multiply per each bit set */
1448 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1449 COSTS_N_INSNS (35), /* HI */
1450 COSTS_N_INSNS (51), /* SI */
1451 COSTS_N_INSNS (83), /* DI */
1452 COSTS_N_INSNS (83)}, /* other */
1453 COSTS_N_INSNS (1), /* cost of movsx */
1454 COSTS_N_INSNS (1), /* cost of movzx */
1455 8, /* "large" insn */
1456 9, /* MOVE_RATIO */
1457 4, /* cost for loading QImode using movzbl */
1458 {3, 4, 3}, /* cost of loading integer registers
1459 in QImode, HImode and SImode.
1460 Relative to reg-reg move (2). */
1461 {3, 4, 3}, /* cost of storing integer registers */
1462 4, /* cost of reg,reg fld/fst */
1463 {4, 4, 12}, /* cost of loading fp registers
1464 in SFmode, DFmode and XFmode */
1465 {6, 6, 8}, /* cost of storing fp registers
1466 in SFmode, DFmode and XFmode */
1467 2, /* cost of moving MMX register */
1468 {3, 3}, /* cost of loading MMX registers
1469 in SImode and DImode */
1470 {4, 4}, /* cost of storing MMX registers
1471 in SImode and DImode */
1472 2, /* cost of moving SSE register */
1473 {4, 4, 3}, /* cost of loading SSE registers
1474 in SImode, DImode and TImode */
1475 {4, 4, 5}, /* cost of storing SSE registers
1476 in SImode, DImode and TImode */
1477 3, /* MMX or SSE register to integer */
1478 /* On K8:
1479 MOVD reg64, xmmreg Double FSTORE 4
1480 MOVD reg32, xmmreg Double FSTORE 4
1481 On AMDFAM10:
1482 MOVD reg64, xmmreg Double FADD 3
1483 1/1 1/1
1484 MOVD reg32, xmmreg Double FADD 3
1485 1/1 1/1 */
1486 32, /* size of l1 cache. */
1487 512, /* size of l2 cache. */
1488 64, /* size of prefetch block */
1489 100, /* number of parallel prefetches */
1490 2, /* Branch cost */
1491 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1492 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1493 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1496 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1497
1498 btver1_memcpy,
1499 btver1_memset,
1500 4, /* scalar_stmt_cost. */
1501 2, /* scalar load_cost. */
1502 2, /* scalar_store_cost. */
1503 6, /* vec_stmt_cost. */
1504 0, /* vec_to_scalar_cost. */
1505 2, /* scalar_to_vec_cost. */
1506 2, /* vec_align_load_cost. */
1507 2, /* vec_unalign_load_cost. */
1508 2, /* vec_store_cost. */
1509 2, /* cond_taken_branch_cost. */
1510 1, /* cond_not_taken_branch_cost. */
1511 };
1512
1513 static stringop_algs btver2_memcpy[2] = {
1514 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1515 {-1, rep_prefix_4_byte, false}}},
1516 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1517 {-1, libcall, false}}}};
1518 static stringop_algs btver2_memset[2] = {
1519 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1520 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1521 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1522 {-1, libcall, false}}}};
1523 const struct processor_costs btver2_cost = {
1524 COSTS_N_INSNS (1), /* cost of an add instruction */
1525 COSTS_N_INSNS (2), /* cost of a lea instruction */
1526 COSTS_N_INSNS (1), /* variable shift costs */
1527 COSTS_N_INSNS (1), /* constant shift costs */
1528 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1529 COSTS_N_INSNS (4), /* HI */
1530 COSTS_N_INSNS (3), /* SI */
1531 COSTS_N_INSNS (4), /* DI */
1532 COSTS_N_INSNS (5)}, /* other */
1533 0, /* cost of multiply per each bit set */
1534 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1535 COSTS_N_INSNS (35), /* HI */
1536 COSTS_N_INSNS (51), /* SI */
1537 COSTS_N_INSNS (83), /* DI */
1538 COSTS_N_INSNS (83)}, /* other */
1539 COSTS_N_INSNS (1), /* cost of movsx */
1540 COSTS_N_INSNS (1), /* cost of movzx */
1541 8, /* "large" insn */
1542 9, /* MOVE_RATIO */
1543 4, /* cost for loading QImode using movzbl */
1544 {3, 4, 3}, /* cost of loading integer registers
1545 in QImode, HImode and SImode.
1546 Relative to reg-reg move (2). */
1547 {3, 4, 3}, /* cost of storing integer registers */
1548 4, /* cost of reg,reg fld/fst */
1549 {4, 4, 12}, /* cost of loading fp registers
1550 in SFmode, DFmode and XFmode */
1551 {6, 6, 8}, /* cost of storing fp registers
1552 in SFmode, DFmode and XFmode */
1553 2, /* cost of moving MMX register */
1554 {3, 3}, /* cost of loading MMX registers
1555 in SImode and DImode */
1556 {4, 4}, /* cost of storing MMX registers
1557 in SImode and DImode */
1558 2, /* cost of moving SSE register */
1559 {4, 4, 3}, /* cost of loading SSE registers
1560 in SImode, DImode and TImode */
1561 {4, 4, 5}, /* cost of storing SSE registers
1562 in SImode, DImode and TImode */
1563 3, /* MMX or SSE register to integer */
1564 /* On K8:
1565 MOVD reg64, xmmreg Double FSTORE 4
1566 MOVD reg32, xmmreg Double FSTORE 4
1567 On AMDFAM10:
1568 MOVD reg64, xmmreg Double FADD 3
1569 1/1 1/1
1570 MOVD reg32, xmmreg Double FADD 3
1571 1/1 1/1 */
1572 32, /* size of l1 cache. */
1573 2048, /* size of l2 cache. */
1574 64, /* size of prefetch block */
1575 100, /* number of parallel prefetches */
1576 2, /* Branch cost */
1577 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1578 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1579 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1580 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1581 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1582 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1583 btver2_memcpy,
1584 btver2_memset,
1585 4, /* scalar_stmt_cost. */
1586 2, /* scalar load_cost. */
1587 2, /* scalar_store_cost. */
1588 6, /* vec_stmt_cost. */
1589 0, /* vec_to_scalar_cost. */
1590 2, /* scalar_to_vec_cost. */
1591 2, /* vec_align_load_cost. */
1592 2, /* vec_unalign_load_cost. */
1593 2, /* vec_store_cost. */
1594 2, /* cond_taken_branch_cost. */
1595 1, /* cond_not_taken_branch_cost. */
1596 };
1597
1598 static stringop_algs pentium4_memcpy[2] = {
1599 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1600 DUMMY_STRINGOP_ALGS};
1601 static stringop_algs pentium4_memset[2] = {
1602 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1603 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1604 DUMMY_STRINGOP_ALGS};
1605
1606 static const
1607 struct processor_costs pentium4_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (3), /* cost of a lea instruction */
1610 COSTS_N_INSNS (4), /* variable shift costs */
1611 COSTS_N_INSNS (4), /* constant shift costs */
1612 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (15), /* HI */
1614 COSTS_N_INSNS (15), /* SI */
1615 COSTS_N_INSNS (15), /* DI */
1616 COSTS_N_INSNS (15)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (56), /* HI */
1620 COSTS_N_INSNS (56), /* SI */
1621 COSTS_N_INSNS (56), /* DI */
1622 COSTS_N_INSNS (56)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 16, /* "large" insn */
1626 6, /* MOVE_RATIO */
1627 2, /* cost for loading QImode using movzbl */
1628 {4, 5, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {2, 3, 2}, /* cost of storing integer registers */
1632 2, /* cost of reg,reg fld/fst */
1633 {2, 2, 6}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {4, 4, 6}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 2, /* cost of moving MMX register */
1638 {2, 2}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {2, 2}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 12, /* cost of moving SSE register */
1643 {12, 12, 12}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {2, 2, 8}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 10, /* MMX or SSE register to integer */
1648 8, /* size of l1 cache. */
1649 256, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 6, /* number of parallel prefetches */
1652 2, /* Branch cost */
1653 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1659 pentium4_memcpy,
1660 pentium4_memset,
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
1672 };
1673
1674 static stringop_algs nocona_memcpy[2] = {
1675 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1676 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1677 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1678
1679 static stringop_algs nocona_memset[2] = {
1680 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1681 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1682 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1683 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1684
1685 static const
1686 struct processor_costs nocona_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1), /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (10), /* HI */
1693 COSTS_N_INSNS (10), /* SI */
1694 COSTS_N_INSNS (10), /* DI */
1695 COSTS_N_INSNS (10)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (66), /* HI */
1699 COSTS_N_INSNS (66), /* SI */
1700 COSTS_N_INSNS (66), /* DI */
1701 COSTS_N_INSNS (66)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 16, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 3, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {4, 4, 4}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 6, /* cost of moving MMX register */
1717 {12, 12}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {12, 12}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 6, /* cost of moving SSE register */
1722 {12, 12, 12}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {12, 12, 12}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 8, /* MMX or SSE register to integer */
1727 8, /* size of l1 cache. */
1728 1024, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 8, /* number of parallel prefetches */
1731 1, /* Branch cost */
1732 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1738 nocona_memcpy,
1739 nocona_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 1, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs atom_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs atom_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs atom_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (4), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 atom_memcpy,
1816 atom_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 1, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 static stringop_algs slm_memcpy[2] = {
1831 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1832 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1833 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1834 static stringop_algs slm_memset[2] = {
1835 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1836 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1837 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1838 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1839 static const
1840 struct processor_costs slm_cost = {
1841 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1843 COSTS_N_INSNS (1), /* variable shift costs */
1844 COSTS_N_INSNS (1), /* constant shift costs */
1845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1846 COSTS_N_INSNS (3), /* HI */
1847 COSTS_N_INSNS (3), /* SI */
1848 COSTS_N_INSNS (4), /* DI */
1849 COSTS_N_INSNS (2)}, /* other */
1850 0, /* cost of multiply per each bit set */
1851 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1852 COSTS_N_INSNS (26), /* HI */
1853 COSTS_N_INSNS (42), /* SI */
1854 COSTS_N_INSNS (74), /* DI */
1855 COSTS_N_INSNS (74)}, /* other */
1856 COSTS_N_INSNS (1), /* cost of movsx */
1857 COSTS_N_INSNS (1), /* cost of movzx */
1858 8, /* "large" insn */
1859 17, /* MOVE_RATIO */
1860 4, /* cost for loading QImode using movzbl */
1861 {4, 4, 4}, /* cost of loading integer registers
1862 in QImode, HImode and SImode.
1863 Relative to reg-reg move (2). */
1864 {4, 4, 4}, /* cost of storing integer registers */
1865 4, /* cost of reg,reg fld/fst */
1866 {12, 12, 12}, /* cost of loading fp registers
1867 in SFmode, DFmode and XFmode */
1868 {6, 6, 8}, /* cost of storing fp registers
1869 in SFmode, DFmode and XFmode */
1870 2, /* cost of moving MMX register */
1871 {8, 8}, /* cost of loading MMX registers
1872 in SImode and DImode */
1873 {8, 8}, /* cost of storing MMX registers
1874 in SImode and DImode */
1875 2, /* cost of moving SSE register */
1876 {8, 8, 8}, /* cost of loading SSE registers
1877 in SImode, DImode and TImode */
1878 {8, 8, 8}, /* cost of storing SSE registers
1879 in SImode, DImode and TImode */
1880 5, /* MMX or SSE register to integer */
1881 32, /* size of l1 cache. */
1882 256, /* size of l2 cache. */
1883 64, /* size of prefetch block */
1884 6, /* number of parallel prefetches */
1885 3, /* Branch cost */
1886 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1887 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1888 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1889 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1890 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1891 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1892 slm_memcpy,
1893 slm_memset,
1894 1, /* scalar_stmt_cost. */
1895 1, /* scalar load_cost. */
1896 1, /* scalar_store_cost. */
1897 1, /* vec_stmt_cost. */
1898 4, /* vec_to_scalar_cost. */
1899 1, /* scalar_to_vec_cost. */
1900 1, /* vec_align_load_cost. */
1901 2, /* vec_unalign_load_cost. */
1902 1, /* vec_store_cost. */
1903 3, /* cond_taken_branch_cost. */
1904 1, /* cond_not_taken_branch_cost. */
1905 };
1906
1907 static stringop_algs intel_memcpy[2] = {
1908 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1909 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1910 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1911 static stringop_algs intel_memset[2] = {
1912 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1914 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1915 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1916 static const
1917 struct processor_costs intel_cost = {
1918 COSTS_N_INSNS (1), /* cost of an add instruction */
1919 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1920 COSTS_N_INSNS (1), /* variable shift costs */
1921 COSTS_N_INSNS (1), /* constant shift costs */
1922 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1923 COSTS_N_INSNS (3), /* HI */
1924 COSTS_N_INSNS (3), /* SI */
1925 COSTS_N_INSNS (4), /* DI */
1926 COSTS_N_INSNS (2)}, /* other */
1927 0, /* cost of multiply per each bit set */
1928 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1929 COSTS_N_INSNS (26), /* HI */
1930 COSTS_N_INSNS (42), /* SI */
1931 COSTS_N_INSNS (74), /* DI */
1932 COSTS_N_INSNS (74)}, /* other */
1933 COSTS_N_INSNS (1), /* cost of movsx */
1934 COSTS_N_INSNS (1), /* cost of movzx */
1935 8, /* "large" insn */
1936 17, /* MOVE_RATIO */
1937 4, /* cost for loading QImode using movzbl */
1938 {4, 4, 4}, /* cost of loading integer registers
1939 in QImode, HImode and SImode.
1940 Relative to reg-reg move (2). */
1941 {4, 4, 4}, /* cost of storing integer registers */
1942 4, /* cost of reg,reg fld/fst */
1943 {12, 12, 12}, /* cost of loading fp registers
1944 in SFmode, DFmode and XFmode */
1945 {6, 6, 8}, /* cost of storing fp registers
1946 in SFmode, DFmode and XFmode */
1947 2, /* cost of moving MMX register */
1948 {8, 8}, /* cost of loading MMX registers
1949 in SImode and DImode */
1950 {8, 8}, /* cost of storing MMX registers
1951 in SImode and DImode */
1952 2, /* cost of moving SSE register */
1953 {8, 8, 8}, /* cost of loading SSE registers
1954 in SImode, DImode and TImode */
1955 {8, 8, 8}, /* cost of storing SSE registers
1956 in SImode, DImode and TImode */
1957 5, /* MMX or SSE register to integer */
1958 32, /* size of l1 cache. */
1959 256, /* size of l2 cache. */
1960 64, /* size of prefetch block */
1961 6, /* number of parallel prefetches */
1962 3, /* Branch cost */
1963 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1964 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1965 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1966 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1967 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1968 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1969 intel_memcpy,
1970 intel_memset,
1971 1, /* scalar_stmt_cost. */
1972 1, /* scalar load_cost. */
1973 1, /* scalar_store_cost. */
1974 1, /* vec_stmt_cost. */
1975 4, /* vec_to_scalar_cost. */
1976 1, /* scalar_to_vec_cost. */
1977 1, /* vec_align_load_cost. */
1978 2, /* vec_unalign_load_cost. */
1979 1, /* vec_store_cost. */
1980 3, /* cond_taken_branch_cost. */
1981 1, /* cond_not_taken_branch_cost. */
1982 };
1983
1984 /* Generic should produce code tuned for Core i7 (and newer chips)
1985 and btver1 (and newer chips). */
1986
1987 static stringop_algs generic_memcpy[2] = {
1988 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1989 {-1, libcall, false}}},
1990 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1991 {-1, libcall, false}}}};
1992 static stringop_algs generic_memset[2] = {
1993 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1994 {-1, libcall, false}}},
1995 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1996 {-1, libcall, false}}}};
1997 static const
1998 struct processor_costs generic_cost = {
1999 COSTS_N_INSNS (1), /* cost of an add instruction */
2000 /* On all chips taken into consideration, lea takes 2 cycles or more.  With
2001 that cost, however, our current implementation of synth_mult results in
2002 the use of unnecessary temporary registers, causing regressions on several
2003 SPECfp benchmarks. */
2004 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2005 COSTS_N_INSNS (1), /* variable shift costs */
2006 COSTS_N_INSNS (1), /* constant shift costs */
2007 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2008 COSTS_N_INSNS (4), /* HI */
2009 COSTS_N_INSNS (3), /* SI */
2010 COSTS_N_INSNS (4), /* DI */
2011 COSTS_N_INSNS (2)}, /* other */
2012 0, /* cost of multiply per each bit set */
2013 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2014 COSTS_N_INSNS (26), /* HI */
2015 COSTS_N_INSNS (42), /* SI */
2016 COSTS_N_INSNS (74), /* DI */
2017 COSTS_N_INSNS (74)}, /* other */
2018 COSTS_N_INSNS (1), /* cost of movsx */
2019 COSTS_N_INSNS (1), /* cost of movzx */
2020 8, /* "large" insn */
2021 17, /* MOVE_RATIO */
2022 4, /* cost for loading QImode using movzbl */
2023 {4, 4, 4}, /* cost of loading integer registers
2024 in QImode, HImode and SImode.
2025 Relative to reg-reg move (2). */
2026 {4, 4, 4}, /* cost of storing integer registers */
2027 4, /* cost of reg,reg fld/fst */
2028 {12, 12, 12}, /* cost of loading fp registers
2029 in SFmode, DFmode and XFmode */
2030 {6, 6, 8}, /* cost of storing fp registers
2031 in SFmode, DFmode and XFmode */
2032 2, /* cost of moving MMX register */
2033 {8, 8}, /* cost of loading MMX registers
2034 in SImode and DImode */
2035 {8, 8}, /* cost of storing MMX registers
2036 in SImode and DImode */
2037 2, /* cost of moving SSE register */
2038 {8, 8, 8}, /* cost of loading SSE registers
2039 in SImode, DImode and TImode */
2040 {8, 8, 8}, /* cost of storing SSE registers
2041 in SImode, DImode and TImode */
2042 5, /* MMX or SSE register to integer */
2043 32, /* size of l1 cache. */
2044 512, /* size of l2 cache. */
2045 64, /* size of prefetch block */
2046 6, /* number of parallel prefetches */
2047 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
2048 value is increased to the perhaps more appropriate value of 5. */
2049 3, /* Branch cost */
2050 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2051 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2052 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2053 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2054 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2055 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2056 generic_memcpy,
2057 generic_memset,
2058 1, /* scalar_stmt_cost. */
2059 1, /* scalar load_cost. */
2060 1, /* scalar_store_cost. */
2061 1, /* vec_stmt_cost. */
2062 1, /* vec_to_scalar_cost. */
2063 1, /* scalar_to_vec_cost. */
2064 1, /* vec_align_load_cost. */
2065 2, /* vec_unalign_load_cost. */
2066 1, /* vec_store_cost. */
2067 3, /* cond_taken_branch_cost. */
2068 1, /* cond_not_taken_branch_cost. */
2069 };
2070
2071 /* core_cost should produce code tuned for the Core family of CPUs. */
2072 static stringop_algs core_memcpy[2] = {
2073 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2074 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2075 {-1, libcall, false}}}};
2076 static stringop_algs core_memset[2] = {
2077 {libcall, {{6, loop_1_byte, true},
2078 {24, loop, true},
2079 {8192, rep_prefix_4_byte, true},
2080 {-1, libcall, false}}},
2081 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2082 {-1, libcall, false}}}};
2083
2084 static const
2085 struct processor_costs core_cost = {
2086 COSTS_N_INSNS (1), /* cost of an add instruction */
2087 /* On all chips taken into consideration, lea takes 2 cycles or more.  With
2088 that cost, however, our current implementation of synth_mult results in
2089 the use of unnecessary temporary registers, causing regressions on several
2090 SPECfp benchmarks. */
2091 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2092 COSTS_N_INSNS (1), /* variable shift costs */
2093 COSTS_N_INSNS (1), /* constant shift costs */
2094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2095 COSTS_N_INSNS (4), /* HI */
2096 COSTS_N_INSNS (3), /* SI */
2097 COSTS_N_INSNS (4), /* DI */
2098 COSTS_N_INSNS (2)}, /* other */
2099 0, /* cost of multiply per each bit set */
2100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2101 COSTS_N_INSNS (26), /* HI */
2102 COSTS_N_INSNS (42), /* SI */
2103 COSTS_N_INSNS (74), /* DI */
2104 COSTS_N_INSNS (74)}, /* other */
2105 COSTS_N_INSNS (1), /* cost of movsx */
2106 COSTS_N_INSNS (1), /* cost of movzx */
2107 8, /* "large" insn */
2108 17, /* MOVE_RATIO */
2109 4, /* cost for loading QImode using movzbl */
2110 {4, 4, 4}, /* cost of loading integer registers
2111 in QImode, HImode and SImode.
2112 Relative to reg-reg move (2). */
2113 {4, 4, 4}, /* cost of storing integer registers */
2114 4, /* cost of reg,reg fld/fst */
2115 {12, 12, 12}, /* cost of loading fp registers
2116 in SFmode, DFmode and XFmode */
2117 {6, 6, 8}, /* cost of storing fp registers
2118 in SFmode, DFmode and XFmode */
2119 2, /* cost of moving MMX register */
2120 {8, 8}, /* cost of loading MMX registers
2121 in SImode and DImode */
2122 {8, 8}, /* cost of storing MMX registers
2123 in SImode and DImode */
2124 2, /* cost of moving SSE register */
2125 {8, 8, 8}, /* cost of loading SSE registers
2126 in SImode, DImode and TImode */
2127 {8, 8, 8}, /* cost of storing SSE registers
2128 in SImode, DImode and TImode */
2129 5, /* MMX or SSE register to integer */
2130 64, /* size of l1 cache. */
2131 512, /* size of l2 cache. */
2132 64, /* size of prefetch block */
2133 6, /* number of parallel prefetches */
2134 /* FIXME perhaps more appropriate value is 5. */
2135 3, /* Branch cost */
2136 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2137 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2138 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2139 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2140 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2141 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2142 core_memcpy,
2143 core_memset,
2144 1, /* scalar_stmt_cost. */
2145 1, /* scalar load_cost. */
2146 1, /* scalar_store_cost. */
2147 1, /* vec_stmt_cost. */
2148 1, /* vec_to_scalar_cost. */
2149 1, /* scalar_to_vec_cost. */
2150 1, /* vec_align_load_cost. */
2151 2, /* vec_unalign_load_cost. */
2152 1, /* vec_store_cost. */
2153 3, /* cond_taken_branch_cost. */
2154 1, /* cond_not_taken_branch_cost. */
2155 };
2156
2157
2158 /* Set by -mtune. */
2159 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2160
2161 /* Set by -mtune or -Os. */
2162 const struct processor_costs *ix86_cost = &pentium_cost;
2163
2164 /* Processor feature/optimization bitmasks. */
2165 #define m_386 (1<<PROCESSOR_I386)
2166 #define m_486 (1<<PROCESSOR_I486)
2167 #define m_PENT (1<<PROCESSOR_PENTIUM)
2168 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2169 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2170 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2171 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2172 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2173 #define m_CORE2 (1<<PROCESSOR_CORE2)
2174 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2175 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2176 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2177 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2178 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2179 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2180 #define m_KNL (1<<PROCESSOR_KNL)
2181 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2182 #define m_INTEL (1<<PROCESSOR_INTEL)
2183
2184 #define m_GEODE (1<<PROCESSOR_GEODE)
2185 #define m_K6 (1<<PROCESSOR_K6)
2186 #define m_K6_GEODE (m_K6 | m_GEODE)
2187 #define m_K8 (1<<PROCESSOR_K8)
2188 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2189 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2190 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2191 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2192 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2193 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2194 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2195 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2196 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2197 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2198 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2199 #define m_BTVER (m_BTVER1 | m_BTVER2)
2200 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2201 | m_ZNVER1)
2202
2203 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2204
2205 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2206 #undef DEF_TUNE
2207 #define DEF_TUNE(tune, name, selector) name,
2208 #include "x86-tune.def"
2209 #undef DEF_TUNE
2210 };
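/* For illustration: each DEF_TUNE (tune, name, selector) line in
   x86-tune.def expands here to its NAME string, and in the table below
   to its SELECTOR mask.  A hypothetical entry such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_BDVER | m_ZNVER1 | m_GENERIC)

   would therefore contribute "example" to ix86_tune_feature_names and
   the combined processor mask to initial_ix86_tune_features.  */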
2211
2212 /* Feature tests against the various tunings. */
2213 unsigned char ix86_tune_features[X86_TUNE_LAST];
2214
2215 /* Feature tests against the various tunings used to create ix86_tune_features
2216 based on the processor mask. */
2217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2218 #undef DEF_TUNE
2219 #define DEF_TUNE(tune, name, selector) selector,
2220 #include "x86-tune.def"
2221 #undef DEF_TUNE
2222 };
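/* Sketch of how the selector masks are applied when -mtune is resolved
   (assuming the usual option-override logic; shown here only to make
   the m_* encoding above concrete):

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (int i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   A feature is thus enabled exactly when the bit for the current tuning
   target is set in its selector.  */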
2223
2224 /* Feature tests against the various architecture variations. */
2225 unsigned char ix86_arch_features[X86_ARCH_LAST];
2226
2227 /* Feature tests against the various architecture variations, used to create
2228 ix86_arch_features based on the processor mask. */
2229 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2230 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2231 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2232
2233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2234 ~m_386,
2235
2236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2237 ~(m_386 | m_486),
2238
2239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2240 ~m_386,
2241
2242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2243 ~m_386,
2244 };
2245
2246 /* In case the average insn count for a single function invocation is
2247 lower than this constant, emit fast (but longer) prologue and
2248 epilogue code. */
2249 #define FAST_PROLOGUE_INSN_COUNT 20
2250
2251 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2252 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2253 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2254 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2255
2256 /* Array of the smallest class containing reg number REGNO, indexed by
2257 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2258
2259 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2260 {
2261 /* ax, dx, cx, bx */
2262 AREG, DREG, CREG, BREG,
2263 /* si, di, bp, sp */
2264 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2265 /* FP registers */
2266 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2267 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2268 /* arg pointer */
2269 NON_Q_REGS,
2270 /* flags, fpsr, fpcr, frame */
2271 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2272 /* SSE registers */
2273 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2274 SSE_REGS, SSE_REGS,
2275 /* MMX registers */
2276 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2277 MMX_REGS, MMX_REGS,
2278 /* REX registers */
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2281 /* SSE REX registers */
2282 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2283 SSE_REGS, SSE_REGS,
2284 /* AVX-512 SSE registers */
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2289 /* Mask registers. */
2290 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2292 /* MPX bound registers */
2293 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2294 };
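/* Usage note (illustrative): REGNO_REG_CLASS in i386.h simply indexes
   this array, so for example

     REGNO_REG_CLASS (AX_REG)   == AREG
     REGNO_REG_CLASS (XMM0_REG) == SSE_FIRST_REG

   giving the register allocator the smallest class that still contains
   each hard register.  */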
2295
2296 /* The "default" register map used in 32bit mode. */
2297
2298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2299 {
2300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2309 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2310 101, 102, 103, 104, /* bound registers */
2311 };
2312
2313 /* The "default" register map used in 64bit mode. */
2314
2315 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2316 {
2317 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2318 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2319 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2320 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2321 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2322 8,9,10,11,12,13,14,15, /* extended integer registers */
2323 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2324 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2325 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2326 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2327 126, 127, 128, 129, /* bound registers */
2328 };
2329
2330 /* Define the register numbers to be used in Dwarf debugging information.
2331 The SVR4 reference port C compiler uses the following register numbers
2332 in its Dwarf output code:
2333 0 for %eax (gcc regno = 0)
2334 1 for %ecx (gcc regno = 2)
2335 2 for %edx (gcc regno = 1)
2336 3 for %ebx (gcc regno = 3)
2337 4 for %esp (gcc regno = 7)
2338 5 for %ebp (gcc regno = 6)
2339 6 for %esi (gcc regno = 4)
2340 7 for %edi (gcc regno = 5)
2341 The following three DWARF register numbers are never generated by
2342 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2343 believes these numbers have these meanings.
2344 8 for %eip (no gcc equivalent)
2345 9 for %eflags (gcc regno = 17)
2346 10 for %trapno (no gcc equivalent)
2347 It is not at all clear how we should number the FP stack registers
2348 for the x86 architecture. If the version of SDB on x86/svr4 were
2349 a bit less brain dead with respect to floating-point then we would
2350 have a precedent to follow with respect to DWARF register numbers
2351 for x86 FP registers, but the SDB on x86/svr4 is so completely
2352 broken with respect to FP registers that it is hardly worth thinking
2353 of it as something to strive for compatibility with.
2354 The version of x86/svr4 SDB I have at the moment does (partially)
2355 seem to believe that DWARF register number 11 is associated with
2356 the x86 register %st(0), but that's about all. Higher DWARF
2357 register numbers don't seem to be associated with anything in
2358 particular, and even for DWARF regno 11, SDB only seems to under-
2359 stand that it should say that a variable lives in %st(0) (when
2360 asked via an `=' command) if we said it was in DWARF regno 11,
2361 but SDB still prints garbage when asked for the value of the
2362 variable in question (via a `/' command).
2363 (Also note that the labels SDB prints for various FP stack regs
2364 when doing an `x' command are all wrong.)
2365 Note that these problems generally don't affect the native SVR4
2366 C compiler because it doesn't allow the use of -O with -g and
2367 because when it is *not* optimizing, it allocates a memory
2368 location for each floating-point variable, and the memory
2369 location is what gets described in the DWARF AT_location
2370 attribute for the variable in question.
2371 Regardless of the severe mental illness of the x86/svr4 SDB, we
2372 do something sensible here and we use the following DWARF
2373 register numbers. Note that these are all stack-top-relative
2374 numbers.
2375 11 for %st(0) (gcc regno = 8)
2376 12 for %st(1) (gcc regno = 9)
2377 13 for %st(2) (gcc regno = 10)
2378 14 for %st(3) (gcc regno = 11)
2379 15 for %st(4) (gcc regno = 12)
2380 16 for %st(5) (gcc regno = 13)
2381 17 for %st(6) (gcc regno = 14)
2382 18 for %st(7) (gcc regno = 15)
2383 */
2384 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2385 {
2386 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2387 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2388 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2389 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2390 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2394 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2395 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2396 101, 102, 103, 104, /* bound registers */
2397 };
2398
2399 /* Define parameter passing and return registers. */
2400
2401 static int const x86_64_int_parameter_registers[6] =
2402 {
2403 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2404 };
2405
2406 static int const x86_64_ms_abi_int_parameter_registers[4] =
2407 {
2408 CX_REG, DX_REG, R8_REG, R9_REG
2409 };
2410
2411 static int const x86_64_int_return_registers[4] =
2412 {
2413 AX_REG, DX_REG, DI_REG, SI_REG
2414 };
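/* For illustration of the tables above: under the SysV AMD64 ABI an
   integer call such as

     long f (long a, long b, long c, long d, long e, long g);

   passes a..g in rdi, rsi, rdx, rcx, r8 and r9 (the order of
   x86_64_int_parameter_registers), while the MS ABI uses only rcx,
   rdx, r8 and r9 for integer arguments.  Integer results come back
   primarily in rax (and rdx for a second word).  */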
2415
2416 /* Additional registers that are clobbered by SYSV calls. */
2417
2418 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2419 {
2420 SI_REG, DI_REG,
2421 XMM6_REG, XMM7_REG,
2422 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2423 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2424 };
2425
2426 /* Define the structure for the machine field in struct function. */
2427
2428 struct GTY(()) stack_local_entry {
2429 unsigned short mode;
2430 unsigned short n;
2431 rtx rtl;
2432 struct stack_local_entry *next;
2433 };
2434
2435 /* Structure describing stack frame layout.
2436 Stack grows downward:
2437
2438 [arguments]
2439 <- ARG_POINTER
2440 saved pc
2441
2442 saved static chain if ix86_static_chain_on_stack
2443
2444 saved frame pointer if frame_pointer_needed
2445 <- HARD_FRAME_POINTER
2446 [saved regs]
2447 <- regs_save_offset
2448 [padding0]
2449
2450 [saved SSE regs]
2451 <- sse_regs_save_offset
2452 [padding1] |
2453 | <- FRAME_POINTER
2454 [va_arg registers] |
2455 |
2456 [frame] |
2457 |
2458 [padding2] | = to_allocate
2459 <- STACK_POINTER
2460 */
2461 struct ix86_frame
2462 {
2463 int nsseregs;
2464 int nregs;
2465 int va_arg_size;
2466 int red_zone_size;
2467 int outgoing_arguments_size;
2468
2469 /* The offsets relative to ARG_POINTER. */
2470 HOST_WIDE_INT frame_pointer_offset;
2471 HOST_WIDE_INT hard_frame_pointer_offset;
2472 HOST_WIDE_INT stack_pointer_offset;
2473 HOST_WIDE_INT hfp_save_offset;
2474 HOST_WIDE_INT reg_save_offset;
2475 HOST_WIDE_INT sse_reg_save_offset;
2476
2477 /* When save_regs_using_mov is set, emit prologue using
2478 move instead of push instructions. */
2479 bool save_regs_using_mov;
2480 };
2481
2482 /* Which cpu are we scheduling for. */
2483 enum attr_cpu ix86_schedule;
2484
2485 /* Which cpu are we optimizing for. */
2486 enum processor_type ix86_tune;
2487
2488 /* Which instruction set architecture to use. */
2489 enum processor_type ix86_arch;
2490
2491 /* True if processor has SSE prefetch instruction. */
2492 unsigned char x86_prefetch_sse;
2493
2494 /* -mstackrealign option */
2495 static const char ix86_force_align_arg_pointer_string[]
2496 = "force_align_arg_pointer";
2497
2498 static rtx (*ix86_gen_leave) (void);
2499 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2501 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2502 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2503 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2505 static rtx (*ix86_gen_clzero) (rtx);
2506 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2507 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2508 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2511 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2512
2513 /* Preferred alignment for stack boundary in bits. */
2514 unsigned int ix86_preferred_stack_boundary;
2515
2516 /* Alignment for incoming stack boundary in bits specified at
2517 command line. */
2518 static unsigned int ix86_user_incoming_stack_boundary;
2519
2520 /* Default alignment for incoming stack boundary in bits. */
2521 static unsigned int ix86_default_incoming_stack_boundary;
2522
2523 /* Alignment for incoming stack boundary in bits. */
2524 unsigned int ix86_incoming_stack_boundary;
2525
2526 /* Calling-ABI-specific va_list type nodes. */
2527 static GTY(()) tree sysv_va_list_type_node;
2528 static GTY(()) tree ms_va_list_type_node;
2529
2530 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2531 char internal_label_prefix[16];
2532 int internal_label_prefix_len;
2533
2534 /* Fence to use after loop using movnt. */
2535 tree x86_mfence;
2536
2537 /* Register class used for passing a given 64-bit part of the argument.
2538 These represent the classes as documented by the psABI, with the exception
2539 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2540 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2541 
2542 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2543 whenever possible (the upper half then contains padding). */
2544 enum x86_64_reg_class
2545 {
2546 X86_64_NO_CLASS,
2547 X86_64_INTEGER_CLASS,
2548 X86_64_INTEGERSI_CLASS,
2549 X86_64_SSE_CLASS,
2550 X86_64_SSESF_CLASS,
2551 X86_64_SSEDF_CLASS,
2552 X86_64_SSEUP_CLASS,
2553 X86_64_X87_CLASS,
2554 X86_64_X87UP_CLASS,
2555 X86_64_COMPLEX_X87_CLASS,
2556 X86_64_MEMORY_CLASS
2557 };
2558
2559 #define MAX_CLASSES 8
2560
2561 /* Table of constants used by fldpi, fldln2, etc.... */
2562 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2563 static bool ext_80387_constants_init = 0;
2564
2565 \f
2566 static struct machine_function * ix86_init_machine_status (void);
2567 static rtx ix86_function_value (const_tree, const_tree, bool);
2568 static bool ix86_function_value_regno_p (const unsigned int);
2569 static unsigned int ix86_function_arg_boundary (machine_mode,
2570 const_tree);
2571 static rtx ix86_static_chain (const_tree, bool);
2572 static int ix86_function_regparm (const_tree, const_tree);
2573 static void ix86_compute_frame_layout (struct ix86_frame *);
2574 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2575 rtx, rtx, int);
2576 static void ix86_add_new_builtins (HOST_WIDE_INT);
2577 static tree ix86_canonical_va_list_type (tree);
2578 static void predict_jump (int);
2579 static unsigned int split_stack_prologue_scratch_regno (void);
2580 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2581
2582 enum ix86_function_specific_strings
2583 {
2584 IX86_FUNCTION_SPECIFIC_ARCH,
2585 IX86_FUNCTION_SPECIFIC_TUNE,
2586 IX86_FUNCTION_SPECIFIC_MAX
2587 };
2588
2589 static char *ix86_target_string (HOST_WIDE_INT, int, int, const char *,
2590 const char *, enum fpmath_unit, bool);
2591 static void ix86_function_specific_save (struct cl_target_option *,
2592 struct gcc_options *opts);
2593 static void ix86_function_specific_restore (struct gcc_options *opts,
2594 struct cl_target_option *);
2595 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2596 static void ix86_function_specific_print (FILE *, int,
2597 struct cl_target_option *);
2598 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2599 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2600 struct gcc_options *,
2601 struct gcc_options *,
2602 struct gcc_options *);
2603 static bool ix86_can_inline_p (tree, tree);
2604 static void ix86_set_current_function (tree);
2605 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2606
2607 static enum calling_abi ix86_function_abi (const_tree);
2608
2609 \f
2610 #ifndef SUBTARGET32_DEFAULT_CPU
2611 #define SUBTARGET32_DEFAULT_CPU "i386"
2612 #endif
2613
2614 /* Whether -mtune= or -march= were specified */
2615 static int ix86_tune_defaulted;
2616 static int ix86_arch_specified;
2617
2618 /* Vectorization library interface and handlers. */
2619 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2620
2621 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2622 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2623
2624 /* Processor target table, indexed by processor number */
2625 struct ptt
2626 {
2627 const char *const name; /* processor name */
2628 const struct processor_costs *cost; /* Processor costs */
2629 const int align_loop; /* Default alignments. */
2630 const int align_loop_max_skip;
2631 const int align_jump;
2632 const int align_jump_max_skip;
2633 const int align_func;
2634 };
2635
2636 /* This table must be in sync with enum processor_type in i386.h. */
2637 static const struct ptt processor_target_table[PROCESSOR_max] =
2638 {
2639 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2640 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2641 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2642 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2643 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2644 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2645 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2646 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2647 {"core2", &core_cost, 16, 10, 16, 10, 16},
2648 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2649 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2650 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2651 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2652 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2653 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2654 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2655 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2656 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2657 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2658 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2659 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2660 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2661 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2662 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2663 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2664 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
2667 {"znver1", &znver1_cost, 16, 10, 16, 7, 11}
2668 };
2669 \f
2670 static unsigned int
2671 rest_of_handle_insert_vzeroupper (void)
2672 {
2673 int i;
2674
2675 /* vzeroupper instructions are inserted immediately after reload to
2676 account for possible spills from 256-bit registers.  The pass
2677 reuses the mode switching infrastructure by re-running the mode
2678 insertion pass, so disable entities that have already been processed. */
2679 for (i = 0; i < MAX_386_ENTITIES; i++)
2680 ix86_optimize_mode_switching[i] = 0;
2681
2682 ix86_optimize_mode_switching[AVX_U128] = 1;
2683
2684 /* Call optimize_mode_switching. */
2685 g->get_passes ()->execute_pass_mode_switching ();
2686 return 0;
2687 }
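/* Illustrative background (not code used by this pass): vzeroupper is
   emitted at AVX/SSE transition points because code like

     __m256d v = _mm256_add_pd (a, b);   // writes the upper 128 bits
     legacy_sse_function ();             // hypothetical SSE-only callee

   would otherwise pay a large transition penalty on processors that
   track the upper halves of the YMM registers; clearing them with
   vzeroupper before the call avoids it.  */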
2688
2689 /* Return true if INSN uses or defines a hard register.
2690 Hard register uses in a memory address are ignored.
2691 Clobbers and flags definitions are ignored. */
2692
2693 static bool
2694 has_non_address_hard_reg (rtx_insn *insn)
2695 {
2696 df_ref ref;
2697 FOR_EACH_INSN_DEF (ref, insn)
2698 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2699 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2700 && DF_REF_REGNO (ref) != FLAGS_REG)
2701 return true;
2702
2703 FOR_EACH_INSN_USE (ref, insn)
2704 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2705 return true;
2706
2707 return false;
2708 }
2709
2710 /* Check whether comparison INSN may be transformed
2711 into a vector comparison.  Currently we transform
2712 only zero checks which look like:
2713
2714 (set (reg:CCZ 17 flags)
2715 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2716 (subreg:SI (reg:DI x) 0))
2717 (const_int 0 [0]))) */
2718
2719 static bool
2720 convertible_comparison_p (rtx_insn *insn)
2721 {
2722 if (!TARGET_SSE4_1)
2723 return false;
2724
2725 rtx def_set = single_set (insn);
2726
2727 gcc_assert (def_set);
2728
2729 rtx src = SET_SRC (def_set);
2730 rtx dst = SET_DEST (def_set);
2731
2732 gcc_assert (GET_CODE (src) == COMPARE);
2733
2734 if (GET_CODE (dst) != REG
2735 || REGNO (dst) != FLAGS_REG
2736 || GET_MODE (dst) != CCZmode)
2737 return false;
2738
2739 rtx op1 = XEXP (src, 0);
2740 rtx op2 = XEXP (src, 1);
2741
2742 if (op2 != CONST0_RTX (GET_MODE (op2)))
2743 return false;
2744
2745 if (GET_CODE (op1) != IOR)
2746 return false;
2747
2748 op2 = XEXP (op1, 1);
2749 op1 = XEXP (op1, 0);
2750
2751 if (!SUBREG_P (op1)
2752 || !SUBREG_P (op2)
2753 || GET_MODE (op1) != SImode
2754 || GET_MODE (op2) != SImode
2755 || ((SUBREG_BYTE (op1) != 0
2756 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2757 && (SUBREG_BYTE (op2) != 0
2758 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2759 return false;
2760
2761 op1 = SUBREG_REG (op1);
2762 op2 = SUBREG_REG (op2);
2763
2764 if (op1 != op2
2765 || !REG_P (op1)
2766 || GET_MODE (op1) != DImode)
2767 return false;
2768
2769 return true;
2770 }
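/* For illustration, the pattern recognized above is what a 64-bit
   equality test against zero looks like on a 32-bit target once the
   DImode value has been split into two SImode halves, e.g.

     int is_zero (long long x) { return x == 0; }

   which compares (high_part | low_part) against zero and so can be
   rewritten as a vector (ptest-style) check when SSE4.1 is enabled.  */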
2771
2772 /* The DImode version of scalar_to_vector_candidate_p. */
2773
2774 static bool
2775 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
2776 {
2777 rtx def_set = single_set (insn);
2778
2779 if (!def_set)
2780 return false;
2781
2782 if (has_non_address_hard_reg (insn))
2783 return false;
2784
2785 rtx src = SET_SRC (def_set);
2786 rtx dst = SET_DEST (def_set);
2787
2788 if (GET_CODE (src) == COMPARE)
2789 return convertible_comparison_p (insn);
2790
2791 /* We are interested in DImode promotion only. */
2792 if ((GET_MODE (src) != DImode
2793 && !CONST_INT_P (src))
2794 || GET_MODE (dst) != DImode)
2795 return false;
2796
2797 if (!REG_P (dst) && !MEM_P (dst))
2798 return false;
2799
2800 switch (GET_CODE (src))
2801 {
2802 case PLUS:
2803 case MINUS:
2804 case IOR:
2805 case XOR:
2806 case AND:
2807 break;
2808
2809 case REG:
2810 return true;
2811
2812 case MEM:
2813 case CONST_INT:
2814 return REG_P (dst);
2815
2816 default:
2817 return false;
2818 }
2819
2820 if (!REG_P (XEXP (src, 0))
2821 && !MEM_P (XEXP (src, 0))
2822 && !CONST_INT_P (XEXP (src, 0))
2823 /* Check for andnot case. */
2824 && (GET_CODE (src) != AND
2825 || GET_CODE (XEXP (src, 0)) != NOT
2826 || !REG_P (XEXP (XEXP (src, 0), 0))))
2827 return false;
2828
2829 if (!REG_P (XEXP (src, 1))
2830 && !MEM_P (XEXP (src, 1))
2831 && !CONST_INT_P (XEXP (src, 1)))
2832 return false;
2833
2834 if ((GET_MODE (XEXP (src, 0)) != DImode
2835 && !CONST_INT_P (XEXP (src, 0)))
2836 || (GET_MODE (XEXP (src, 1)) != DImode
2837 && !CONST_INT_P (XEXP (src, 1))))
2838 return false;
2839
2840 return true;
2841 }
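/* For illustration: on a 32-bit target a statement such as

     unsigned long long x, y, z;
     ...
     z = x | y;

   is normally split into two SImode OR instructions; when this
   predicate accepts the DImode insn, the STV pass can instead keep the
   value in an SSE register and use a single vector OR.  */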
2842
2843 /* The TImode version of scalar_to_vector_candidate_p. */
2844
2845 static bool
2846 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
2847 {
2848 rtx def_set = single_set (insn);
2849
2850 if (!def_set)
2851 return false;
2852
2853 if (has_non_address_hard_reg (insn))
2854 return false;
2855
2856 rtx src = SET_SRC (def_set);
2857 rtx dst = SET_DEST (def_set);
2858
2859 /* Only TImode loads and stores are allowed. */
2860 if (GET_MODE (dst) != TImode)
2861 return false;
2862
2863 if (MEM_P (dst))
2864 {
2865 /* Check for a store.  The memory must be aligned, or an unaligned
2866 store must be optimal.  Only support stores from a register, a standard
2867 SSE constant, or a CONST_WIDE_INT generated from a piecewise store.
2868 
2869 ??? Verify the performance impact before enabling CONST_INT for
2870 __int128 stores. */
2871 if (misaligned_operand (dst, TImode)
2872 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
2873 return false;
2874
2875 switch (GET_CODE (src))
2876 {
2877 default:
2878 return false;
2879
2880 case REG:
2881 case CONST_WIDE_INT:
2882 return true;
2883
2884 case CONST_INT:
2885 return standard_sse_constant_p (src, TImode);
2886 }
2887 }
2888 else if (MEM_P (src))
2889 {
2890 /* Check for a load.  The memory must be aligned, or an unaligned
2891 load must be optimal. */
2892 return (REG_P (dst)
2893 && (!misaligned_operand (src, TImode)
2894 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
2895 }
2896
2897 return false;
2898 }
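/* For illustration: a copy of a 128-bit integer such as

     __int128 *dst, *src;
     *dst = *src;

   is a TImode load plus a TImode store; accepting both here lets the
   STV pass turn the pair into 128-bit SSE moves instead of two 64-bit
   integer moves.  */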
2899
2900 /* Return true if INSN may be converted into a vector
2901 instruction. */
2902
2903 static bool
2904 scalar_to_vector_candidate_p (rtx_insn *insn)
2905 {
2906 if (TARGET_64BIT)
2907 return timode_scalar_to_vector_candidate_p (insn);
2908 else
2909 return dimode_scalar_to_vector_candidate_p (insn);
2910 }
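
/* Illustrative example (not taken from the original sources): on -m32 a
   64-bit bitwise operation such as

     unsigned long long
     f (unsigned long long a, unsigned long long b)
     {
       return a & b;
     }

   yields a DImode AND that dimode_scalar_to_vector_candidate_p accepts,
   so the STV pass may replace the pair of 32-bit ANDs with a single
   V2DImode operation.  On -m64 the candidates handled by
   timode_scalar_to_vector_candidate_p are plain __int128 (TImode)
   loads and stores.  */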
2911
2912 /* The DImode version of remove_non_convertible_regs. */
2913
2914 static void
2915 dimode_remove_non_convertible_regs (bitmap candidates)
2916 {
2917 bitmap_iterator bi;
2918 unsigned id;
2919 bitmap regs = BITMAP_ALLOC (NULL);
2920
2921 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2922 {
2923 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2924 rtx reg = SET_DEST (def_set);
2925
2926 if (!REG_P (reg)
2927 || bitmap_bit_p (regs, REGNO (reg))
2928 || HARD_REGISTER_P (reg))
2929 continue;
2930
2931 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2932 def;
2933 def = DF_REF_NEXT_REG (def))
2934 {
2935 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2936 {
2937 if (dump_file)
2938 fprintf (dump_file,
2939 "r%d has non convertible definition in insn %d\n",
2940 REGNO (reg), DF_REF_INSN_UID (def));
2941
2942 bitmap_set_bit (regs, REGNO (reg));
2943 break;
2944 }
2945 }
2946 }
2947
2948 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2949 {
2950 for (df_ref def = DF_REG_DEF_CHAIN (id);
2951 def;
2952 def = DF_REF_NEXT_REG (def))
2953 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2954 {
2955 if (dump_file)
2956 fprintf (dump_file, "Removing insn %d from candidates list\n",
2957 DF_REF_INSN_UID (def));
2958
2959 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
2960 }
2961 }
2962
2963 BITMAP_FREE (regs);
2964 }
2965
2966 /* For a register REGNO, scan instructions for its defs and uses.
2967 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
2968
2969 static void
2970 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
2971 unsigned int regno)
2972 {
2973 for (df_ref def = DF_REG_DEF_CHAIN (regno);
2974 def;
2975 def = DF_REF_NEXT_REG (def))
2976 {
2977 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2978 {
2979 if (dump_file)
2980 fprintf (dump_file,
2981 "r%d has non convertible def in insn %d\n",
2982 regno, DF_REF_INSN_UID (def));
2983
2984 bitmap_set_bit (regs, regno);
2985 break;
2986 }
2987 }
2988
2989 for (df_ref ref = DF_REG_USE_CHAIN (regno);
2990 ref;
2991 ref = DF_REF_NEXT_REG (ref))
2992 {
2993 /* Debug instructions are skipped. */
2994 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
2995 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
2996 {
2997 if (dump_file)
2998 fprintf (dump_file,
2999 "r%d has non convertible use in insn %d\n",
3000 regno, DF_REF_INSN_UID (ref));
3001
3002 bitmap_set_bit (regs, regno);
3003 break;
3004 }
3005 }
3006 }
3007
3008 /* The TImode version of remove_non_convertible_regs. */
3009
3010 static void
3011 timode_remove_non_convertible_regs (bitmap candidates)
3012 {
3013 bitmap_iterator bi;
3014 unsigned id;
3015 bitmap regs = BITMAP_ALLOC (NULL);
3016
3017 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
3018 {
3019 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
3020 rtx dest = SET_DEST (def_set);
3021 rtx src = SET_SRC (def_set);
3022
3023 if ((!REG_P (dest)
3024 || bitmap_bit_p (regs, REGNO (dest))
3025 || HARD_REGISTER_P (dest))
3026 && (!REG_P (src)
3027 || bitmap_bit_p (regs, REGNO (src))
3028 || HARD_REGISTER_P (src)))
3029 continue;
3030
3031 if (REG_P (dest))
3032 timode_check_non_convertible_regs (candidates, regs,
3033 REGNO (dest));
3034
3035 if (REG_P (src))
3036 timode_check_non_convertible_regs (candidates, regs,
3037 REGNO (src));
3038 }
3039
3040 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
3041 {
3042 for (df_ref def = DF_REG_DEF_CHAIN (id);
3043 def;
3044 def = DF_REF_NEXT_REG (def))
3045 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
3046 {
3047 if (dump_file)
3048 fprintf (dump_file, "Removing insn %d from candidates list\n",
3049 DF_REF_INSN_UID (def));
3050
3051 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
3052 }
3053
3054 for (df_ref ref = DF_REG_USE_CHAIN (id);
3055 ref;
3056 ref = DF_REF_NEXT_REG (ref))
3057 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
3058 {
3059 if (dump_file)
3060 fprintf (dump_file, "Removing insn %d from candidates list\n",
3061 DF_REF_INSN_UID (ref));
3062
3063 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
3064 }
3065 }
3066
3067 BITMAP_FREE (regs);
3068 }
3069
3070 /* For a given bitmap of insn UIDs scan all instructions and
3071 remove an insn from CANDIDATES in case it has both convertible
3072 and non-convertible definitions.
3073
3074 All insns in the bitmap are conversion candidates according to
3075 scalar_to_vector_candidate_p.  Currently this implies all insns
3076 are single_set. */
3077
3078 static void
3079 remove_non_convertible_regs (bitmap candidates)
3080 {
3081 if (TARGET_64BIT)
3082 timode_remove_non_convertible_regs (candidates);
3083 else
3084 dimode_remove_non_convertible_regs (candidates);
3085 }
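
/* A rough illustration of why the removal above is needed: if a pseudo
   is set both by a candidate insn (say a DImode AND) and by a
   non-candidate insn (say a DImode shift), converting only the
   candidate definition would require the register in vector and scalar
   mode at the same time.  Instead, the candidate insns defining that
   register (and, in the TImode case, also those using it) are dropped
   from CANDIDATES.  */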
3086
3087 class scalar_chain
3088 {
3089 public:
3090 scalar_chain ();
3091 virtual ~scalar_chain ();
3092
3093 static unsigned max_id;
3094
3095 /* ID of a chain. */
3096 unsigned int chain_id;
3097 /* A queue of instructions to be included into a chain. */
3098 bitmap queue;
3099 /* Instructions included into a chain. */
3100 bitmap insns;
3101 /* All registers defined by a chain. */
3102 bitmap defs;
3103 /* Registers used in both vector and scalar modes. */
3104 bitmap defs_conv;
3105
3106 void build (bitmap candidates, unsigned insn_uid);
3107 virtual int compute_convert_gain () = 0;
3108 int convert ();
3109
3110 protected:
3111 void add_to_queue (unsigned insn_uid);
3112 void emit_conversion_insns (rtx insns, rtx_insn *pos);
3113
3114 private:
3115 void add_insn (bitmap candidates, unsigned insn_uid);
3116 void analyze_register_chain (bitmap candidates, df_ref ref);
3117 virtual void mark_dual_mode_def (df_ref def) = 0;
3118 virtual void convert_insn (rtx_insn *insn) = 0;
3119 virtual void convert_registers () = 0;
3120 };
3121
3122 class dimode_scalar_chain : public scalar_chain
3123 {
3124 public:
3125 int compute_convert_gain ();
3126 private:
3127 void mark_dual_mode_def (df_ref def);
3128 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
3129 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
3130 void convert_insn (rtx_insn *insn);
3131 void convert_op (rtx *op, rtx_insn *insn);
3132 void convert_reg (unsigned regno);
3133 void make_vector_copies (unsigned regno);
3134 void convert_registers ();
3135 int vector_const_cost (rtx exp);
3136 };
3137
3138 class timode_scalar_chain : public scalar_chain
3139 {
3140 public:
3141 /* Converting from TImode to V1TImode is always faster. */
3142 int compute_convert_gain () { return 1; }
3143
3144 private:
3145 void mark_dual_mode_def (df_ref def);
3146 void fix_debug_reg_uses (rtx reg);
3147 void convert_insn (rtx_insn *insn);
3148 /* We don't convert registers to a different size. */
3149 void convert_registers () {}
3150 };
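
/* Overview added for clarity: convert_scalars_to_vector below drives
   these classes.  For each remaining candidate insn it builds a
   scalar_chain with build (), asks compute_convert_gain () whether the
   conversion pays off, and only then calls convert (), which rewrites
   every insn of the chain and emits the scalar<->vector copies needed
   at the chain boundaries.  */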
3151
3152 unsigned scalar_chain::max_id = 0;
3153
3154 /* Initialize new chain. */
3155
3156 scalar_chain::scalar_chain ()
3157 {
3158 chain_id = ++max_id;
3159
3160 if (dump_file)
3161 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
3162
3163 bitmap_obstack_initialize (NULL);
3164 insns = BITMAP_ALLOC (NULL);
3165 defs = BITMAP_ALLOC (NULL);
3166 defs_conv = BITMAP_ALLOC (NULL);
3167 queue = NULL;
3168 }
3169
3170 /* Free chain's data. */
3171
3172 scalar_chain::~scalar_chain ()
3173 {
3174 BITMAP_FREE (insns);
3175 BITMAP_FREE (defs);
3176 BITMAP_FREE (defs_conv);
3177 bitmap_obstack_release (NULL);
3178 }
3179
3180 /* Add an instruction into the chain's queue. */
3181
3182 void
3183 scalar_chain::add_to_queue (unsigned insn_uid)
3184 {
3185 if (bitmap_bit_p (insns, insn_uid)
3186 || bitmap_bit_p (queue, insn_uid))
3187 return;
3188
3189 if (dump_file)
3190 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
3191 insn_uid, chain_id);
3192 bitmap_set_bit (queue, insn_uid);
3193 }
3194
3195 /* For DImode conversion, mark register defined by DEF as requiring
3196 conversion. */
3197
3198 void
3199 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
3200 {
3201 gcc_assert (DF_REF_REG_DEF_P (def));
3202
3203 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
3204 return;
3205
3206 if (dump_file)
3207 fprintf (dump_file,
3208 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
3209 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
3210
3211 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
3212 }
3213
3214 /* For TImode conversion, it is unused. */
3215
3216 void
3217 timode_scalar_chain::mark_dual_mode_def (df_ref)
3218 {
3219 gcc_unreachable ();
3220 }
3221
3222 /* Check REF's chain to add new insns into a queue
3223 and find registers requiring conversion. */
3224
3225 void
3226 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
3227 {
3228 df_link *chain;
3229
3230 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
3231 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
3232 add_to_queue (DF_REF_INSN_UID (ref));
3233
3234 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3235 {
3236 unsigned uid = DF_REF_INSN_UID (chain->ref);
3237
3238 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3239 continue;
3240
3241 if (!DF_REF_REG_MEM_P (chain->ref))
3242 {
3243 if (bitmap_bit_p (insns, uid))
3244 continue;
3245
3246 if (bitmap_bit_p (candidates, uid))
3247 {
3248 add_to_queue (uid);
3249 continue;
3250 }
3251 }
3252
3253 if (DF_REF_REG_DEF_P (chain->ref))
3254 {
3255 if (dump_file)
3256 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3257 DF_REF_REGNO (chain->ref), uid);
3258 mark_dual_mode_def (chain->ref);
3259 }
3260 else
3261 {
3262 if (dump_file)
3263 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3264 DF_REF_REGNO (chain->ref), uid);
3265 mark_dual_mode_def (ref);
3266 }
3267 }
3268 }
3269
3270 /* Add instruction into a chain. */
3271
3272 void
3273 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3274 {
3275 if (bitmap_bit_p (insns, insn_uid))
3276 return;
3277
3278 if (dump_file)
3279 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3280
3281 bitmap_set_bit (insns, insn_uid);
3282
3283 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3284 rtx def_set = single_set (insn);
3285 if (def_set && REG_P (SET_DEST (def_set))
3286 && !HARD_REGISTER_P (SET_DEST (def_set)))
3287 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
3288
3289 df_ref ref;
3290 df_ref def;
3291 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3292 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3293 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3294 def;
3295 def = DF_REF_NEXT_REG (def))
3296 analyze_register_chain (candidates, def);
3297 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3298 if (!DF_REF_REG_MEM_P (ref))
3299 analyze_register_chain (candidates, ref);
3300 }
3301
3302 /* Build new chain starting from insn INSN_UID recursively
3303 adding all dependent uses and definitions. */
3304
3305 void
3306 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3307 {
3308 queue = BITMAP_ALLOC (NULL);
3309 bitmap_set_bit (queue, insn_uid);
3310
3311 if (dump_file)
3312 fprintf (dump_file, "Building chain #%d...\n", chain_id);
3313
3314 while (!bitmap_empty_p (queue))
3315 {
3316 insn_uid = bitmap_first_set_bit (queue);
3317 bitmap_clear_bit (queue, insn_uid);
3318 bitmap_clear_bit (candidates, insn_uid);
3319 add_insn (candidates, insn_uid);
3320 }
3321
3322 if (dump_file)
3323 {
3324 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3325 fprintf (dump_file, " insns: ");
3326 dump_bitmap (dump_file, insns);
3327 if (!bitmap_empty_p (defs_conv))
3328 {
3329 bitmap_iterator bi;
3330 unsigned id;
3331 const char *comma = "";
3332 fprintf (dump_file, " defs to convert: ");
3333 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3334 {
3335 fprintf (dump_file, "%sr%d", comma, id);
3336 comma = ", ";
3337 }
3338 fprintf (dump_file, "\n");
3339 }
3340 }
3341
3342 BITMAP_FREE (queue);
3343 }
3344
3345 /* Return the cost of building a vector constant
3346 instead of using a scalar one. */
3347
3348 int
3349 dimode_scalar_chain::vector_const_cost (rtx exp)
3350 {
3351 gcc_assert (CONST_INT_P (exp));
3352
3353 if (standard_sse_constant_p (exp, V2DImode))
3354 return COSTS_N_INSNS (1);
3355 return ix86_cost->sse_load[1];
3356 }
3357
3358 /* Compute a gain for chain conversion. */
3359
3360 int
3361 dimode_scalar_chain::compute_convert_gain ()
3362 {
3363 bitmap_iterator bi;
3364 unsigned insn_uid;
3365 int gain = 0;
3366 int cost = 0;
3367
3368 if (dump_file)
3369 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
3370
3371 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3372 {
3373 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3374 rtx def_set = single_set (insn);
3375 rtx src = SET_SRC (def_set);
3376 rtx dst = SET_DEST (def_set);
3377
3378 if (REG_P (src) && REG_P (dst))
3379 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3380 else if (REG_P (src) && MEM_P (dst))
3381 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3382 else if (MEM_P (src) && REG_P (dst))
3383 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3384 else if (GET_CODE (src) == PLUS
3385 || GET_CODE (src) == MINUS
3386 || GET_CODE (src) == IOR
3387 || GET_CODE (src) == XOR
3388 || GET_CODE (src) == AND)
3389 {
3390 gain += ix86_cost->add;
3391 if (CONST_INT_P (XEXP (src, 0)))
3392 gain -= vector_const_cost (XEXP (src, 0));
3393 if (CONST_INT_P (XEXP (src, 1)))
3394 gain -= vector_const_cost (XEXP (src, 1));
3395 }
3396 else if (GET_CODE (src) == COMPARE)
3397 {
3398 /* Assume comparison cost is the same. */
3399 }
3400 else if (GET_CODE (src) == CONST_INT)
3401 {
3402 if (REG_P (dst))
3403 gain += COSTS_N_INSNS (2);
3404 else if (MEM_P (dst))
3405 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3406 gain -= vector_const_cost (src);
3407 }
3408 else
3409 gcc_unreachable ();
3410 }
3411
3412 if (dump_file)
3413 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
3414
3415 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3416 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3417
3418 if (dump_file)
3419 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
3420
3421 gain -= cost;
3422
3423 if (dump_file)
3424 fprintf (dump_file, " Total gain: %d\n", gain);
3425
3426 return gain;
3427 }
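
/* A sketch of the trade-off computed above, in symbolic costs only:
   each converted reg-reg move gains COSTS_N_INSNS (2) - sse_move,
   memory accesses gain the difference between two 32-bit integer
   accesses and one SSE access, arithmetic gains one scalar add, and
   every register living in both modes costs mmxsse_to_integer per
   definition.  The caller converts the chain only if the summed gain
   stays positive.  */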
3428
3429 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3430
3431 rtx
3432 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
3433 {
3434 if (x == reg)
3435 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
3436
3437 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3438 int i, j;
3439 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3440 {
3441 if (fmt[i] == 'e')
3442 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3443 else if (fmt[i] == 'E')
3444 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3445 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3446 reg, new_reg);
3447 }
3448
3449 return x;
3450 }
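
/* For example (illustrative RTL, not from an actual dump): with
   REG = (reg:DI 100) and NEW_REG = (reg:DI 101), the expression
     (plus:DI (reg:DI 100) (reg:DI 102))
   is rewritten in place into
     (plus:DI (subreg:V2DI (reg:DI 101) 0) (reg:DI 102)).  */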
3451
3452 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3453
3454 void
3455 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
3456 rtx reg, rtx new_reg)
3457 {
3458 replace_with_subreg (single_set (insn), reg, new_reg);
3459 }
3460
3461 /* Insert the generated conversion instruction sequence INSNS
3462 after instruction AFTER.  A new BB may be required in case the
3463 instruction has an EH region attached. */
3464
3465 void
3466 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3467 {
3468 if (!control_flow_insn_p (after))
3469 {
3470 emit_insn_after (insns, after);
3471 return;
3472 }
3473
3474 basic_block bb = BLOCK_FOR_INSN (after);
3475 edge e = find_fallthru_edge (bb->succs);
3476 gcc_assert (e);
3477
3478 basic_block new_bb = split_edge (e);
3479 emit_insn_after (insns, BB_HEAD (new_bb));
3480 }
3481
3482 /* Make vector copies for all definitions of register REGNO
3483 and replace its uses within the chain. */
3484
3485 void
3486 dimode_scalar_chain::make_vector_copies (unsigned regno)
3487 {
3488 rtx reg = regno_reg_rtx[regno];
3489 rtx vreg = gen_reg_rtx (DImode);
3490 df_ref ref;
3491
3492 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3493 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3494 {
3495 rtx_insn *insn = DF_REF_INSN (ref);
3496
3497 start_sequence ();
3498 if (TARGET_SSE4_1)
3499 {
3500 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3501 CONST0_RTX (V4SImode),
3502 gen_rtx_SUBREG (SImode, reg, 0)));
3503 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3504 gen_rtx_SUBREG (V4SImode, vreg, 0),
3505 gen_rtx_SUBREG (SImode, reg, 4),
3506 GEN_INT (2)));
3507 }
3508 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3509 {
3510 rtx tmp = gen_reg_rtx (DImode);
3511 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3512 CONST0_RTX (V4SImode),
3513 gen_rtx_SUBREG (SImode, reg, 0)));
3514 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3515 CONST0_RTX (V4SImode),
3516 gen_rtx_SUBREG (SImode, reg, 4)));
3517 emit_insn (gen_vec_interleave_lowv4si
3518 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3519 gen_rtx_SUBREG (V4SImode, vreg, 0),
3520 gen_rtx_SUBREG (V4SImode, tmp, 0)));
3521 }
3522 else
3523 {
3524 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3525 emit_move_insn (adjust_address (tmp, SImode, 0),
3526 gen_rtx_SUBREG (SImode, reg, 0));
3527 emit_move_insn (adjust_address (tmp, SImode, 4),
3528 gen_rtx_SUBREG (SImode, reg, 4));
3529 emit_move_insn (vreg, tmp);
3530 }
3531 rtx_insn *seq = get_insns ();
3532 end_sequence ();
3533 emit_conversion_insns (seq, insn);
3534
3535 if (dump_file)
3536 fprintf (dump_file,
3537 " Copied r%d to a vector register r%d for insn %d\n",
3538 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3539 }
3540
3541 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3542 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3543 {
3544 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3545
3546 if (dump_file)
3547 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3548 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3549 }
3550 }
3551
3552 /* Convert all definitions of register REGNO
3553 and fix its uses.  Scalar copies may be created
3554 in case the register is used in a non-convertible insn. */
3555
3556 void
3557 dimode_scalar_chain::convert_reg (unsigned regno)
3558 {
3559 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3560 rtx reg = regno_reg_rtx[regno];
3561 rtx scopy = NULL_RTX;
3562 df_ref ref;
3563 bitmap conv;
3564
3565 conv = BITMAP_ALLOC (NULL);
3566 bitmap_copy (conv, insns);
3567
3568 if (scalar_copy)
3569 scopy = gen_reg_rtx (DImode);
3570
3571 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3572 {
3573 rtx_insn *insn = DF_REF_INSN (ref);
3574 rtx def_set = single_set (insn);
3575 rtx src = SET_SRC (def_set);
3576 rtx reg = DF_REF_REG (ref);
3577
3578 if (!MEM_P (src))
3579 {
3580 replace_with_subreg_in_insn (insn, reg, reg);
3581 bitmap_clear_bit (conv, INSN_UID (insn));
3582 }
3583
3584 if (scalar_copy)
3585 {
3586 rtx vcopy = gen_reg_rtx (V2DImode);
3587
3588 start_sequence ();
3589 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3590 {
3591 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3592 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3593 gen_rtx_SUBREG (SImode, vcopy, 0));
3594 emit_move_insn (vcopy,
3595 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3596 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3597 gen_rtx_SUBREG (SImode, vcopy, 0));
3598 }
3599 else
3600 {
3601 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3602 emit_move_insn (tmp, reg);
3603 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3604 adjust_address (tmp, SImode, 0));
3605 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3606 adjust_address (tmp, SImode, 4));
3607 }
3608 rtx_insn *seq = get_insns ();
3609 end_sequence ();
3610 emit_conversion_insns (seq, insn);
3611
3612 if (dump_file)
3613 fprintf (dump_file,
3614 " Copied r%d to a scalar register r%d for insn %d\n",
3615 regno, REGNO (scopy), INSN_UID (insn));
3616 }
3617 }
3618
3619 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3620 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3621 {
3622 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3623 {
3624 rtx def_set = single_set (DF_REF_INSN (ref));
3625 if (!MEM_P (SET_DEST (def_set))
3626 || !REG_P (SET_SRC (def_set)))
3627 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3628 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3629 }
3630 }
3631 /* Skip debug insns and uninitialized uses. */
3632 else if (DF_REF_CHAIN (ref)
3633 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3634 {
3635 gcc_assert (scopy);
3636 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3637 df_insn_rescan (DF_REF_INSN (ref));
3638 }
3639
3640 BITMAP_FREE (conv);
3641 }
3642
3643 /* Convert operand OP in INSN. We should handle
3644 memory operands and uninitialized registers.
3645 All other register uses are converted during
3646 register conversion. */
3647
3648 void
3649 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3650 {
3651 *op = copy_rtx_if_shared (*op);
3652
3653 if (GET_CODE (*op) == NOT)
3654 {
3655 convert_op (&XEXP (*op, 0), insn);
3656 PUT_MODE (*op, V2DImode);
3657 }
3658 else if (MEM_P (*op))
3659 {
3660 rtx tmp = gen_reg_rtx (DImode);
3661
3662 emit_insn_before (gen_move_insn (tmp, *op), insn);
3663 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3664
3665 if (dump_file)
3666 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3667 INSN_UID (insn), REGNO (tmp));
3668 }
3669 else if (REG_P (*op))
3670 {
3671 /* We may not have converted the register use in case
3672 this register has no definition.  Otherwise it
3673 should have been converted in convert_reg. */
3674 df_ref ref;
3675 FOR_EACH_INSN_USE (ref, insn)
3676 if (DF_REF_REGNO (ref) == REGNO (*op))
3677 {
3678 gcc_assert (!DF_REF_CHAIN (ref));
3679 break;
3680 }
3681 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
3682 }
3683 else if (CONST_INT_P (*op))
3684 {
3685 rtx vec_cst;
3686 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
3687
3688 /* Prefer all ones vector in case of -1. */
3689 if (constm1_operand (*op, GET_MODE (*op)))
3690 vec_cst = CONSTM1_RTX (V2DImode);
3691 else
3692 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
3693 gen_rtvec (2, *op, const0_rtx));
3694
3695 if (!standard_sse_constant_p (vec_cst, V2DImode))
3696 {
3697 start_sequence ();
3698 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
3699 rtx_insn *seq = get_insns ();
3700 end_sequence ();
3701 emit_insn_before (seq, insn);
3702 }
3703
3704 emit_insn_before (gen_move_insn (tmp, vec_cst), insn);
3705 *op = tmp;
3706 }
3707 else
3708 {
3709 gcc_assert (SUBREG_P (*op));
3710 gcc_assert (GET_MODE (*op) == V2DImode);
3711 }
3712 }
3713
3714 /* Convert INSN to vector mode. */
3715
3716 void
3717 dimode_scalar_chain::convert_insn (rtx_insn *insn)
3718 {
3719 rtx def_set = single_set (insn);
3720 rtx src = SET_SRC (def_set);
3721 rtx dst = SET_DEST (def_set);
3722 rtx subreg;
3723
3724 if (MEM_P (dst) && !REG_P (src))
3725 {
3726 /* The vector operation cannot store its result directly to
3727 memory, therefore a temporary register is required. */
3728 rtx tmp = gen_reg_rtx (DImode);
3729 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3730 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
3731 }
3732
3733 switch (GET_CODE (src))
3734 {
3735 case PLUS:
3736 case MINUS:
3737 case IOR:
3738 case XOR:
3739 case AND:
3740 convert_op (&XEXP (src, 0), insn);
3741 convert_op (&XEXP (src, 1), insn);
3742 PUT_MODE (src, V2DImode);
3743 break;
3744
3745 case MEM:
3746 if (!REG_P (dst))
3747 convert_op (&src, insn);
3748 break;
3749
3750 case REG:
3751 if (!MEM_P (dst))
3752 convert_op (&src, insn);
3753 break;
3754
3755 case SUBREG:
3756 gcc_assert (GET_MODE (src) == V2DImode);
3757 break;
3758
3759 case COMPARE:
3760 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3761
3762 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3763 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3764
3765 if (REG_P (src))
3766 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3767 else
3768 subreg = copy_rtx_if_shared (src);
3769 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3770 copy_rtx_if_shared (subreg),
3771 copy_rtx_if_shared (subreg)),
3772 insn);
3773 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3774 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3775 copy_rtx_if_shared (src)),
3776 UNSPEC_PTEST);
3777 break;
3778
3779 case CONST_INT:
3780 convert_op (&src, insn);
3781 break;
3782
3783 default:
3784 gcc_unreachable ();
3785 }
3786
3787 SET_SRC (def_set) = src;
3788 SET_DEST (def_set) = dst;
3789
3790 /* Drop possible dead definitions. */
3791 PATTERN (insn) = def_set;
3792
3793 INSN_CODE (insn) = -1;
3794 recog_memoized (insn);
3795 df_insn_rescan (insn);
3796 }
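
/* Putting convert_op and convert_insn together, an illustrative
   (hypothetical) transformation of a chain insn is

     (set (reg:DI 100) (and:DI (reg:DI 100) (reg:DI 101)))

   becoming

     (set (subreg:V2DI (reg:DI 100) 0)
          (and:V2DI (subreg:V2DI (reg:DI 100) 0)
                    (subreg:V2DI (reg:DI 101) 0)))

   which recog can then match as a single SSE PAND.  */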
3797
3798 /* Fix uses of converted REG in debug insns. */
3799
3800 void
3801 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
3802 {
3803 if (!flag_var_tracking)
3804 return;
3805
3806 df_ref ref;
3807 for (ref = DF_REG_USE_CHAIN (REGNO (reg));
3808 ref;
3809 ref = DF_REF_NEXT_REG (ref))
3810 {
3811 rtx_insn *insn = DF_REF_INSN (ref);
3812 if (DEBUG_INSN_P (insn))
3813 {
3814 /* It may be a debug insn with a TImode variable in
3815 register. */
3816 rtx val = PATTERN (insn);
3817 if (GET_MODE (val) != TImode)
3818 continue;
3819 gcc_assert (GET_CODE (val) == VAR_LOCATION);
3820 rtx loc = PAT_VAR_LOCATION_LOC (val);
3821 /* It may have been converted to TImode already. */
3822 if (GET_MODE (loc) == TImode)
3823 continue;
3824 gcc_assert (REG_P (loc)
3825 && GET_MODE (loc) == V1TImode);
3826 /* Convert V1TImode register, which has been updated by a SET
3827 insn before, to SUBREG TImode. */
3828 PAT_VAR_LOCATION_LOC (val) = gen_rtx_SUBREG (TImode, loc, 0);
3829 df_insn_rescan (insn);
3830 }
3831 }
3832 }
3833
3834 /* Convert INSN from TImode to V1TImode. */
3835
3836 void
3837 timode_scalar_chain::convert_insn (rtx_insn *insn)
3838 {
3839 rtx def_set = single_set (insn);
3840 rtx src = SET_SRC (def_set);
3841 rtx dst = SET_DEST (def_set);
3842
3843 switch (GET_CODE (dst))
3844 {
3845 case REG:
3846 {
3847 rtx tmp = find_reg_equal_equiv_note (insn);
3848 if (tmp)
3849 PUT_MODE (XEXP (tmp, 0), V1TImode);
3850 PUT_MODE (dst, V1TImode);
3851 fix_debug_reg_uses (dst);
3852 }
3853 break;
3854 case MEM:
3855 PUT_MODE (dst, V1TImode);
3856 break;
3857
3858 default:
3859 gcc_unreachable ();
3860 }
3861
3862 switch (GET_CODE (src))
3863 {
3864 case REG:
3865 PUT_MODE (src, V1TImode);
3866 /* Call fix_debug_reg_uses only if SRC is never defined. */
3867 if (!DF_REG_DEF_CHAIN (REGNO (src)))
3868 fix_debug_reg_uses (src);
3869 break;
3870
3871 case MEM:
3872 PUT_MODE (src, V1TImode);
3873 break;
3874
3875 case CONST_WIDE_INT:
3876 if (NONDEBUG_INSN_P (insn))
3877 {
3878 /* Since there is no instruction to store a 128-bit constant,
3879 a temporary register is required. */
3880 rtx tmp = gen_reg_rtx (V1TImode);
3881 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
3882 src = validize_mem (force_const_mem (V1TImode, src));
3883 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
3884 dst = tmp;
3885 }
3886 break;
3887
3888 case CONST_INT:
3889 switch (standard_sse_constant_p (src, TImode))
3890 {
3891 case 1:
3892 src = CONST0_RTX (GET_MODE (dst));
3893 break;
3894 case 2:
3895 src = CONSTM1_RTX (GET_MODE (dst));
3896 break;
3897 default:
3898 gcc_unreachable ();
3899 }
3900 if (NONDEBUG_INSN_P (insn))
3901 {
3902 rtx tmp = gen_reg_rtx (V1TImode);
3903 /* Since there is no instruction to store a standard SSE
3904 constant, a temporary register is required. */
3905 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
3906 dst = tmp;
3907 }
3908 break;
3909
3910 default:
3911 gcc_unreachable ();
3912 }
3913
3914 SET_SRC (def_set) = src;
3915 SET_DEST (def_set) = dst;
3916
3917 /* Drop possible dead definitions. */
3918 PATTERN (insn) = def_set;
3919
3920 INSN_CODE (insn) = -1;
3921 recog_memoized (insn);
3922 df_insn_rescan (insn);
3923 }
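
/* An illustrative TImode example: a 128-bit copy such as

     (set (mem:TI (reg:DI 0 ax)) (reg:TI 100))

   is rewritten into

     (set (mem:V1TI (reg:DI 0 ax)) (reg:V1TI 100))

   so a single SSE movdqa/movdqu is used instead of a pair of 64-bit
   integer moves.  */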
3924
3925 void
3926 dimode_scalar_chain::convert_registers ()
3927 {
3928 bitmap_iterator bi;
3929 unsigned id;
3930
3931 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3932 convert_reg (id);
3933
3934 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3935 make_vector_copies (id);
3936 }
3937
3938 /* Convert whole chain creating required register
3939 conversions and copies. */
3940
3941 int
3942 scalar_chain::convert ()
3943 {
3944 bitmap_iterator bi;
3945 unsigned id;
3946 int converted_insns = 0;
3947
3948 if (!dbg_cnt (stv_conversion))
3949 return 0;
3950
3951 if (dump_file)
3952 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
3953
3954 convert_registers ();
3955
3956 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3957 {
3958 convert_insn (DF_INSN_UID_GET (id)->insn);
3959 converted_insns++;
3960 }
3961
3962 return converted_insns;
3963 }
3964
3965 /* Main STV pass function. Find and convert scalar
3966 instructions into vector mode when profitable. */
3967
3968 static unsigned int
3969 convert_scalars_to_vector ()
3970 {
3971 basic_block bb;
3972 bitmap candidates;
3973 int converted_insns = 0;
3974
3975 bitmap_obstack_initialize (NULL);
3976 candidates = BITMAP_ALLOC (NULL);
3977
3978 calculate_dominance_info (CDI_DOMINATORS);
3979 df_set_flags (DF_DEFER_INSN_RESCAN);
3980 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3981 df_md_add_problem ();
3982 df_analyze ();
3983
3984 /* Find all instructions we want to convert into vector mode. */
3985 if (dump_file)
3986 fprintf (dump_file, "Searching for mode conversion candidates...\n");
3987
3988 FOR_EACH_BB_FN (bb, cfun)
3989 {
3990 rtx_insn *insn;
3991 FOR_BB_INSNS (bb, insn)
3992 if (scalar_to_vector_candidate_p (insn))
3993 {
3994 if (dump_file)
3995 fprintf (dump_file, " insn %d is marked as a candidate\n",
3996 INSN_UID (insn));
3997
3998 bitmap_set_bit (candidates, INSN_UID (insn));
3999 }
4000 }
4001
4002 remove_non_convertible_regs (candidates);
4003
4004 if (bitmap_empty_p (candidates))
4005 if (dump_file)
4006 fprintf (dump_file, "There are no candidates for optimization.\n");
4007
4008 while (!bitmap_empty_p (candidates))
4009 {
4010 unsigned uid = bitmap_first_set_bit (candidates);
4011 scalar_chain *chain;
4012
4013 if (TARGET_64BIT)
4014 chain = new timode_scalar_chain;
4015 else
4016 chain = new dimode_scalar_chain;
4017
4018 /* Find instructions chain we want to convert to vector mode.
4019 Check all uses and definitions to estimate all required
4020 conversions. */
4021 chain->build (candidates, uid);
4022
4023 if (chain->compute_convert_gain () > 0)
4024 converted_insns += chain->convert ();
4025 else
4026 if (dump_file)
4027 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
4028 chain->chain_id);
4029
4030 delete chain;
4031 }
4032
4033 if (dump_file)
4034 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
4035
4036 BITMAP_FREE (candidates);
4037 bitmap_obstack_release (NULL);
4038 df_process_deferred_rescans ();
4039
4040 /* Conversion means we may have 128-bit register spills/fills
4041 which require an aligned stack. */
4042 if (converted_insns)
4043 {
4044 if (crtl->stack_alignment_needed < 128)
4045 crtl->stack_alignment_needed = 128;
4046 if (crtl->stack_alignment_estimated < 128)
4047 crtl->stack_alignment_estimated = 128;
4048 }
4049
4050 return 0;
4051 }
4052
4053 namespace {
4054
4055 const pass_data pass_data_insert_vzeroupper =
4056 {
4057 RTL_PASS, /* type */
4058 "vzeroupper", /* name */
4059 OPTGROUP_NONE, /* optinfo_flags */
4060 TV_MACH_DEP, /* tv_id */
4061 0, /* properties_required */
4062 0, /* properties_provided */
4063 0, /* properties_destroyed */
4064 0, /* todo_flags_start */
4065 TODO_df_finish, /* todo_flags_finish */
4066 };
4067
4068 class pass_insert_vzeroupper : public rtl_opt_pass
4069 {
4070 public:
4071 pass_insert_vzeroupper(gcc::context *ctxt)
4072 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
4073 {}
4074
4075 /* opt_pass methods: */
4076 virtual bool gate (function *)
4077 {
4078 return TARGET_AVX && !TARGET_AVX512F
4079 && TARGET_VZEROUPPER && flag_expensive_optimizations
4080 && !optimize_size;
4081 }
4082
4083 virtual unsigned int execute (function *)
4084 {
4085 return rest_of_handle_insert_vzeroupper ();
4086 }
4087
4088 }; // class pass_insert_vzeroupper
4089
4090 const pass_data pass_data_stv =
4091 {
4092 RTL_PASS, /* type */
4093 "stv", /* name */
4094 OPTGROUP_NONE, /* optinfo_flags */
4095 TV_MACH_DEP, /* tv_id */
4096 0, /* properties_required */
4097 0, /* properties_provided */
4098 0, /* properties_destroyed */
4099 0, /* todo_flags_start */
4100 TODO_df_finish, /* todo_flags_finish */
4101 };
4102
4103 class pass_stv : public rtl_opt_pass
4104 {
4105 public:
4106 pass_stv (gcc::context *ctxt)
4107 : rtl_opt_pass (pass_data_stv, ctxt)
4108 {}
4109
4110 /* opt_pass methods: */
4111 virtual bool gate (function *)
4112 {
4113 return TARGET_STV && TARGET_SSE2 && optimize > 1;
4114 }
4115
4116 virtual unsigned int execute (function *)
4117 {
4118 return convert_scalars_to_vector ();
4119 }
4120
4121 }; // class pass_stv
4122
4123 } // anon namespace
4124
4125 rtl_opt_pass *
4126 make_pass_insert_vzeroupper (gcc::context *ctxt)
4127 {
4128 return new pass_insert_vzeroupper (ctxt);
4129 }
4130
4131 rtl_opt_pass *
4132 make_pass_stv (gcc::context *ctxt)
4133 {
4134 return new pass_stv (ctxt);
4135 }
4136
4137 /* Return true if a red-zone is in use. */
4138
4139 bool
4140 ix86_using_red_zone (void)
4141 {
4142 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
4143 }
4144 \f
4145 /* Return a string that documents the current -m options. The caller is
4146 responsible for freeing the string. */
4147
4148 static char *
4149 ix86_target_string (HOST_WIDE_INT isa, int flags, int ix86_flags,
4150 const char *arch, const char *tune,
4151 enum fpmath_unit fpmath, bool add_nl_p)
4152 {
4153 struct ix86_target_opts
4154 {
4155 const char *option; /* option string */
4156 HOST_WIDE_INT mask; /* isa mask options */
4157 };
4158
4159 /* This table is ordered so that options like -msse4.2 that imply
4160 preceding options will match those first. */
4161 static struct ix86_target_opts isa_opts[] =
4162 {
4163 { "-mfma4", OPTION_MASK_ISA_FMA4 },
4164 { "-mfma", OPTION_MASK_ISA_FMA },
4165 { "-mxop", OPTION_MASK_ISA_XOP },
4166 { "-mlwp", OPTION_MASK_ISA_LWP },
4167 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
4168 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
4169 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
4170 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
4171 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
4172 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
4173 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
4174 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
4175 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
4176 { "-msse4a", OPTION_MASK_ISA_SSE4A },
4177 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
4178 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
4179 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
4180 { "-msse3", OPTION_MASK_ISA_SSE3 },
4181 { "-msse2", OPTION_MASK_ISA_SSE2 },
4182 { "-msse", OPTION_MASK_ISA_SSE },
4183 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
4184 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
4185 { "-mmmx", OPTION_MASK_ISA_MMX },
4186 { "-mabm", OPTION_MASK_ISA_ABM },
4187 { "-mbmi", OPTION_MASK_ISA_BMI },
4188 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
4189 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
4190 { "-mhle", OPTION_MASK_ISA_HLE },
4191 { "-mfxsr", OPTION_MASK_ISA_FXSR },
4192 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
4193 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
4194 { "-madx", OPTION_MASK_ISA_ADX },
4195 { "-mtbm", OPTION_MASK_ISA_TBM },
4196 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
4197 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
4198 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
4199 { "-maes", OPTION_MASK_ISA_AES },
4200 { "-msha", OPTION_MASK_ISA_SHA },
4201 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
4202 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
4203 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
4204 { "-mf16c", OPTION_MASK_ISA_F16C },
4205 { "-mrtm", OPTION_MASK_ISA_RTM },
4206 { "-mxsave", OPTION_MASK_ISA_XSAVE },
4207 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
4208 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
4209 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
4210 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
4211 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
4212 { "-mmpx", OPTION_MASK_ISA_MPX },
4213 { "-mclwb", OPTION_MASK_ISA_CLWB },
4214 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
4215 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
4216 { "-mclzero", OPTION_MASK_ISA_CLZERO },
4217 { "-mpku", OPTION_MASK_ISA_PKU },
4218 };
4219
4220 /* Flag options. */
4221 static struct ix86_target_opts flag_opts[] =
4222 {
4223 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
4224 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
4225 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
4226 { "-m80387", MASK_80387 },
4227 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
4228 { "-malign-double", MASK_ALIGN_DOUBLE },
4229 { "-mcld", MASK_CLD },
4230 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
4231 { "-mieee-fp", MASK_IEEE_FP },
4232 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
4233 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
4234 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
4235 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
4236 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
4237 { "-mno-push-args", MASK_NO_PUSH_ARGS },
4238 { "-mno-red-zone", MASK_NO_RED_ZONE },
4239 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
4240 { "-mrecip", MASK_RECIP },
4241 { "-mrtd", MASK_RTD },
4242 { "-msseregparm", MASK_SSEREGPARM },
4243 { "-mstack-arg-probe", MASK_STACK_PROBE },
4244 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
4245 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
4246 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
4247 { "-mvzeroupper", MASK_VZEROUPPER },
4248 { "-mstv", MASK_STV},
4249 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
4250 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
4251 { "-mprefer-avx128", MASK_PREFER_AVX128},
4252 };
4253
4254 /* Additional flag options. */
4255 static struct ix86_target_opts ix86_flag_opts[] =
4256 {
4257 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY },
4258 };
4259
4260 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts)
4261 + ARRAY_SIZE (ix86_flag_opts) + 6][2];
4262
4263 char isa_other[40];
4264 char target_other[40];
4265 char ix86_target_other[40];
4266 unsigned num = 0;
4267 unsigned i, j;
4268 char *ret;
4269 char *ptr;
4270 size_t len;
4271 size_t line_len;
4272 size_t sep_len;
4273 const char *abi;
4274
4275 memset (opts, '\0', sizeof (opts));
4276
4277 /* Add -march= option. */
4278 if (arch)
4279 {
4280 opts[num][0] = "-march=";
4281 opts[num++][1] = arch;
4282 }
4283
4284 /* Add -mtune= option. */
4285 if (tune)
4286 {
4287 opts[num][0] = "-mtune=";
4288 opts[num++][1] = tune;
4289 }
4290
4291 /* Add -m32/-m64/-mx32. */
4292 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
4293 {
4294 if ((isa & OPTION_MASK_ABI_64) != 0)
4295 abi = "-m64";
4296 else
4297 abi = "-mx32";
4298 isa &= ~ (OPTION_MASK_ISA_64BIT
4299 | OPTION_MASK_ABI_64
4300 | OPTION_MASK_ABI_X32);
4301 }
4302 else
4303 abi = "-m32";
4304 opts[num++][0] = abi;
4305
4306 /* Pick out the options in the ISA options table. */
4307 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
4308 {
4309 if ((isa & isa_opts[i].mask) != 0)
4310 {
4311 opts[num++][0] = isa_opts[i].option;
4312 isa &= ~ isa_opts[i].mask;
4313 }
4314 }
4315
4316 if (isa && add_nl_p)
4317 {
4318 opts[num++][0] = isa_other;
4319 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
4320 isa);
4321 }
4322
4323 /* Add flag options. */
4324 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
4325 {
4326 if ((flags & flag_opts[i].mask) != 0)
4327 {
4328 opts[num++][0] = flag_opts[i].option;
4329 flags &= ~ flag_opts[i].mask;
4330 }
4331 }
4332
4333 if (flags && add_nl_p)
4334 {
4335 opts[num++][0] = target_other;
4336 sprintf (target_other, "(other flags: %#x)", flags);
4337 }
4338
4339 /* Add additional flag options. */
4340 for (i = 0; i < ARRAY_SIZE (ix86_flag_opts); i++)
4341 {
4342 if ((ix86_flags & ix86_flag_opts[i].mask) != 0)
4343 {
4344 opts[num++][0] = ix86_flag_opts[i].option;
4345 ix86_flags &= ~ ix86_flag_opts[i].mask;
4346 }
4347 }
4348
4349 if (ix86_flags && add_nl_p)
4350 {
4351 opts[num++][0] = ix86_target_other;
4352 sprintf (ix86_target_other, "(other flags: %#x)", ix86_flags);
4353 }
4354
4355 /* Add -fpmath= option. */
4356 if (fpmath)
4357 {
4358 opts[num][0] = "-mfpmath=";
4359 switch ((int) fpmath)
4360 {
4361 case FPMATH_387:
4362 opts[num++][1] = "387";
4363 break;
4364
4365 case FPMATH_SSE:
4366 opts[num++][1] = "sse";
4367 break;
4368
4369 case FPMATH_387 | FPMATH_SSE:
4370 opts[num++][1] = "sse+387";
4371 break;
4372
4373 default:
4374 gcc_unreachable ();
4375 }
4376 }
4377
4378 /* Any options? */
4379 if (num == 0)
4380 return NULL;
4381
4382 gcc_assert (num < ARRAY_SIZE (opts));
4383
4384 /* Size the string. */
4385 len = 0;
4386 sep_len = (add_nl_p) ? 3 : 1;
4387 for (i = 0; i < num; i++)
4388 {
4389 len += sep_len;
4390 for (j = 0; j < 2; j++)
4391 if (opts[i][j])
4392 len += strlen (opts[i][j]);
4393 }
4394
4395 /* Build the string. */
4396 ret = ptr = (char *) xmalloc (len);
4397 line_len = 0;
4398
4399 for (i = 0; i < num; i++)
4400 {
4401 size_t len2[2];
4402
4403 for (j = 0; j < 2; j++)
4404 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
4405
4406 if (i != 0)
4407 {
4408 *ptr++ = ' ';
4409 line_len++;
4410
4411 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
4412 {
4413 *ptr++ = '\\';
4414 *ptr++ = '\n';
4415 line_len = 0;
4416 }
4417 }
4418
4419 for (j = 0; j < 2; j++)
4420 if (opts[i][j])
4421 {
4422 memcpy (ptr, opts[i][j], len2[j]);
4423 ptr += len2[j];
4424 line_len += len2[j];
4425 }
4426 }
4427
4428 *ptr = '\0';
4429 gcc_assert (ret + len >= ptr);
4430
4431 return ret;
4432 }
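
/* Purely illustrative: for an -march=haswell -m64 compilation the
   string built above might look roughly like
   "-march=haswell -mtune=haswell -m64 -mfma -mavx2 ... -mfpmath=sse",
   with a backslash-newline break inserted whenever ADD_NL_P is set
   and a line would exceed 70 columns.  */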
4433
4434 /* Return true if profiling code should be emitted before the
4435 prologue, and false otherwise.
4436 Note: for x86 the "hotfix" case is sorried (reported as unsupported). */
4437 static bool
4438 ix86_profile_before_prologue (void)
4439 {
4440 return flag_fentry != 0;
4441 }
4442
4443 /* Function that is callable from the debugger to print the current
4444 options. */
4445 void ATTRIBUTE_UNUSED
4446 ix86_debug_options (void)
4447 {
4448 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
4449 ix86_target_flags,
4450 ix86_arch_string, ix86_tune_string,
4451 ix86_fpmath, true);
4452
4453 if (opts)
4454 {
4455 fprintf (stderr, "%s\n\n", opts);
4456 free (opts);
4457 }
4458 else
4459 fputs ("<no options>\n\n", stderr);
4460
4461 return;
4462 }
4463
4464 /* Return true if T is one of the bytes we should avoid with
4465 -fmitigate-rop. */
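
/* These are the near and far RET opcodes (0xc3/0xc2 and 0xcb/0xca);
   avoiding them removes the instruction endings that return-oriented
   programming gadgets rely on.  */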
4466
4467 static bool
4468 ix86_rop_should_change_byte_p (int t)
4469 {
4470 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
4471 }
4472
4473 static const char *stringop_alg_names[] = {
4474 #define DEF_ENUM
4475 #define DEF_ALG(alg, name) #name,
4476 #include "stringop.def"
4477 #undef DEF_ENUM
4478 #undef DEF_ALG
4479 };
4480
4481 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
4482 The string is of the following form (or comma separated list of it):
4483
4484 strategy_alg:max_size:[align|noalign]
4485
4486 where the full size range for the strategy is either [0, max_size] or
4487 [min_size, max_size], in which min_size is the max_size + 1 of the
4488 preceding range. The last size range must have max_size == -1.
4489
4490 Examples:
4491
4492 1.
4493 -mmemcpy-strategy=libcall:-1:noalign
4494
4495 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4496
4497
4498 2.
4499 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4500
4501 This is to tell the compiler to use the following strategy for memset
4502 1) when the expected size is between [1, 16], use rep_8byte strategy;
4503 2) when the size is between [17, 2048], use vector_loop;
4504 3) when the size is > 2048, use libcall. */
4505
4506 struct stringop_size_range
4507 {
4508 int max;
4509 stringop_alg alg;
4510 bool noalign;
4511 };
4512
4513 static void
4514 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4515 {
4516 const struct stringop_algs *default_algs;
4517 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4518 char *curr_range_str, *next_range_str;
4519 int i = 0, n = 0;
4520
4521 if (is_memset)
4522 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4523 else
4524 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4525
4526 curr_range_str = strategy_str;
4527
4528 do
4529 {
4530 int maxs;
4531 char alg_name[128];
4532 char align[16];
4533 next_range_str = strchr (curr_range_str, ',');
4534 if (next_range_str)
4535 *next_range_str++ = '\0';
4536
4537 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4538 alg_name, &maxs, align))
4539 {
4540 error ("wrong arg %s to option %s", curr_range_str,
4541 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4542 return;
4543 }
4544
4545 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4546 {
4547 error ("size ranges of option %s should be increasing",
4548 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4549 return;
4550 }
4551
4552 for (i = 0; i < last_alg; i++)
4553 if (!strcmp (alg_name, stringop_alg_names[i]))
4554 break;
4555
4556 if (i == last_alg)
4557 {
4558 error ("wrong stringop strategy name %s specified for option %s",
4559 alg_name,
4560 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4561 return;
4562 }
4563
4564 if ((stringop_alg) i == rep_prefix_8_byte
4565 && !TARGET_64BIT)
4566 {
4567 /* rep; movq isn't available in 32-bit code. */
4568 error ("stringop strategy name %s specified for option %s "
4569 "not supported for 32-bit code",
4570 alg_name,
4571 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4572 return;
4573 }
4574
4575 input_ranges[n].max = maxs;
4576 input_ranges[n].alg = (stringop_alg) i;
4577 if (!strcmp (align, "align"))
4578 input_ranges[n].noalign = false;
4579 else if (!strcmp (align, "noalign"))
4580 input_ranges[n].noalign = true;
4581 else
4582 {
4583 error ("unknown alignment %s specified for option %s",
4584 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4585 return;
4586 }
4587 n++;
4588 curr_range_str = next_range_str;
4589 }
4590 while (curr_range_str);
4591
4592 if (input_ranges[n - 1].max != -1)
4593 {
4594 error ("the max value for the last size range should be -1"
4595 " for option %s",
4596 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4597 return;
4598 }
4599
4600 if (n > MAX_STRINGOP_ALGS)
4601 {
4602 error ("too many size ranges specified in option %s",
4603 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4604 return;
4605 }
4606
4607 /* Now override the default algs array. */
4608 for (i = 0; i < n; i++)
4609 {
4610 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4611 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4612 = input_ranges[i].alg;
4613 *const_cast<int *>(&default_algs->size[i].noalign)
4614 = input_ranges[i].noalign;
4615 }
4616 }
4617
4618 \f
4619 /* Parse the -mtune-ctrl= option.  When DUMP is true,
4620 print the features that are explicitly set. */
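
/* For example (hypothetical feature names), a string such as
   "-mtune-ctrl=feature_a,^feature_b" turns feature_a on and feature_b
   off; the valid names are those listed in ix86_tune_feature_names.  */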
4621
4622 static void
4623 parse_mtune_ctrl_str (bool dump)
4624 {
4625 if (!ix86_tune_ctrl_string)
4626 return;
4627
4628 char *next_feature_string = NULL;
4629 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4630 char *orig = curr_feature_string;
4631 int i;
4632 do
4633 {
4634 bool clear = false;
4635
4636 next_feature_string = strchr (curr_feature_string, ',');
4637 if (next_feature_string)
4638 *next_feature_string++ = '\0';
4639 if (*curr_feature_string == '^')
4640 {
4641 curr_feature_string++;
4642 clear = true;
4643 }
4644 for (i = 0; i < X86_TUNE_LAST; i++)
4645 {
4646 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4647 {
4648 ix86_tune_features[i] = !clear;
4649 if (dump)
4650 fprintf (stderr, "Explicitly %s feature %s\n",
4651 clear ? "clear" : "set", ix86_tune_feature_names[i]);
4652 break;
4653 }
4654 }
4655 if (i == X86_TUNE_LAST)
4656 error ("unknown parameter to option -mtune-ctrl: %s",
4657 clear ? curr_feature_string - 1 : curr_feature_string);
4658 curr_feature_string = next_feature_string;
4659 }
4660 while (curr_feature_string);
4661 free (orig);
4662 }
4663
4664 /* Helper function to set ix86_tune_features. IX86_TUNE is the
4665 processor type. */
4666
4667 static void
4668 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
4669 {
4670 unsigned int ix86_tune_mask = 1u << ix86_tune;
4671 int i;
4672
4673 for (i = 0; i < X86_TUNE_LAST; ++i)
4674 {
4675 if (ix86_tune_no_default)
4676 ix86_tune_features[i] = 0;
4677 else
4678 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4679 }
4680
4681 if (dump)
4682 {
4683 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4684 for (i = 0; i < X86_TUNE_LAST; i++)
4685 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4686 ix86_tune_features[i] ? "on" : "off");
4687 }
4688
4689 parse_mtune_ctrl_str (dump);
4690 }
4691
4692
4693 /* Default align_* from the processor table. */
4694
4695 static void
4696 ix86_default_align (struct gcc_options *opts)
4697 {
4698 if (opts->x_align_loops == 0)
4699 {
4700 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4701 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4702 }
4703 if (opts->x_align_jumps == 0)
4704 {
4705 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4706 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4707 }
4708 if (opts->x_align_functions == 0)
4709 {
4710 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4711 }
4712 }
4713
4714 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
4715
4716 static void
4717 ix86_override_options_after_change (void)
4718 {
4719 ix86_default_align (&global_options);
4720 }
4721
4722 /* Override various settings based on options. If MAIN_ARGS_P, the
4723 options are from the command line, otherwise they are from
4724 attributes. */
4725
4726 static void
4727 ix86_option_override_internal (bool main_args_p,
4728 struct gcc_options *opts,
4729 struct gcc_options *opts_set)
4730 {
4731 int i;
4732 unsigned int ix86_arch_mask;
4733 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4734 const char *prefix;
4735 const char *suffix;
4736 const char *sw;
4737
4738 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4739 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4740 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4741 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4742 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4743 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4744 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4745 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4746 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4747 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4748 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4749 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4750 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4751 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4752 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4753 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4754 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4755 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4756 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4757 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4758 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4759 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4760 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4761 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4762 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4763 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4764 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4765 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4766 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4767 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4768 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4769 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4770 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4771 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4772 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4773 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4774 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4775 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4776 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4777 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4778 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4779 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4780 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4781 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4782 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4783 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4784 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4785 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4786 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4787 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4788 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4789 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4790 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4791 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4792 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4793 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4794 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4795 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4796 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4797 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4798 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4799
4800 #define PTA_CORE2 \
4801 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4802 | PTA_CX16 | PTA_FXSR)
4803 #define PTA_NEHALEM \
4804 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4805 #define PTA_WESTMERE \
4806 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4807 #define PTA_SANDYBRIDGE \
4808 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4809 #define PTA_IVYBRIDGE \
4810 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4811 #define PTA_HASWELL \
4812 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4813 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4814 #define PTA_BROADWELL \
4815 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4816 #define PTA_SKYLAKE \
4817 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4818 #define PTA_SKYLAKE_AVX512 \
4819 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4820 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4821 #define PTA_KNL \
4822 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4823 #define PTA_BONNELL \
4824 (PTA_CORE2 | PTA_MOVBE)
4825 #define PTA_SILVERMONT \
4826 (PTA_WESTMERE | PTA_MOVBE)
4827
4828 /* If this reaches 64, we need to widen the struct pta flags below. */
4829
4830 static struct pta
4831 {
4832 const char *const name; /* processor name or nickname. */
4833 const enum processor_type processor;
4834 const enum attr_cpu schedule;
4835 const unsigned HOST_WIDE_INT flags;
4836 }
4837 const processor_alias_table[] =
4838 {
4839 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4840 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4841 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4842 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4843 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4844 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4845 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4846 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4847 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4848 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4849 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4850 PTA_MMX | PTA_SSE | PTA_FXSR},
4851 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4852 PTA_MMX | PTA_SSE | PTA_FXSR},
4853 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4854 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4855 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4856 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4857 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4858 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4859 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4860 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4861 PTA_MMX | PTA_SSE | PTA_FXSR},
4862 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4863 PTA_MMX | PTA_SSE | PTA_FXSR},
4864 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4865 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4866 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4867 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4868 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4869 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4870 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4871 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4872 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4873 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4874 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4875 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4876 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4877 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4878 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4879 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4880 PTA_SANDYBRIDGE},
4881 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4882 PTA_SANDYBRIDGE},
4883 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4884 PTA_IVYBRIDGE},
4885 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4886 PTA_IVYBRIDGE},
4887 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4888 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4889 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4890 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4891 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4892 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4893 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4894 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4895 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4896 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4897 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4898 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4900 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4901 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4902 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4903 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4904 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4905 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4907 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4909 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4911 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4913 {"x86-64", PROCESSOR_K8, CPU_K8,
4914 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4915 {"eden-x2", PROCESSOR_K8, CPU_K8,
4916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4917 {"nano", PROCESSOR_K8, CPU_K8,
4918 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4919 | PTA_SSSE3 | PTA_FXSR},
4920 {"nano-1000", PROCESSOR_K8, CPU_K8,
4921 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4922 | PTA_SSSE3 | PTA_FXSR},
4923 {"nano-2000", PROCESSOR_K8, CPU_K8,
4924 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4925 | PTA_SSSE3 | PTA_FXSR},
4926 {"nano-3000", PROCESSOR_K8, CPU_K8,
4927 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4928 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
4929 {"nano-x2", PROCESSOR_K8, CPU_K8,
4930 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4931 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
4932 {"eden-x4", PROCESSOR_K8, CPU_K8,
4933 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4934 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
4935 {"nano-x4", PROCESSOR_K8, CPU_K8,
4936 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4937 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
4938 {"k8", PROCESSOR_K8, CPU_K8,
4939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4940 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4941 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4943 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4944 {"opteron", PROCESSOR_K8, CPU_K8,
4945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4946 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4947 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4948 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4949 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4950 {"athlon64", PROCESSOR_K8, CPU_K8,
4951 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4952 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4953 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4954 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4955 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4956 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4957 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4958 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4959 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4960 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4961 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4962 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4963 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4964 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4965 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4966 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4967 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4968 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4969 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4970 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4971 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4972 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4973 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4974 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4975 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4976 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4977 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4978 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4979 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4980 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4981 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4982 | PTA_XSAVEOPT | PTA_FSGSBASE},
4983 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4985 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4986 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4987 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4988 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4989 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4990 | PTA_MOVBE | PTA_MWAITX},
4991 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4992 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4993 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4994 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4995 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4996 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4997 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4998 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4999 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
5000 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
5001 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
5002 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
5003 | PTA_FXSR | PTA_XSAVE},
5004 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
5005 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
5006 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
5007 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
5008 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
5009 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
5010
5011 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
5012 PTA_64BIT
5013 | PTA_HLE /* flags are only used for -march switch. */ },
5014 };
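/* Worked example of how this table is consumed by the lookup loops further
   below: -march=haswell matches the "haswell" entry, so ix86_arch becomes
   PROCESSOR_HASWELL, ix86_schedule becomes CPU_HASWELL, and the PTA_HASWELL
   mask (the Ivy Bridge set plus AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE)
   seeds the default ISA flags, except for options the user set explicitly.  */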
5015
5016 /* -mrecip options. */
5017 static struct
5018 {
5019 const char *string; /* option name */
5020 unsigned int mask; /* mask bits to set */
5021 }
5022 const recip_options[] =
5023 {
5024 { "all", RECIP_MASK_ALL },
5025 { "none", RECIP_MASK_NONE },
5026 { "div", RECIP_MASK_DIV },
5027 { "sqrt", RECIP_MASK_SQRT },
5028 { "vec-div", RECIP_MASK_VEC_DIV },
5029 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
5030 };
5031
5032 int const pta_size = ARRAY_SIZE (processor_alias_table);
5033
5034 /* Set up prefix/suffix so the error messages refer to either the
5035 command-line argument or the attribute(target).  */
5036 if (main_args_p)
5037 {
5038 prefix = "-m";
5039 suffix = "";
5040 sw = "switch";
5041 }
5042 else
5043 {
5044 prefix = "option(\"";
5045 suffix = "\")";
5046 sw = "attribute";
5047 }
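/* For example, with main_args_p a diagnostic such as the deprecation warning
   below renders as "-mtune=k8 switch", while during attribute((target(...)))
   processing the same format string renders as
   "option(\"tune=k8\") attribute".  */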
5048
5049 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
5050 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
5051 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
5052 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
5053 #ifdef TARGET_BI_ARCH
5054 else
5055 {
5056 #if TARGET_BI_ARCH == 1
5057 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
5058 is on and OPTION_MASK_ABI_X32 is off. We turn off
5059 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
5060 -mx32. */
5061 if (TARGET_X32_P (opts->x_ix86_isa_flags))
5062 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
5063 #else
5064 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
5065 on and OPTION_MASK_ABI_64 is off. We turn off
5066 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
5067 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
5068 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
5069 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
5070 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
5071 #endif
5072 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5073 && TARGET_IAMCU_P (opts->x_target_flags))
5074 sorry ("Intel MCU psABI isn%'t supported in %s mode",
5075 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
5076 }
5077 #endif
5078
5079 if (TARGET_X32_P (opts->x_ix86_isa_flags))
5080 {
5081 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
5082 OPTION_MASK_ABI_64 for TARGET_X32. */
5083 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
5084 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
5085 }
5086 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
5087 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
5088 | OPTION_MASK_ABI_X32
5089 | OPTION_MASK_ABI_64);
5090 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
5091 {
5092 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
5093 OPTION_MASK_ABI_X32 for TARGET_LP64. */
5094 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
5095 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
5096 }
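/* Net effect of the ABI normalization above (roughly): -mx32 keeps
   OPTION_MASK_ISA_64BIT set but clears OPTION_MASK_ABI_64; -m16 clears
   ISA_64BIT and both ABI masks; -m64 (LP64) keeps ISA_64BIT set and clears
   OPTION_MASK_ABI_X32.  */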
5097
5098 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5099 SUBTARGET_OVERRIDE_OPTIONS;
5100 #endif
5101
5102 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5103 SUBSUBTARGET_OVERRIDE_OPTIONS;
5104 #endif
5105
5106 /* PIC code is the default for 64-bit Darwin (Mach-O).  */
5107 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
5108 opts->x_flag_pic = 2;
5109
5110 /* Need to check -mtune=generic first. */
5111 if (opts->x_ix86_tune_string)
5112 {
5113 /* As special support for cross compilers we read -mtune=native
5114 as -mtune=generic. With native compilers we won't see the
5115 -mtune=native, as it was changed by the driver. */
5116 if (!strcmp (opts->x_ix86_tune_string, "native"))
5117 {
5118 opts->x_ix86_tune_string = "generic";
5119 }
5120 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
5121 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
5122 "%stune=k8%s or %stune=generic%s instead as appropriate",
5123 prefix, suffix, prefix, suffix, prefix, suffix);
5124 }
5125 else
5126 {
5127 if (opts->x_ix86_arch_string)
5128 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
5129 if (!opts->x_ix86_tune_string)
5130 {
5131 opts->x_ix86_tune_string
5132 = processor_target_table[TARGET_CPU_DEFAULT].name;
5133 ix86_tune_defaulted = 1;
5134 }
5135
5136 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
5137 or defaulted. We need to use a sensible tune option. */
5138 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
5139 {
5140 opts->x_ix86_tune_string = "generic";
5141 }
5142 }
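/* So, for example, -march=haswell with no explicit -mtune ends up tuning for
   haswell as well, while a literal -mtune=native (only seen by cross
   compilers, since the native driver rewrites it) falls back to generic.  */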
5143
5144 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
5145 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
5146 {
5147 /* rep; movq isn't available in 32-bit code. */
5148 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
5149 opts->x_ix86_stringop_alg = no_stringop;
5150 }
5151
5152 if (!opts->x_ix86_arch_string)
5153 opts->x_ix86_arch_string
5154 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
5155 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
5156 else
5157 ix86_arch_specified = 1;
5158
5159 if (opts_set->x_ix86_pmode)
5160 {
5161 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
5162 && opts->x_ix86_pmode == PMODE_SI)
5163 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5164 && opts->x_ix86_pmode == PMODE_DI))
5165 error ("address mode %qs not supported in the %s bit mode",
5166 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
5167 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
5168 }
5169 else
5170 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
5171 ? PMODE_DI : PMODE_SI;
5172
5173 if (!opts_set->x_ix86_abi)
5174 opts->x_ix86_abi = DEFAULT_ABI;
5175
5176 /* For targets using the MS ABI, enable ms-extensions unless they were
5177 explicitly turned off.  For non-MS ABI targets we turn this
5178 option off.  */
5179 if (!opts_set->x_flag_ms_extensions)
5180 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
5181
5182 if (opts_set->x_ix86_cmodel)
5183 {
5184 switch (opts->x_ix86_cmodel)
5185 {
5186 case CM_SMALL:
5187 case CM_SMALL_PIC:
5188 if (opts->x_flag_pic)
5189 opts->x_ix86_cmodel = CM_SMALL_PIC;
5190 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5191 error ("code model %qs not supported in the %s bit mode",
5192 "small", "32");
5193 break;
5194
5195 case CM_MEDIUM:
5196 case CM_MEDIUM_PIC:
5197 if (opts->x_flag_pic)
5198 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
5199 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5200 error ("code model %qs not supported in the %s bit mode",
5201 "medium", "32");
5202 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
5203 error ("code model %qs not supported in x32 mode",
5204 "medium");
5205 break;
5206
5207 case CM_LARGE:
5208 case CM_LARGE_PIC:
5209 if (opts->x_flag_pic)
5210 opts->x_ix86_cmodel = CM_LARGE_PIC;
5211 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5212 error ("code model %qs not supported in the %s bit mode",
5213 "large", "32");
5214 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
5215 error ("code model %qs not supported in x32 mode",
5216 "large");
5217 break;
5218
5219 case CM_32:
5220 if (opts->x_flag_pic)
5221 error ("code model %s does not support PIC mode", "32");
5222 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5223 error ("code model %qs not supported in the %s bit mode",
5224 "32", "64");
5225 break;
5226
5227 case CM_KERNEL:
5228 if (opts->x_flag_pic)
5229 {
5230 error ("code model %s does not support PIC mode", "kernel");
5231 opts->x_ix86_cmodel = CM_32;
5232 }
5233 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5234 error ("code model %qs not supported in the %s bit mode",
5235 "kernel", "32");
5236 break;
5237
5238 default:
5239 gcc_unreachable ();
5240 }
5241 }
5242 else
5243 {
5244 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
5245 use of rip-relative addressing. This eliminates fixups that
5246 would otherwise be needed if this object is to be placed in a
5247 DLL, and is essentially just as efficient as direct addressing. */
5248 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5249 && (TARGET_RDOS || TARGET_PECOFF))
5250 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
5251 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5252 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
5253 else
5254 opts->x_ix86_cmodel = CM_32;
5255 }
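/* For instance, a 64-bit ELF compilation without -mcmodel= defaults to
   CM_SMALL (or CM_SMALL_PIC under -fPIC), whereas 32-bit code always uses
   CM_32.  */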
5256 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
5257 {
5258 error ("-masm=intel not supported in this configuration");
5259 opts->x_ix86_asm_dialect = ASM_ATT;
5260 }
5261 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
5262 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
5263 sorry ("%i-bit mode not compiled in",
5264 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
5265
5266 for (i = 0; i < pta_size; i++)
5267 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
5268 {
5269 ix86_schedule = processor_alias_table[i].schedule;
5270 ix86_arch = processor_alias_table[i].processor;
5271 /* Default cpu tuning to the architecture. */
5272 ix86_tune = ix86_arch;
5273
5274 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5275 && !(processor_alias_table[i].flags & PTA_64BIT))
5276 error ("CPU you selected does not support x86-64 "
5277 "instruction set");
5278
5279 if (processor_alias_table[i].flags & PTA_MMX
5280 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
5281 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
5282 if (processor_alias_table[i].flags & PTA_3DNOW
5283 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
5284 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
5285 if (processor_alias_table[i].flags & PTA_3DNOW_A
5286 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
5287 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
5288 if (processor_alias_table[i].flags & PTA_SSE
5289 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
5290 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
5291 if (processor_alias_table[i].flags & PTA_SSE2
5292 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
5293 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
5294 if (processor_alias_table[i].flags & PTA_SSE3
5295 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
5296 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
5297 if (processor_alias_table[i].flags & PTA_SSSE3
5298 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
5299 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
5300 if (processor_alias_table[i].flags & PTA_SSE4_1
5301 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
5302 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
5303 if (processor_alias_table[i].flags & PTA_SSE4_2
5304 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
5305 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
5306 if (processor_alias_table[i].flags & PTA_AVX
5307 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
5308 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
5309 if (processor_alias_table[i].flags & PTA_AVX2
5310 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
5311 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
5312 if (processor_alias_table[i].flags & PTA_FMA
5313 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
5314 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
5315 if (processor_alias_table[i].flags & PTA_SSE4A
5316 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
5317 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
5318 if (processor_alias_table[i].flags & PTA_FMA4
5319 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
5320 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
5321 if (processor_alias_table[i].flags & PTA_XOP
5322 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
5323 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
5324 if (processor_alias_table[i].flags & PTA_LWP
5325 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
5326 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
5327 if (processor_alias_table[i].flags & PTA_ABM
5328 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
5329 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
5330 if (processor_alias_table[i].flags & PTA_BMI
5331 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
5332 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
5333 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
5334 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
5335 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
5336 if (processor_alias_table[i].flags & PTA_TBM
5337 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
5338 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
5339 if (processor_alias_table[i].flags & PTA_BMI2
5340 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
5341 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
5342 if (processor_alias_table[i].flags & PTA_CX16
5343 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
5344 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
5345 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
5346 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
5347 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
5348 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
5349 && (processor_alias_table[i].flags & PTA_NO_SAHF))
5350 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
5351 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
5352 if (processor_alias_table[i].flags & PTA_MOVBE
5353 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
5354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
5355 if (processor_alias_table[i].flags & PTA_AES
5356 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
5357 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
5358 if (processor_alias_table[i].flags & PTA_SHA
5359 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
5360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
5361 if (processor_alias_table[i].flags & PTA_PCLMUL
5362 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
5363 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
5364 if (processor_alias_table[i].flags & PTA_FSGSBASE
5365 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
5366 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
5367 if (processor_alias_table[i].flags & PTA_RDRND
5368 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
5369 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
5370 if (processor_alias_table[i].flags & PTA_F16C
5371 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
5372 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
5373 if (processor_alias_table[i].flags & PTA_RTM
5374 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
5375 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
5376 if (processor_alias_table[i].flags & PTA_HLE
5377 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
5378 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
5379 if (processor_alias_table[i].flags & PTA_PRFCHW
5380 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
5381 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
5382 if (processor_alias_table[i].flags & PTA_RDSEED
5383 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
5384 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
5385 if (processor_alias_table[i].flags & PTA_ADX
5386 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
5387 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
5388 if (processor_alias_table[i].flags & PTA_FXSR
5389 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
5390 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
5391 if (processor_alias_table[i].flags & PTA_XSAVE
5392 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
5393 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
5394 if (processor_alias_table[i].flags & PTA_XSAVEOPT
5395 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
5396 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
5397 if (processor_alias_table[i].flags & PTA_AVX512F
5398 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
5399 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
5400 if (processor_alias_table[i].flags & PTA_AVX512ER
5401 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
5402 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
5403 if (processor_alias_table[i].flags & PTA_AVX512PF
5404 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
5405 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
5406 if (processor_alias_table[i].flags & PTA_AVX512CD
5407 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
5408 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
5409 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
5410 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
5411 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
5412 if (processor_alias_table[i].flags & PTA_PCOMMIT
5413 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
5414 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
5415 if (processor_alias_table[i].flags & PTA_CLWB
5416 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
5417 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
5418 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
5419 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
5420 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
5421 if (processor_alias_table[i].flags & PTA_CLZERO
5422 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
5423 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
5424 if (processor_alias_table[i].flags & PTA_XSAVEC
5425 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
5426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
5427 if (processor_alias_table[i].flags & PTA_XSAVES
5428 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
5429 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
5430 if (processor_alias_table[i].flags & PTA_AVX512DQ
5431 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
5432 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
5433 if (processor_alias_table[i].flags & PTA_AVX512BW
5434 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
5435 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
5436 if (processor_alias_table[i].flags & PTA_AVX512VL
5437 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
5438 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
5439 if (processor_alias_table[i].flags & PTA_MPX
5440 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
5441 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
5442 if (processor_alias_table[i].flags & PTA_AVX512VBMI
5443 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
5444 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
5445 if (processor_alias_table[i].flags & PTA_AVX512IFMA
5446 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
5447 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
5448 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
5449 x86_prefetch_sse = true;
5450 if (processor_alias_table[i].flags & PTA_MWAITX
5451 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
5452 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
5453 if (processor_alias_table[i].flags & PTA_PKU
5454 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
5455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
5456
5457 /* Don't enable x87 instructions if only
5458 general registers are allowed. */
5459 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
5460 && !(opts_set->x_target_flags & MASK_80387))
5461 {
5462 if (processor_alias_table[i].flags & PTA_NO_80387)
5463 opts->x_target_flags &= ~MASK_80387;
5464 else
5465 opts->x_target_flags |= MASK_80387;
5466 }
5467 break;
5468 }
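/* The pattern above translates each PTA_FOO bit of the selected -march entry
   into OPTION_MASK_ISA_FOO, but only when the corresponding ISA option was
   not given explicitly (tracked in opts->x_ix86_isa_flags_explicit).  So,
   for example, -march=haswell -mno-avx2 keeps AVX2 disabled even though
   PTA_HASWELL includes PTA_AVX2.  */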
5469
5470 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
5471 error ("Intel MPX does not support x32");
5472
5476 if (!strcmp (opts->x_ix86_arch_string, "generic"))
5477 error ("generic CPU can be used only for %stune=%s %s",
5478 prefix, suffix, sw);
5479 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
5480 error ("intel CPU can be used only for %stune=%s %s",
5481 prefix, suffix, sw);
5482 else if (i == pta_size)
5483 error ("bad value (%s) for %sarch=%s %s",
5484 opts->x_ix86_arch_string, prefix, suffix, sw);
5485
5486 ix86_arch_mask = 1u << ix86_arch;
5487 for (i = 0; i < X86_ARCH_LAST; ++i)
5488 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5489
5490 for (i = 0; i < pta_size; i++)
5491 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
5492 {
5493 ix86_schedule = processor_alias_table[i].schedule;
5494 ix86_tune = processor_alias_table[i].processor;
5495 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5496 {
5497 if (!(processor_alias_table[i].flags & PTA_64BIT))
5498 {
5499 if (ix86_tune_defaulted)
5500 {
5501 opts->x_ix86_tune_string = "x86-64";
5502 for (i = 0; i < pta_size; i++)
5503 if (! strcmp (opts->x_ix86_tune_string,
5504 processor_alias_table[i].name))
5505 break;
5506 ix86_schedule = processor_alias_table[i].schedule;
5507 ix86_tune = processor_alias_table[i].processor;
5508 }
5509 else
5510 error ("CPU you selected does not support x86-64 "
5511 "instruction set");
5512 }
5513 }
5514 /* Intel CPUs have always interpreted SSE prefetch instructions as
5515 NOPs; so, we can enable SSE prefetch instructions even when
5516 -mtune (rather than -march) points us to a processor that has them.
5517 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5518 higher processors. */
5519 if (TARGET_CMOV
5520 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5521 x86_prefetch_sse = true;
5522 break;
5523 }
5524
5525 if (ix86_tune_specified && i == pta_size)
5526 error ("bad value (%s) for %stune=%s %s",
5527 opts->x_ix86_tune_string, prefix, suffix, sw);
5528
5529 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5530
5531 #ifndef USE_IX86_FRAME_POINTER
5532 #define USE_IX86_FRAME_POINTER 0
5533 #endif
5534
5535 #ifndef USE_X86_64_FRAME_POINTER
5536 #define USE_X86_64_FRAME_POINTER 0
5537 #endif
5538
5539 /* Set the default values for switches whose default depends on TARGET_64BIT
5540 in case they weren't overridden by command-line options.  */
5541 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5542 {
5543 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5544 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5545 if (opts->x_flag_asynchronous_unwind_tables
5546 && !opts_set->x_flag_unwind_tables
5547 && TARGET_64BIT_MS_ABI)
5548 opts->x_flag_unwind_tables = 1;
5549 if (opts->x_flag_asynchronous_unwind_tables == 2)
5550 opts->x_flag_unwind_tables
5551 = opts->x_flag_asynchronous_unwind_tables = 1;
5552 if (opts->x_flag_pcc_struct_return == 2)
5553 opts->x_flag_pcc_struct_return = 0;
5554 }
5555 else
5556 {
5557 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5558 opts->x_flag_omit_frame_pointer
5559 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5560 if (opts->x_flag_asynchronous_unwind_tables == 2)
5561 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5562 if (opts->x_flag_pcc_struct_return == 2)
5563 {
5564 /* Intel MCU psABI specifies that -freg-struct-return should
5565 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5566 we check -miamcu so that -freg-struct-return is always
5567 turned on if -miamcu is used. */
5568 if (TARGET_IAMCU_P (opts->x_target_flags))
5569 opts->x_flag_pcc_struct_return = 0;
5570 else
5571 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5572 }
5573 }
5574
5575 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5576 /* TODO: ix86_cost should be chosen at instruction or function granularity
5577 so that for cold code we can use size_cost even in !optimize_size compilation.  */
5578 if (opts->x_optimize_size)
5579 ix86_cost = &ix86_size_cost;
5580 else
5581 ix86_cost = ix86_tune_cost;
5582
5583 /* Arrange to set up i386_stack_locals for all functions. */
5584 init_machine_status = ix86_init_machine_status;
5585
5586 /* Validate -mregparm= value. */
5587 if (opts_set->x_ix86_regparm)
5588 {
5589 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5590 warning (0, "-mregparm is ignored in 64-bit mode");
5591 else if (TARGET_IAMCU_P (opts->x_target_flags))
5592 warning (0, "-mregparm is ignored for Intel MCU psABI");
5593 if (opts->x_ix86_regparm > REGPARM_MAX)
5594 {
5595 error ("-mregparm=%d is not between 0 and %d",
5596 opts->x_ix86_regparm, REGPARM_MAX);
5597 opts->x_ix86_regparm = 0;
5598 }
5599 }
5600 if (TARGET_IAMCU_P (opts->x_target_flags)
5601 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5602 opts->x_ix86_regparm = REGPARM_MAX;
5603
5604 /* Default align_* from the processor table. */
5605 ix86_default_align (opts);
5606
5607 /* Provide default for -mbranch-cost= value. */
5608 if (!opts_set->x_ix86_branch_cost)
5609 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5610
5611 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5612 {
5613 opts->x_target_flags
5614 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5615
5616 /* Enable by default the SSE and MMX builtins. Do allow the user to
5617 explicitly disable any of these. In particular, disabling SSE and
5618 MMX for kernel code is extremely useful. */
5619 if (!ix86_arch_specified)
5620 opts->x_ix86_isa_flags
5621 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5622 | TARGET_SUBTARGET64_ISA_DEFAULT)
5623 & ~opts->x_ix86_isa_flags_explicit);
5624
5625 if (TARGET_RTD_P (opts->x_target_flags))
5626 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5627 }
5628 else
5629 {
5630 opts->x_target_flags
5631 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5632
5633 if (!ix86_arch_specified)
5634 opts->x_ix86_isa_flags
5635 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5636
5637 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
5638 when the programmer takes care to keep the stack from being clobbered.  */
5639 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5640 opts->x_target_flags |= MASK_NO_RED_ZONE;
5641 }
5642
5643 /* Keep nonleaf frame pointers. */
5644 if (opts->x_flag_omit_frame_pointer)
5645 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5646 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5647 opts->x_flag_omit_frame_pointer = 1;
5648
5649 /* If we're doing fast math, we don't care about comparison order
5650 wrt NaNs. This lets us use a shorter comparison sequence. */
5651 if (opts->x_flag_finite_math_only)
5652 opts->x_target_flags &= ~MASK_IEEE_FP;
5653
5654 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5655 since the insns won't need emulation. */
5656 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5657 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5658
5659 /* Likewise, if the target doesn't have a 387, or we've specified
5660 software floating point, don't use 387 inline intrinsics. */
5661 if (!TARGET_80387_P (opts->x_target_flags))
5662 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5663
5664 /* Turn on MMX builtins for -msse. */
5665 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5666 opts->x_ix86_isa_flags
5667 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5668
5669 /* Enable SSE prefetch. */
5670 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5671 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5672 x86_prefetch_sse = true;
5673
5674 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5675 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5676 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5677 opts->x_ix86_isa_flags
5678 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5679
5680 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5681 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5682 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5683 opts->x_ix86_isa_flags
5684 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5685
5686 /* Enable lzcnt instruction for -mabm. */
5687 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
5688 opts->x_ix86_isa_flags
5689 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5690
5691 /* Validate -mpreferred-stack-boundary= value or default it to
5692 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5693 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5694 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5695 {
5696 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5697 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5698 int max = (TARGET_SEH ? 4 : 12);
5699
5700 if (opts->x_ix86_preferred_stack_boundary_arg < min
5701 || opts->x_ix86_preferred_stack_boundary_arg > max)
5702 {
5703 if (min == max)
5704 error ("-mpreferred-stack-boundary is not supported "
5705 "for this target");
5706 else
5707 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5708 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5709 }
5710 else
5711 ix86_preferred_stack_boundary
5712 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5713 }
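/* The argument is a log2 byte count, so for example
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. a 16-byte preferred stack alignment.  */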
5714
5715 /* Set the default value for -mstackrealign. */
5716 if (opts->x_ix86_force_align_arg_pointer == -1)
5717 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5718
5719 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5720
5721 /* Validate -mincoming-stack-boundary= value or default it to
5722 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5723 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5724 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5725 {
5726 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5727
5728 if (opts->x_ix86_incoming_stack_boundary_arg < min
5729 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5730 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5731 opts->x_ix86_incoming_stack_boundary_arg, min);
5732 else
5733 {
5734 ix86_user_incoming_stack_boundary
5735 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5736 ix86_incoming_stack_boundary
5737 = ix86_user_incoming_stack_boundary;
5738 }
5739 }
5740
5741 #ifndef NO_PROFILE_COUNTERS
5742 if (flag_nop_mcount)
5743 error ("-mnop-mcount is not compatible with this target");
5744 #endif
5745 if (flag_nop_mcount && flag_pic)
5746 error ("-mnop-mcount is not implemented for -fPIC");
5747
5748 /* Accept -msseregparm only if at least SSE support is enabled. */
5749 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5750 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5751 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5752
5753 if (opts_set->x_ix86_fpmath)
5754 {
5755 if (opts->x_ix86_fpmath & FPMATH_SSE)
5756 {
5757 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5758 {
5759 if (TARGET_80387_P (opts->x_target_flags))
5760 {
5761 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5762 opts->x_ix86_fpmath = FPMATH_387;
5763 }
5764 }
5765 else if ((opts->x_ix86_fpmath & FPMATH_387)
5766 && !TARGET_80387_P (opts->x_target_flags))
5767 {
5768 warning (0, "387 instruction set disabled, using SSE arithmetics");
5769 opts->x_ix86_fpmath = FPMATH_SSE;
5770 }
5771 }
5772 }
5773 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5774 -mfpmath=387.  The latter is nevertheless the default on many targets,
5775 since the extra 80-bit precision of temporaries is considered part of the ABI.
5776 Override the default at least for -ffast-math.
5777 TODO: -mfpmath=both seems to produce code that performs the same with
5778 slightly smaller binaries.  It is however not clear whether register
5779 allocation is ready for this setting.
5780 Also, -mfpmath=387 is overall noticeably more compact (about 4-5%) than SSE
5781 codegen.  We may switch to 387 with -ffast-math for size-optimized
5782 functions.  */
5783 else if (fast_math_flags_set_p (&global_options)
5784 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5785 opts->x_ix86_fpmath = FPMATH_SSE;
5786 else
5787 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
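/* For instance, compiling with -msse2 -ffast-math and no explicit -mfpmath=
   lands in the fast_math_flags_set_p branch above and selects FPMATH_SSE;
   otherwise the target's FPMATH default applies.  */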
5788
5789 /* Use external vectorized library in vectorizing intrinsics. */
5790 if (opts_set->x_ix86_veclibabi_type)
5791 switch (opts->x_ix86_veclibabi_type)
5792 {
5793 case ix86_veclibabi_type_svml:
5794 ix86_veclib_handler = ix86_veclibabi_svml;
5795 break;
5796
5797 case ix86_veclibabi_type_acml:
5798 ix86_veclib_handler = ix86_veclibabi_acml;
5799 break;
5800
5801 default:
5802 gcc_unreachable ();
5803 }
5804
5805 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5806 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5807 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5808
5809 /* If stack probes are required, the space used for large function
5810 arguments on the stack must also be probed, so enable
5811 -maccumulate-outgoing-args so this happens in the prologue. */
5812 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5813 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5814 {
5815 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5816 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5817 "for correctness", prefix, suffix);
5818 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5819 }
5820
5821 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5822 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5823 if (fixed_regs[BP_REG]
5824 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5825 {
5826 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5827 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5828 prefix, suffix);
5829 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5830 }
5831
5832 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5833 {
5834 char *p;
5835 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5836 p = strchr (internal_label_prefix, 'X');
5837 internal_label_prefix_len = p - internal_label_prefix;
5838 *p = '\0';
5839 }
5840
5841 /* When no scheduling description is available, disable the scheduler pass
5842 so it won't slow down compilation or make x87 code slower.  */
5843 if (!TARGET_SCHEDULE)
5844 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5845
5846 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5847 ix86_tune_cost->simultaneous_prefetches,
5848 opts->x_param_values,
5849 opts_set->x_param_values);
5850 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5851 ix86_tune_cost->prefetch_block,
5852 opts->x_param_values,
5853 opts_set->x_param_values);
5854 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5855 ix86_tune_cost->l1_cache_size,
5856 opts->x_param_values,
5857 opts_set->x_param_values);
5858 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5859 ix86_tune_cost->l2_cache_size,
5860 opts->x_param_values,
5861 opts_set->x_param_values);
5862
5863 /* Restrict number of if-converted SET insns to 1. */
5864 if (TARGET_ONE_IF_CONV_INSN)
5865 maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
5866 1,
5867 opts->x_param_values,
5868 opts_set->x_param_values);
5869
5870 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
5871 if (opts->x_flag_prefetch_loop_arrays < 0
5872 && HAVE_prefetch
5873 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5874 && !opts->x_optimize_size
5875 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5876 opts->x_flag_prefetch_loop_arrays = 1;
5877
5878 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5879 can be optimized to ap = __builtin_next_arg (0).  */
5880 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5881 targetm.expand_builtin_va_start = NULL;
5882
5883 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5884 {
5885 ix86_gen_leave = gen_leave_rex64;
5886 if (Pmode == DImode)
5887 {
5888 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5889 ix86_gen_tls_local_dynamic_base_64
5890 = gen_tls_local_dynamic_base_64_di;
5891 }
5892 else
5893 {
5894 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5895 ix86_gen_tls_local_dynamic_base_64
5896 = gen_tls_local_dynamic_base_64_si;
5897 }
5898 }
5899 else
5900 ix86_gen_leave = gen_leave;
5901
5902 if (Pmode == DImode)
5903 {
5904 ix86_gen_add3 = gen_adddi3;
5905 ix86_gen_sub3 = gen_subdi3;
5906 ix86_gen_sub3_carry = gen_subdi3_carry;
5907 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5908 ix86_gen_andsp = gen_anddi3;
5909 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5910 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5911 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5912 ix86_gen_monitor = gen_sse3_monitor_di;
5913 ix86_gen_monitorx = gen_monitorx_di;
5914 ix86_gen_clzero = gen_clzero_di;
5915 }
5916 else
5917 {
5918 ix86_gen_add3 = gen_addsi3;
5919 ix86_gen_sub3 = gen_subsi3;
5920 ix86_gen_sub3_carry = gen_subsi3_carry;
5921 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5922 ix86_gen_andsp = gen_andsi3;
5923 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5924 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5925 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5926 ix86_gen_monitor = gen_sse3_monitor_si;
5927 ix86_gen_monitorx = gen_monitorx_si;
5928 ix86_gen_clzero = gen_clzero_si;
5929 }
5930
5931 #ifdef USE_IX86_CLD
5932 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5933 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5934 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5935 #endif
5936
5937 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5938 {
5939 if (opts->x_flag_fentry > 0)
5940 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5941 "with -fpic");
5942 opts->x_flag_fentry = 0;
5943 }
5944 else if (TARGET_SEH)
5945 {
5946 if (opts->x_flag_fentry == 0)
5947 sorry ("-mno-fentry isn%'t compatible with SEH");
5948 opts->x_flag_fentry = 1;
5949 }
5950 else if (opts->x_flag_fentry < 0)
5951 {
5952 #if defined(PROFILE_BEFORE_PROLOGUE)
5953 opts->x_flag_fentry = 1;
5954 #else
5955 opts->x_flag_fentry = 0;
5956 #endif
5957 }
5958
5959 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5960 opts->x_target_flags |= MASK_VZEROUPPER;
5961 if (!(opts_set->x_target_flags & MASK_STV))
5962 opts->x_target_flags |= MASK_STV;
5963 /* Disable STV if -mpreferred-stack-boundary={2,3} or
5964 -mincoming-stack-boundary={2,3} is used - the needed
5965 stack realignment is an extra cost the pass doesn't take into
5966 account, and the pass can't realign the stack.  */
5967 if (ix86_preferred_stack_boundary < 128
5968 || ix86_incoming_stack_boundary < 128)
5969 opts->x_target_flags &= ~MASK_STV;
5970 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5971 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5972 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5973 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5974 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5975 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5976 /* Enable 128-bit AVX instruction generation
5977 for the auto-vectorizer. */
5978 if (TARGET_AVX128_OPTIMAL
5979 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5980 opts->x_target_flags |= MASK_PREFER_AVX128;
5981
5982 if (opts->x_ix86_recip_name)
5983 {
5984 char *p = ASTRDUP (opts->x_ix86_recip_name);
5985 char *q;
5986 unsigned int mask, i;
5987 bool invert;
5988
5989 while ((q = strtok (p, ",")) != NULL)
5990 {
5991 p = NULL;
5992 if (*q == '!')
5993 {
5994 invert = true;
5995 q++;
5996 }
5997 else
5998 invert = false;
5999
6000 if (!strcmp (q, "default"))
6001 mask = RECIP_MASK_ALL;
6002 else
6003 {
6004 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
6005 if (!strcmp (q, recip_options[i].string))
6006 {
6007 mask = recip_options[i].mask;
6008 break;
6009 }
6010
6011 if (i == ARRAY_SIZE (recip_options))
6012 {
6013 error ("unknown option for -mrecip=%s", q);
6014 invert = false;
6015 mask = RECIP_MASK_NONE;
6016 }
6017 }
6018
6019 opts->x_recip_mask_explicit |= mask;
6020 if (invert)
6021 opts->x_recip_mask &= ~mask;
6022 else
6023 opts->x_recip_mask |= mask;
6024 }
6025 }
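/* Example of the parsing above: -mrecip=all,!sqrt first sets every bit of
   RECIP_MASK_ALL in x_recip_mask and records it in x_recip_mask_explicit,
   then the "!sqrt" token clears RECIP_MASK_SQRT again, leaving reciprocal
   approximations enabled for everything except scalar square roots.  */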
6026
6027 if (TARGET_RECIP_P (opts->x_target_flags))
6028 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
6029 else if (opts_set->x_target_flags & MASK_RECIP)
6030 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
6031
6032 /* Default long double to 64-bit for 32-bit Bionic and to __float128
6033 for 64-bit Bionic. Also default long double to 64-bit for Intel
6034 MCU psABI. */
6035 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
6036 && !(opts_set->x_target_flags
6037 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
6038 opts->x_target_flags |= (TARGET_64BIT
6039 ? MASK_LONG_DOUBLE_128
6040 : MASK_LONG_DOUBLE_64);
6041
6042 /* Only one of them can be active. */
6043 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
6044 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
6045
6046 /* Save the initial options in case the user does function specific
6047 options. */
6048 if (main_args_p)
6049 target_option_default_node = target_option_current_node
6050 = build_target_option_node (opts);
6051
6052 /* Handle stack protector */
6053 if (!opts_set->x_ix86_stack_protector_guard)
6054 opts->x_ix86_stack_protector_guard
6055 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
6056
6057 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
6058 if (opts->x_ix86_tune_memcpy_strategy)
6059 {
6060 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
6061 ix86_parse_stringop_strategy_string (str, false);
6062 free (str);
6063 }
6064
6065 if (opts->x_ix86_tune_memset_strategy)
6066 {
6067 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
6068 ix86_parse_stringop_strategy_string (str, true);
6069 free (str);
6070 }
6071 }
6072
6073 /* Implement the TARGET_OPTION_OVERRIDE hook. */
6074
6075 static void
6076 ix86_option_override (void)
6077 {
6078 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
6079 struct register_pass_info insert_vzeroupper_info
6080 = { pass_insert_vzeroupper, "reload",
6081 1, PASS_POS_INSERT_AFTER
6082 };
6083 opt_pass *pass_stv = make_pass_stv (g);
6084 struct register_pass_info stv_info_dimode
6085 = { pass_stv, "combine",
6086 1, PASS_POS_INSERT_AFTER
6087 };
6088 /* Run the 64-bit STV pass before the CSE pass so that CONST0_RTX and
6089 CONSTM1_RTX generated by the STV pass can be CSEed. */
6090 struct register_pass_info stv_info_timode
6091 = { pass_stv, "cse2",
6092 1, PASS_POS_INSERT_BEFORE
6093 };
6094
6095 ix86_option_override_internal (true, &global_options, &global_options_set);
6096
6097
6098 /* This needs to be done at start up. It's convenient to do it here. */
6099 register_pass (&insert_vzeroupper_info);
6100 register_pass (TARGET_64BIT ? &stv_info_timode : &stv_info_dimode);
6101 }
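/* The passes registered here hook into fixed points of the RTL pipeline:
   the vzeroupper insertion pass runs right after reload, and the STV pass
   runs after combine for 32-bit (DImode) targets or just before cse2 for
   64-bit (TImode) targets, as set up in the register_pass_info structures
   above.  */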
6102
6103 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
6104 static char *
6105 ix86_offload_options (void)
6106 {
6107 if (TARGET_LP64)
6108 return xstrdup ("-foffload-abi=lp64");
6109 return xstrdup ("-foffload-abi=ilp32");
6110 }
6111
6112 /* Update register usage after having seen the compiler flags. */
6113
6114 static void
6115 ix86_conditional_register_usage (void)
6116 {
6117 int i, c_mask;
6118
6119 /* If there are no caller-saved registers, preserve all registers
6120 except fixed_regs and registers used for the function return value,
6121 since aggregate_value_p checks call_used_regs[regno] on the return
6122 value.  */
6123 if (cfun && cfun->machine->no_caller_saved_registers)
6124 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6125 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
6126 call_used_regs[i] = 0;
6127
6128 /* For 32-bit targets, squash the REX registers. */
6129 if (! TARGET_64BIT)
6130 {
6131 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
6132 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6133 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
6134 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6135 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
6136 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6137 }
6138
6139 /* See the definition of CALL_USED_REGISTERS in i386.h. */
6140 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
6141
6142 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
6143
6144 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6145 {
6146 /* Set/reset conditionally defined registers from
6147 CALL_USED_REGISTERS initializer. */
6148 if (call_used_regs[i] > 1)
6149 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
6150
6151 /* Calculate registers of CLOBBERED_REGS register set
6152 as call used registers from GENERAL_REGS register set. */
6153 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
6154 && call_used_regs[i])
6155 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
6156 }
6157
6158 /* If MMX is disabled, squash the registers. */
6159 if (! TARGET_MMX)
6160 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6161 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
6162 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6163
6164 /* If SSE is disabled, squash the registers. */
6165 if (! TARGET_SSE)
6166 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6167 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
6168 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6169
6170 /* If the FPU is disabled, squash the registers. */
6171 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
6172 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6173 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
6174 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6175
6176 /* If AVX512F is disabled, squash the registers. */
6177 if (! TARGET_AVX512F)
6178 {
6179 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
6180 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6181
6182 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
6183 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6184 }
6185
6186 /* If MPX is disabled, squash the registers. */
6187 if (! TARGET_MPX)
6188 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
6189 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
6190 }
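/* As a concrete example, a 32-bit compilation squashes the REX integer and
   SSE registers (r8-r15, xmm8-xmm31) above, and with -mno-sse the SSE_REGS
   loop additionally removes xmm0-xmm7; in each case the register is made
   fixed, marked call-used and given an empty name so it never appears in
   generated code.  */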
6191
6192 \f
6193 /* Save the current options */
6194
6195 static void
6196 ix86_function_specific_save (struct cl_target_option *ptr,
6197 struct gcc_options *opts)
6198 {
6199 ptr->arch = ix86_arch;
6200 ptr->schedule = ix86_schedule;
6201 ptr->prefetch_sse = x86_prefetch_sse;
6202 ptr->tune = ix86_tune;
6203 ptr->branch_cost = ix86_branch_cost;
6204 ptr->tune_defaulted = ix86_tune_defaulted;
6205 ptr->arch_specified = ix86_arch_specified;
6206 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
6207 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
6208 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
6209 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
6210 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
6211 ptr->x_ix86_abi = opts->x_ix86_abi;
6212 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
6213 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
6214 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
6215 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
6216 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
6217 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
6218 ptr->x_ix86_pmode = opts->x_ix86_pmode;
6219 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
6220 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
6221 ptr->x_ix86_regparm = opts->x_ix86_regparm;
6222 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
6223 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
6224 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
6225 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
6226 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
6227 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
6228 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
6229 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
6230 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
6231 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
6232
6233 /* The fields are char but the variables are not; make sure the
6234 values fit in the fields. */
6235 gcc_assert (ptr->arch == ix86_arch);
6236 gcc_assert (ptr->schedule == ix86_schedule);
6237 gcc_assert (ptr->tune == ix86_tune);
6238 gcc_assert (ptr->branch_cost == ix86_branch_cost);
6239 }
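/* This function mirrors ix86_function_specific_restore below: any new
   target-saved option added to struct cl_target_option needs to be copied
   here and restored there, otherwise switching options via
   attribute((target)) would silently drop it.  */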
6240
6241 /* Restore the current options */
6242
6243 static void
6244 ix86_function_specific_restore (struct gcc_options *opts,
6245 struct cl_target_option *ptr)
6246 {
6247 enum processor_type old_tune = ix86_tune;
6248 enum processor_type old_arch = ix86_arch;
6249 unsigned int ix86_arch_mask;
6250 int i;
6251
6252 /* We don't change -fPIC. */
6253 opts->x_flag_pic = flag_pic;
6254
6255 ix86_arch = (enum processor_type) ptr->arch;
6256 ix86_schedule = (enum attr_cpu) ptr->schedule;
6257 ix86_tune = (enum processor_type) ptr->tune;
6258 x86_prefetch_sse = ptr->prefetch_sse;
6259 opts->x_ix86_branch_cost = ptr->branch_cost;
6260 ix86_tune_defaulted = ptr->tune_defaulted;
6261 ix86_arch_specified = ptr->arch_specified;
6262 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
6263 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
6264 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
6265 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
6266 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
6267 opts->x_ix86_abi = ptr->x_ix86_abi;
6268 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
6269 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
6270 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
6271 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
6272 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
6273 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
6274 opts->x_ix86_pmode = ptr->x_ix86_pmode;
6275 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
6276 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
6277 opts->x_ix86_regparm = ptr->x_ix86_regparm;
6278 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
6279 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
6280 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
6281 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
6282 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
6283 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
6284 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
6285 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
6286 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
6287 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
6288 ix86_tune_cost = processor_target_table[ix86_tune].cost;
6289 /* TODO: ix86_cost should be chosen at instruction or function granularity
6290 so that for cold code we use size_cost even in !optimize_size compilation. */
6291 if (opts->x_optimize_size)
6292 ix86_cost = &ix86_size_cost;
6293 else
6294 ix86_cost = ix86_tune_cost;
6295
6296 /* Recreate the arch feature tests if the arch changed */
6297 if (old_arch != ix86_arch)
6298 {
6299 ix86_arch_mask = 1u << ix86_arch;
6300 for (i = 0; i < X86_ARCH_LAST; ++i)
6301 ix86_arch_features[i]
6302 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
6303 }
6304
6305 /* Recreate the tune optimization tests */
6306 if (old_tune != ix86_tune)
6307 set_ix86_tune_features (ix86_tune, false);
6308 }
6309
6310 /* Adjust target options after streaming them in. This is mainly about
6311 reconciling them with global options. */
6312
6313 static void
6314 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
6315 {
6316 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
6317 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
6318 for PIC, or error out. */
6319 if (flag_pic)
6320 switch (ptr->x_ix86_cmodel)
6321 {
6322 case CM_SMALL:
6323 ptr->x_ix86_cmodel = CM_SMALL_PIC;
6324 break;
6325
6326 case CM_MEDIUM:
6327 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
6328 break;
6329
6330 case CM_LARGE:
6331 ptr->x_ix86_cmodel = CM_LARGE_PIC;
6332 break;
6333
6334 case CM_KERNEL:
6335 error ("code model %s does not support PIC mode", "kernel");
6336 break;
6337
6338 default:
6339 break;
6340 }
6341 else
6342 switch (ptr->x_ix86_cmodel)
6343 {
6344 case CM_SMALL_PIC:
6345 ptr->x_ix86_cmodel = CM_SMALL;
6346 break;
6347
6348 case CM_MEDIUM_PIC:
6349 ptr->x_ix86_cmodel = CM_MEDIUM;
6350 break;
6351
6352 case CM_LARGE_PIC:
6353 ptr->x_ix86_cmodel = CM_LARGE;
6354 break;
6355
6356 default:
6357 break;
6358 }
6359 }
6360
6361 /* Print the current options */
6362
6363 static void
6364 ix86_function_specific_print (FILE *file, int indent,
6365 struct cl_target_option *ptr)
6366 {
6367 char *target_string
6368 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
6369 ptr->x_ix86_target_flags, NULL, NULL,
6370 ptr->x_ix86_fpmath, false);
6371
6372 gcc_assert (ptr->arch < PROCESSOR_max);
6373 fprintf (file, "%*sarch = %d (%s)\n",
6374 indent, "",
6375 ptr->arch, processor_target_table[ptr->arch].name);
6376
6377 gcc_assert (ptr->tune < PROCESSOR_max);
6378 fprintf (file, "%*stune = %d (%s)\n",
6379 indent, "",
6380 ptr->tune, processor_target_table[ptr->tune].name);
6381
6382 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
6383
6384 if (target_string)
6385 {
6386 fprintf (file, "%*s%s\n", indent, "", target_string);
6387 free (target_string);
6388 }
6389 }
6390
6391 \f
6392 /* Inner function to process the attribute((target(...))), take an argument and
6393 set the current options from the argument. If we have a list, recursively go
6394 over the list. */
6395
6396 static bool
6397 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
6398 struct gcc_options *opts,
6399 struct gcc_options *opts_set,
6400 struct gcc_options *enum_opts_set)
6401 {
6402 char *next_optstr;
6403 bool ret = true;
6404
6405 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
6406 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
6407 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
6408 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
6409 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
6410
6411 enum ix86_opt_type
6412 {
6413 ix86_opt_unknown,
6414 ix86_opt_yes,
6415 ix86_opt_no,
6416 ix86_opt_str,
6417 ix86_opt_enum,
6418 ix86_opt_isa
6419 };
6420
6421 static const struct
6422 {
6423 const char *string;
6424 size_t len;
6425 enum ix86_opt_type type;
6426 int opt;
6427 int mask;
6428 } attrs[] = {
6429 /* isa options */
6430 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
6431 IX86_ATTR_ISA ("abm", OPT_mabm),
6432 IX86_ATTR_ISA ("bmi", OPT_mbmi),
6433 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
6434 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
6435 IX86_ATTR_ISA ("tbm", OPT_mtbm),
6436 IX86_ATTR_ISA ("aes", OPT_maes),
6437 IX86_ATTR_ISA ("sha", OPT_msha),
6438 IX86_ATTR_ISA ("avx", OPT_mavx),
6439 IX86_ATTR_ISA ("avx2", OPT_mavx2),
6440 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
6441 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
6442 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
6443 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
6444 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
6445 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
6446 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
6447 IX86_ATTR_ISA ("mmx", OPT_mmmx),
6448 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
6449 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
6450 IX86_ATTR_ISA ("sse", OPT_msse),
6451 IX86_ATTR_ISA ("sse2", OPT_msse2),
6452 IX86_ATTR_ISA ("sse3", OPT_msse3),
6453 IX86_ATTR_ISA ("sse4", OPT_msse4),
6454 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
6455 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
6456 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
6457 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
6458 IX86_ATTR_ISA ("fma4", OPT_mfma4),
6459 IX86_ATTR_ISA ("fma", OPT_mfma),
6460 IX86_ATTR_ISA ("xop", OPT_mxop),
6461 IX86_ATTR_ISA ("lwp", OPT_mlwp),
6462 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
6463 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
6464 IX86_ATTR_ISA ("f16c", OPT_mf16c),
6465 IX86_ATTR_ISA ("rtm", OPT_mrtm),
6466 IX86_ATTR_ISA ("hle", OPT_mhle),
6467 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
6468 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
6469 IX86_ATTR_ISA ("adx", OPT_madx),
6470 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
6471 IX86_ATTR_ISA ("xsave", OPT_mxsave),
6472 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
6473 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
6474 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
6475 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
6476 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
6477 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
6478 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
6479 IX86_ATTR_ISA ("clwb", OPT_mclwb),
6480 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
6481 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
6482 IX86_ATTR_ISA ("clzero", OPT_mclzero),
6483 IX86_ATTR_ISA ("pku", OPT_mpku),
6484
6485 /* enum options */
6486 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
6487
6488 /* string options */
6489 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
6490 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
6491
6492 /* flag options */
6493 IX86_ATTR_YES ("cld",
6494 OPT_mcld,
6495 MASK_CLD),
6496
6497 IX86_ATTR_NO ("fancy-math-387",
6498 OPT_mfancy_math_387,
6499 MASK_NO_FANCY_MATH_387),
6500
6501 IX86_ATTR_YES ("ieee-fp",
6502 OPT_mieee_fp,
6503 MASK_IEEE_FP),
6504
6505 IX86_ATTR_YES ("inline-all-stringops",
6506 OPT_minline_all_stringops,
6507 MASK_INLINE_ALL_STRINGOPS),
6508
6509 IX86_ATTR_YES ("inline-stringops-dynamically",
6510 OPT_minline_stringops_dynamically,
6511 MASK_INLINE_STRINGOPS_DYNAMICALLY),
6512
6513 IX86_ATTR_NO ("align-stringops",
6514 OPT_mno_align_stringops,
6515 MASK_NO_ALIGN_STRINGOPS),
6516
6517 IX86_ATTR_YES ("recip",
6518 OPT_mrecip,
6519 MASK_RECIP),
6520
6521 };
6522
6523 /* If this is a list, recurse to get the options. */
6524 if (TREE_CODE (args) == TREE_LIST)
6525 {
6526 bool ret = true;
6527
6528 for (; args; args = TREE_CHAIN (args))
6529 if (TREE_VALUE (args)
6530 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
6531 p_strings, opts, opts_set,
6532 enum_opts_set))
6533 ret = false;
6534
6535 return ret;
6536 }
6537
6538 else if (TREE_CODE (args) != STRING_CST)
6539 {
6540 error ("attribute %<target%> argument not a string");
6541 return false;
6542 }
6543
6544 /* Handle multiple arguments separated by commas. */
6545 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6546
6547 while (next_optstr && *next_optstr != '\0')
6548 {
6549 char *p = next_optstr;
6550 char *orig_p = p;
6551 char *comma = strchr (next_optstr, ',');
6552 const char *opt_string;
6553 size_t len, opt_len;
6554 int opt;
6555 bool opt_set_p;
6556 char ch;
6557 unsigned i;
6558 enum ix86_opt_type type = ix86_opt_unknown;
6559 int mask = 0;
6560
6561 if (comma)
6562 {
6563 *comma = '\0';
6564 len = comma - next_optstr;
6565 next_optstr = comma + 1;
6566 }
6567 else
6568 {
6569 len = strlen (p);
6570 next_optstr = NULL;
6571 }
6572
6573 /* Recognize no-xxx. */
6574 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6575 {
6576 opt_set_p = false;
6577 p += 3;
6578 len -= 3;
6579 }
6580 else
6581 opt_set_p = true;
6582
6583 /* Find the option. */
6584 ch = *p;
6585 opt = N_OPTS;
6586 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6587 {
6588 type = attrs[i].type;
6589 opt_len = attrs[i].len;
6590 if (ch == attrs[i].string[0]
6591 && ((type != ix86_opt_str && type != ix86_opt_enum)
6592 ? len == opt_len
6593 : len > opt_len)
6594 && memcmp (p, attrs[i].string, opt_len) == 0)
6595 {
6596 opt = attrs[i].opt;
6597 mask = attrs[i].mask;
6598 opt_string = attrs[i].string;
6599 break;
6600 }
6601 }
6602
6603 /* Process the option. */
6604 if (opt == N_OPTS)
6605 {
6606 error ("attribute(target(\"%s\")) is unknown", orig_p);
6607 ret = false;
6608 }
6609
6610 else if (type == ix86_opt_isa)
6611 {
6612 struct cl_decoded_option decoded;
6613
6614 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6615 ix86_handle_option (opts, opts_set,
6616 &decoded, input_location);
6617 }
6618
6619 else if (type == ix86_opt_yes || type == ix86_opt_no)
6620 {
6621 if (type == ix86_opt_no)
6622 opt_set_p = !opt_set_p;
6623
6624 if (opt_set_p)
6625 opts->x_target_flags |= mask;
6626 else
6627 opts->x_target_flags &= ~mask;
6628 }
6629
6630 else if (type == ix86_opt_str)
6631 {
6632 if (p_strings[opt])
6633 {
6634 error ("option(\"%s\") was already specified", opt_string);
6635 ret = false;
6636 }
6637 else
6638 p_strings[opt] = xstrdup (p + opt_len);
6639 }
6640
6641 else if (type == ix86_opt_enum)
6642 {
6643 bool arg_ok;
6644 int value;
6645
6646 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6647 if (arg_ok)
6648 set_option (opts, enum_opts_set, opt, value,
6649 p + opt_len, DK_UNSPECIFIED, input_location,
6650 global_dc);
6651 else
6652 {
6653 error ("attribute(target(\"%s\")) is unknown", orig_p);
6654 ret = false;
6655 }
6656 }
6657
6658 else
6659 gcc_unreachable ();
6660 }
6661
6662 return ret;
6663 }
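/* Illustrative example (editorial addition, not part of the original
   sources): a user-level declaration whose attribute string is parsed by
   ix86_valid_target_attribute_inner_p above.

       __attribute__((target ("avx2,fpmath=sse,no-sse4a")))
       void compute (float *dst, const float *src, int n);

   Each comma-separated token is looked up in the attrs[] table: "avx2" is
   an ISA option routed through ix86_handle_option, "fpmath=sse" is an enum
   option, and the "no-" prefix on "sse4a" clears the corresponding ISA
   flag instead of setting it.  */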
6664
6665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
6666
6667 tree
6668 ix86_valid_target_attribute_tree (tree args,
6669 struct gcc_options *opts,
6670 struct gcc_options *opts_set)
6671 {
6672 const char *orig_arch_string = opts->x_ix86_arch_string;
6673 const char *orig_tune_string = opts->x_ix86_tune_string;
6674 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6675 int orig_tune_defaulted = ix86_tune_defaulted;
6676 int orig_arch_specified = ix86_arch_specified;
6677 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6678 tree t = NULL_TREE;
6679 int i;
6680 struct cl_target_option *def
6681 = TREE_TARGET_OPTION (target_option_default_node);
6682 struct gcc_options enum_opts_set;
6683
6684 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6685
6686 /* Process each of the options on the chain. */
6687 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6688 opts_set, &enum_opts_set))
6689 return error_mark_node;
6690
6691 /* If the changed options are different from the default, rerun
6692 ix86_option_override_internal, and then save the options away.
6693 The string options are attribute options, and will be undone
6694 when we copy the save structure. */
6695 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6696 || opts->x_target_flags != def->x_target_flags
6697 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6698 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6699 || enum_opts_set.x_ix86_fpmath)
6700 {
6701 /* If we are using the default tune= or arch=, undo the string assigned,
6702 and use the default. */
6703 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6704 {
6705 opts->x_ix86_arch_string
6706 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
6707
6708 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6709 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6710 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6711 | OPTION_MASK_ABI_64
6712 | OPTION_MASK_ABI_X32
6713 | OPTION_MASK_CODE16);
6714
6715 }
6716 else if (!orig_arch_specified)
6717 opts->x_ix86_arch_string = NULL;
6718
6719 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6720 opts->x_ix86_tune_string
6721 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6722 else if (orig_tune_defaulted)
6723 opts->x_ix86_tune_string = NULL;
6724
6725 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6726 if (enum_opts_set.x_ix86_fpmath)
6727 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6728 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6729 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6730 {
6731 if (TARGET_80387_P (opts->x_target_flags))
6732 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6733 | FPMATH_387);
6734 else
6735 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6736 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6737 }
6738
6739 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6740 ix86_option_override_internal (false, opts, opts_set);
6741
6742 /* Add any builtin functions with the new isa if any. */
6743 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6744
6745 /* Save the current options unless we are validating options for
6746 #pragma. */
6747 t = build_target_option_node (opts);
6748
6749 opts->x_ix86_arch_string = orig_arch_string;
6750 opts->x_ix86_tune_string = orig_tune_string;
6751 opts_set->x_ix86_fpmath = orig_fpmath_set;
6752
6753 /* Free up memory allocated to hold the strings */
6754 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6755 free (option_strings[i]);
6756 }
6757
6758 return t;
6759 }
6760
6761 /* Hook to validate attribute((target("string"))). */
6762
6763 static bool
6764 ix86_valid_target_attribute_p (tree fndecl,
6765 tree ARG_UNUSED (name),
6766 tree args,
6767 int ARG_UNUSED (flags))
6768 {
6769 struct gcc_options func_options;
6770 tree new_target, new_optimize;
6771 bool ret = true;
6772
6773 /* attribute((target("default"))) does nothing, beyond
6774 affecting multi-versioning. */
6775 if (TREE_VALUE (args)
6776 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6777 && TREE_CHAIN (args) == NULL_TREE
6778 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6779 return true;
6780
6781 tree old_optimize = build_optimization_node (&global_options);
6782
6783 /* Get the optimization options of the current function. */
6784 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6785
6786 if (!func_optimize)
6787 func_optimize = old_optimize;
6788
6789 /* Init func_options. */
6790 memset (&func_options, 0, sizeof (func_options));
6791 init_options_struct (&func_options, NULL);
6792 lang_hooks.init_options_struct (&func_options);
6793
6794 cl_optimization_restore (&func_options,
6795 TREE_OPTIMIZATION (func_optimize));
6796
6797 /* Initialize func_options to the default before its target options can
6798 be set. */
6799 cl_target_option_restore (&func_options,
6800 TREE_TARGET_OPTION (target_option_default_node));
6801
6802 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6803 &global_options_set);
6804
6805 new_optimize = build_optimization_node (&func_options);
6806
6807 if (new_target == error_mark_node)
6808 ret = false;
6809
6810 else if (fndecl && new_target)
6811 {
6812 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6813
6814 if (old_optimize != new_optimize)
6815 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6816 }
6817
6818 finalize_options_struct (&func_options);
6819
6820 return ret;
6821 }
6822
6823 \f
6824 /* Hook to determine if one function can safely inline another. */
6825
6826 static bool
6827 ix86_can_inline_p (tree caller, tree callee)
6828 {
6829 bool ret = false;
6830 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6831 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6832
6833 /* If callee has no option attributes, then it is ok to inline. */
6834 if (!callee_tree)
6835 ret = true;
6836
6837 /* If caller has no option attributes, but callee does then it is not ok to
6838 inline. */
6839 else if (!caller_tree)
6840 ret = false;
6841
6842 else
6843 {
6844 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6845 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6846
6847 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
6848 function can inline an SSE2 function but an SSE2 function can't inline
6849 an SSE4 function. */
6850 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6851 != callee_opts->x_ix86_isa_flags)
6852 ret = false;
6853
6854 /* See if we have the same non-isa options. */
6855 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6856 ret = false;
6857
6858 /* See if arch, tune, etc. are the same. */
6859 else if (caller_opts->arch != callee_opts->arch)
6860 ret = false;
6861
6862 else if (caller_opts->tune != callee_opts->tune)
6863 ret = false;
6864
6865 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6866 ret = false;
6867
6868 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6869 ret = false;
6870
6871 else
6872 ret = true;
6873 }
6874
6875 return ret;
6876 }
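/* Illustrative example (editorial addition, not part of the original
   sources): given

       __attribute__((target ("sse4.2"))) static int f (int x) { return x * 2; }
       __attribute__((target ("sse2")))   static int g (int x) { return f (x); }

   f's ISA flags are not a subset of g's, so ix86_can_inline_p refuses to
   inline f into g.  The reverse direction would normally be allowed, since
   an SSE4.2 caller implies every ISA bit an SSE2 callee needs, provided
   arch, tune, fpmath and the other flags compared above also match.  */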
6877
6878 \f
6879 /* Remember the last target of ix86_set_current_function. */
6880 static GTY(()) tree ix86_previous_fndecl;
6881
6882 /* Set targets globals to the default (or current #pragma GCC target
6883 if active). Invalidate ix86_previous_fndecl cache. */
6884
6885 void
6886 ix86_reset_previous_fndecl (void)
6887 {
6888 tree new_tree = target_option_current_node;
6889 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6890 if (TREE_TARGET_GLOBALS (new_tree))
6891 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6892 else if (new_tree == target_option_default_node)
6893 restore_target_globals (&default_target_globals);
6894 else
6895 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6896 ix86_previous_fndecl = NULL_TREE;
6897 }
6898
6899 /* Set the func_type field from the function FNDECL. */
6900
6901 static void
6902 ix86_set_func_type (tree fndecl)
6903 {
6904 if (cfun->machine->func_type == TYPE_UNKNOWN)
6905 {
6906 if (lookup_attribute ("interrupt",
6907 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
6908 {
6909 int nargs = 0;
6910 for (tree arg = DECL_ARGUMENTS (fndecl);
6911 arg;
6912 arg = TREE_CHAIN (arg))
6913 nargs++;
6914 cfun->machine->no_caller_saved_registers = true;
6915 cfun->machine->func_type
6916 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
6917
6918 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
6919
6920 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
6921 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
6922 sorry ("Only DWARF debug format is supported for interrupt "
6923 "service routine.");
6924 }
6925 else
6926 {
6927 cfun->machine->func_type = TYPE_NORMAL;
6928 if (lookup_attribute ("no_caller_saved_registers",
6929 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
6930 cfun->machine->no_caller_saved_registers = true;
6931 }
6932 }
6933 }
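/* Illustrative example (editorial addition, not part of the original
   sources): handlers classified by ix86_set_func_type above.  An interrupt
   handler takes a single pointer to the interrupt frame; an exception
   handler additionally receives the error code, which is why nargs == 2
   selects TYPE_EXCEPTION.

       struct interrupt_frame;

       __attribute__((interrupt))
       void isr (struct interrupt_frame *frame);

       __attribute__((interrupt))
       void fault (struct interrupt_frame *frame, unsigned long error_code);

   (The exact integer type expected for the error code is the word-sized
   unsigned type; unsigned long is used here only for illustration.)  */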
6934
6935 /* Establish appropriate back-end context for processing the function
6936 FNDECL. The argument might be NULL to indicate processing at top
6937 level, outside of any function scope. */
6938 static void
6939 ix86_set_current_function (tree fndecl)
6940 {
6941 /* Only change the context if the function changes. This hook is called
6942 several times in the course of compiling a function, and we don't want to
6943 slow things down too much or call target_reinit when it isn't safe. */
6944 if (fndecl == ix86_previous_fndecl)
6945 {
6946 /* There may be 2 function bodies for the same function FNDECL,
6947 one is extern inline and one isn't. Call ix86_set_func_type
6948 to set the func_type field. */
6949 if (fndecl != NULL_TREE)
6950 ix86_set_func_type (fndecl);
6951 return;
6952 }
6953
6954 tree old_tree;
6955 if (ix86_previous_fndecl == NULL_TREE)
6956 old_tree = target_option_current_node;
6957 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6958 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6959 else
6960 old_tree = target_option_default_node;
6961
6962 if (fndecl == NULL_TREE)
6963 {
6964 if (old_tree != target_option_current_node)
6965 ix86_reset_previous_fndecl ();
6966 return;
6967 }
6968
6969 ix86_set_func_type (fndecl);
6970
6971 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6972 if (new_tree == NULL_TREE)
6973 new_tree = target_option_default_node;
6974
6975 if (old_tree != new_tree)
6976 {
6977 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6978 if (TREE_TARGET_GLOBALS (new_tree))
6979 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6980 else if (new_tree == target_option_default_node)
6981 restore_target_globals (&default_target_globals);
6982 else
6983 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6984 }
6985 ix86_previous_fndecl = fndecl;
6986
6987 static bool prev_no_caller_saved_registers;
6988
6989 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6990 Avoid expensive re-initialization of init_regs each time we switch
6991 function context. */
6992 if (TARGET_64BIT
6993 && (call_used_regs[SI_REG]
6994 == (cfun->machine->call_abi == MS_ABI)))
6995 reinit_regs ();
6996 /* Need to re-initialize init_regs if caller-saved registers are
6997 changed. */
6998 else if (prev_no_caller_saved_registers
6999 != cfun->machine->no_caller_saved_registers)
7000 reinit_regs ();
7001
7002 if (cfun->machine->func_type != TYPE_NORMAL
7003 || cfun->machine->no_caller_saved_registers)
7004 {
7005 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
7006 may change processor state. */
7007 const char *isa;
7008 if (TARGET_MPX)
7009 isa = "MPX";
7010 else if (TARGET_SSE)
7011 isa = "SSE";
7012 else if (TARGET_MMX)
7013 isa = "MMX/3Dnow";
7014 else if (TARGET_80387)
7015 isa = "80387";
7016 else
7017 isa = NULL;
7018 if (isa != NULL)
7019 {
7020 if (cfun->machine->func_type != TYPE_NORMAL)
7021 sorry ("%s instructions aren't allowed in %s service routine",
7022 isa, (cfun->machine->func_type == TYPE_EXCEPTION
7023 ? "exception" : "interrupt"));
7024 else
7025 sorry ("%s instructions aren't allowed in function with "
7026 "no_caller_saved_registers attribute", isa);
7027 /* Don't issue the same error twice. */
7028 cfun->machine->func_type = TYPE_NORMAL;
7029 cfun->machine->no_caller_saved_registers = false;
7030 }
7031 }
7032
7033 prev_no_caller_saved_registers
7034 = cfun->machine->no_caller_saved_registers;
7035 }
7036
7037 \f
7038 /* Return true if this goes in large data/bss. */
7039
7040 static bool
7041 ix86_in_large_data_p (tree exp)
7042 {
7043 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
7044 return false;
7045
7046 if (exp == NULL_TREE)
7047 return false;
7048
7049 /* Functions are never large data. */
7050 if (TREE_CODE (exp) == FUNCTION_DECL)
7051 return false;
7052
7053 /* Automatic variables are never large data. */
7054 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
7055 return false;
7056
7057 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7058 {
7059 const char *section = DECL_SECTION_NAME (exp);
7060 if (strcmp (section, ".ldata") == 0
7061 || strcmp (section, ".lbss") == 0)
7062 return true;
7063 return false;
7064 }
7065 else
7066 {
7067 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7068
7069 /* If this is an incomplete type with size 0, then we can't put it
7070 in data because it might be too big when completed. Also,
7071 int_size_in_bytes returns -1 if the size can vary or is larger than
7072 an integer, in which case it is also safer to assume that it goes in
7073 large data. */
7074 if (size <= 0 || size > ix86_section_threshold)
7075 return true;
7076 }
7077
7078 return false;
7079 }
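/* Illustrative example (editorial addition, not part of the original
   sources): under -mcmodel=medium with the default -mlarge-data-threshold
   of 65536 bytes, the object below exceeds ix86_section_threshold, so
   ix86_in_large_data_p returns true and it is placed in .lbss rather than
   .bss:

       static char big_buffer[1 << 20];

   Small objects, and objects with an explicit section attribute other
   than ".ldata" or ".lbss", are not treated as large data.  */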
7080
7081 /* i386-specific section flag to mark large sections. */
7082 #define SECTION_LARGE SECTION_MACH_DEP
7083
7084 /* Switch to the appropriate section for output of DECL.
7085 DECL is either a `VAR_DECL' node or a constant of some sort.
7086 RELOC indicates whether forming the initial value of DECL requires
7087 link-time relocations. */
7088
7089 ATTRIBUTE_UNUSED static section *
7090 x86_64_elf_select_section (tree decl, int reloc,
7091 unsigned HOST_WIDE_INT align)
7092 {
7093 if (ix86_in_large_data_p (decl))
7094 {
7095 const char *sname = NULL;
7096 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
7097 switch (categorize_decl_for_section (decl, reloc))
7098 {
7099 case SECCAT_DATA:
7100 sname = ".ldata";
7101 break;
7102 case SECCAT_DATA_REL:
7103 sname = ".ldata.rel";
7104 break;
7105 case SECCAT_DATA_REL_LOCAL:
7106 sname = ".ldata.rel.local";
7107 break;
7108 case SECCAT_DATA_REL_RO:
7109 sname = ".ldata.rel.ro";
7110 break;
7111 case SECCAT_DATA_REL_RO_LOCAL:
7112 sname = ".ldata.rel.ro.local";
7113 break;
7114 case SECCAT_BSS:
7115 sname = ".lbss";
7116 flags |= SECTION_BSS;
7117 break;
7118 case SECCAT_RODATA:
7119 case SECCAT_RODATA_MERGE_STR:
7120 case SECCAT_RODATA_MERGE_STR_INIT:
7121 case SECCAT_RODATA_MERGE_CONST:
7122 sname = ".lrodata";
7123 flags &= ~SECTION_WRITE;
7124 break;
7125 case SECCAT_SRODATA:
7126 case SECCAT_SDATA:
7127 case SECCAT_SBSS:
7128 gcc_unreachable ();
7129 case SECCAT_TEXT:
7130 case SECCAT_TDATA:
7131 case SECCAT_TBSS:
7132 /* We don't split these for the medium model. Place them into
7133 default sections and hope for the best. */
7134 break;
7135 }
7136 if (sname)
7137 {
7138 /* We might get called with string constants, but get_named_section
7139 doesn't like them as they are not DECLs. Also, we need to set
7140 flags in that case. */
7141 if (!DECL_P (decl))
7142 return get_section (sname, flags, NULL);
7143 return get_named_section (decl, sname, reloc);
7144 }
7145 }
7146 return default_elf_select_section (decl, reloc, align);
7147 }
7148
7149 /* Select a set of attributes for section NAME based on the properties
7150 of DECL and whether or not RELOC indicates that DECL's initializer
7151 might contain runtime relocations. */
7152
7153 static unsigned int ATTRIBUTE_UNUSED
7154 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
7155 {
7156 unsigned int flags = default_section_type_flags (decl, name, reloc);
7157
7158 if (ix86_in_large_data_p (decl))
7159 flags |= SECTION_LARGE;
7160
7161 if (decl == NULL_TREE
7162 && (strcmp (name, ".ldata.rel.ro") == 0
7163 || strcmp (name, ".ldata.rel.ro.local") == 0))
7164 flags |= SECTION_RELRO;
7165
7166 if (strcmp (name, ".lbss") == 0
7167 || strncmp (name, ".lbss.", 6) == 0
7168 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
7169 flags |= SECTION_BSS;
7170
7171 return flags;
7172 }
7173
7174 /* Build up a unique section name, expressed as a
7175 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
7176 RELOC indicates whether the initial value of EXP requires
7177 link-time relocations. */
7178
7179 static void ATTRIBUTE_UNUSED
7180 x86_64_elf_unique_section (tree decl, int reloc)
7181 {
7182 if (ix86_in_large_data_p (decl))
7183 {
7184 const char *prefix = NULL;
7185 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
7186 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
7187
7188 switch (categorize_decl_for_section (decl, reloc))
7189 {
7190 case SECCAT_DATA:
7191 case SECCAT_DATA_REL:
7192 case SECCAT_DATA_REL_LOCAL:
7193 case SECCAT_DATA_REL_RO:
7194 case SECCAT_DATA_REL_RO_LOCAL:
7195 prefix = one_only ? ".ld" : ".ldata";
7196 break;
7197 case SECCAT_BSS:
7198 prefix = one_only ? ".lb" : ".lbss";
7199 break;
7200 case SECCAT_RODATA:
7201 case SECCAT_RODATA_MERGE_STR:
7202 case SECCAT_RODATA_MERGE_STR_INIT:
7203 case SECCAT_RODATA_MERGE_CONST:
7204 prefix = one_only ? ".lr" : ".lrodata";
7205 break;
7206 case SECCAT_SRODATA:
7207 case SECCAT_SDATA:
7208 case SECCAT_SBSS:
7209 gcc_unreachable ();
7210 case SECCAT_TEXT:
7211 case SECCAT_TDATA:
7212 case SECCAT_TBSS:
7213 /* We don't split these for the medium model. Place them into
7214 default sections and hope for the best. */
7215 break;
7216 }
7217 if (prefix)
7218 {
7219 const char *name, *linkonce;
7220 char *string;
7221
7222 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7223 name = targetm.strip_name_encoding (name);
7224
7225 /* If we're using one_only, then there needs to be a .gnu.linkonce
7226 prefix to the section name. */
7227 linkonce = one_only ? ".gnu.linkonce" : "";
7228
7229 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
7230
7231 set_decl_section_name (decl, string);
7232 return;
7233 }
7234 }
7235 default_unique_section (decl, reloc);
7236 }
7237
7238 #ifdef COMMON_ASM_OP
7239
7240 #ifndef LARGECOMM_SECTION_ASM_OP
7241 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
7242 #endif
7243
7244 /* This says how to output assembler code to declare an
7245 uninitialized external linkage data object.
7246
7247 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
7248 directive for large objects. */
7249 void
7250 x86_elf_aligned_decl_common (FILE *file, tree decl,
7251 const char *name, unsigned HOST_WIDE_INT size,
7252 int align)
7253 {
7254 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
7255 && size > (unsigned int)ix86_section_threshold)
7256 {
7257 switch_to_section (get_named_section (decl, ".lbss", 0));
7258 fputs (LARGECOMM_SECTION_ASM_OP, file);
7259 }
7260 else
7261 fputs (COMMON_ASM_OP, file);
7262 assemble_name (file, name);
7263 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
7264 size, align / BITS_PER_UNIT);
7265 }
7266 #endif
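/* Illustrative example (editorial addition, not part of the original
   sources): for a large common symbol under -mcmodel=medium,
   x86_elf_aligned_decl_common above emits roughly

       .largecomm  buf,1048576,32

   (name, size in bytes, alignment in bytes) instead of the usual .comm
   directive, so the object is allocated in the large BSS area.  */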
7267
7268 /* Utility function for targets to use in implementing
7269 ASM_OUTPUT_ALIGNED_BSS. */
7270
7271 void
7272 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
7273 unsigned HOST_WIDE_INT size, int align)
7274 {
7275 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
7276 && size > (unsigned int)ix86_section_threshold)
7277 switch_to_section (get_named_section (decl, ".lbss", 0));
7278 else
7279 switch_to_section (bss_section);
7280 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
7281 #ifdef ASM_DECLARE_OBJECT_NAME
7282 last_assemble_variable_decl = decl;
7283 ASM_DECLARE_OBJECT_NAME (file, name, decl);
7284 #else
7285 /* The standard thing is just to output a label for the object. */
7286 ASM_OUTPUT_LABEL (file, name);
7287 #endif /* ASM_DECLARE_OBJECT_NAME */
7288 ASM_OUTPUT_SKIP (file, size ? size : 1);
7289 }
7290 \f
7291 /* Decide whether we must probe the stack before any space allocation
7292 on this target. It's essentially TARGET_STACK_PROBE except when
7293 -fstack-check causes the stack to be already probed differently. */
7294
7295 bool
7296 ix86_target_stack_probe (void)
7297 {
7298 /* Do not probe the stack twice if static stack checking is enabled. */
7299 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
7300 return false;
7301
7302 return TARGET_STACK_PROBE;
7303 }
7304 \f
7305 /* Decide whether we can make a sibling call to a function. DECL is the
7306 declaration of the function being targeted by the call and EXP is the
7307 CALL_EXPR representing the call. */
7308
7309 static bool
7310 ix86_function_ok_for_sibcall (tree decl, tree exp)
7311 {
7312 tree type, decl_or_type;
7313 rtx a, b;
7314 bool bind_global = decl && !targetm.binds_local_p (decl);
7315
7316 /* Sibling call isn't OK if there are no caller-saved registers
7317 since all registers must be preserved before return. */
7318 if (cfun->machine->no_caller_saved_registers)
7319 return false;
7320
7321 /* If we are generating position-independent code, we cannot sibcall
7322 optimize direct calls to global functions, as the PLT requires
7323 %ebx be live. (Darwin does not have a PLT.) */
7324 if (!TARGET_MACHO
7325 && !TARGET_64BIT
7326 && flag_pic
7327 && flag_plt
7328 && bind_global)
7329 return false;
7330
7331 /* If we need to align the outgoing stack, then sibcalling would
7332 unalign the stack, which may break the called function. */
7333 if (ix86_minimum_incoming_stack_boundary (true)
7334 < PREFERRED_STACK_BOUNDARY)
7335 return false;
7336
7337 if (decl)
7338 {
7339 decl_or_type = decl;
7340 type = TREE_TYPE (decl);
7341 }
7342 else
7343 {
7344 /* We're looking at the CALL_EXPR, we need the type of the function. */
7345 type = CALL_EXPR_FN (exp); /* pointer expression */
7346 type = TREE_TYPE (type); /* pointer type */
7347 type = TREE_TYPE (type); /* function type */
7348 decl_or_type = type;
7349 }
7350
7351 /* Check that the return value locations are the same. Like
7352 if we are returning floats on the 80387 register stack, we cannot
7353 make a sibcall from a function that doesn't return a float to a
7354 function that does or, conversely, from a function that does return
7355 a float to a function that doesn't; the necessary stack adjustment
7356 would not be executed. This is also the place we notice
7357 differences in the return value ABI. Note that it is ok for one
7358 of the functions to have void return type as long as the return
7359 value of the other is passed in a register. */
7360 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
7361 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7362 cfun->decl, false);
7363 if (STACK_REG_P (a) || STACK_REG_P (b))
7364 {
7365 if (!rtx_equal_p (a, b))
7366 return false;
7367 }
7368 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7369 ;
7370 else if (!rtx_equal_p (a, b))
7371 return false;
7372
7373 if (TARGET_64BIT)
7374 {
7375 /* The SYSV ABI has more call-clobbered registers;
7376 disallow sibcalls from MS to SYSV. */
7377 if (cfun->machine->call_abi == MS_ABI
7378 && ix86_function_type_abi (type) == SYSV_ABI)
7379 return false;
7380 }
7381 else
7382 {
7383 /* If this call is indirect, we'll need to be able to use a
7384 call-clobbered register for the address of the target function.
7385 Make sure that all such registers are not used for passing
7386 parameters. Note that DLLIMPORT functions and call to global
7387 function via GOT slot are indirect. */
7388 if (!decl
7389 || (bind_global && flag_pic && !flag_plt)
7390 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
7391 {
7392 /* Check if regparm >= 3 since arg_reg_available is set to
7393 false if regparm == 0. If regparm is 1 or 2, there is
7394 always a call-clobbered register available.
7395
7396 ??? The symbol indirect call doesn't need a call-clobbered
7397 register. But we don't know if this is a symbol indirect
7398 call or not here. */
7399 if (ix86_function_regparm (type, NULL) >= 3
7400 && !cfun->machine->arg_reg_available)
7401 return false;
7402 }
7403 }
7404
7405 /* Otherwise okay. That also includes certain types of indirect calls. */
7406 return true;
7407 }
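/* Illustrative example (editorial addition, not part of the original
   sources): a tail call that the checks above typically allow on x86-64
   (matching ABI, no return-value location mismatch, no stack realignment
   needed):

       int callee (int x);
       int caller (int x) { return callee (x + 1); }

   With optimization enabled the call can then be emitted as "jmp callee"
   instead of "call callee" followed by "ret".  */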
7408
7409 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
7410 and "sseregparm" calling convention attributes;
7411 arguments as in struct attribute_spec.handler. */
7412
7413 static tree
7414 ix86_handle_cconv_attribute (tree *node, tree name,
7415 tree args,
7416 int,
7417 bool *no_add_attrs)
7418 {
7419 if (TREE_CODE (*node) != FUNCTION_TYPE
7420 && TREE_CODE (*node) != METHOD_TYPE
7421 && TREE_CODE (*node) != FIELD_DECL
7422 && TREE_CODE (*node) != TYPE_DECL)
7423 {
7424 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7425 name);
7426 *no_add_attrs = true;
7427 return NULL_TREE;
7428 }
7429
7430 /* Can combine regparm with all attributes but fastcall and thiscall. */
7431 if (is_attribute_p ("regparm", name))
7432 {
7433 tree cst;
7434
7435 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
7436 {
7437 error ("fastcall and regparm attributes are not compatible");
7438 }
7439
7440 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
7441 {
7442 error ("regparam and thiscall attributes are not compatible");
7443 }
7444
7445 cst = TREE_VALUE (args);
7446 if (TREE_CODE (cst) != INTEGER_CST)
7447 {
7448 warning (OPT_Wattributes,
7449 "%qE attribute requires an integer constant argument",
7450 name);
7451 *no_add_attrs = true;
7452 }
7453 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
7454 {
7455 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
7456 name, REGPARM_MAX);
7457 *no_add_attrs = true;
7458 }
7459
7460 return NULL_TREE;
7461 }
7462
7463 if (TARGET_64BIT)
7464 {
7465 /* Do not warn when emulating the MS ABI. */
7466 if ((TREE_CODE (*node) != FUNCTION_TYPE
7467 && TREE_CODE (*node) != METHOD_TYPE)
7468 || ix86_function_type_abi (*node) != MS_ABI)
7469 warning (OPT_Wattributes, "%qE attribute ignored",
7470 name);
7471 *no_add_attrs = true;
7472 return NULL_TREE;
7473 }
7474
7475 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
7476 if (is_attribute_p ("fastcall", name))
7477 {
7478 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
7479 {
7480 error ("fastcall and cdecl attributes are not compatible");
7481 }
7482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
7483 {
7484 error ("fastcall and stdcall attributes are not compatible");
7485 }
7486 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
7487 {
7488 error ("fastcall and regparm attributes are not compatible");
7489 }
7490 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
7491 {
7492 error ("fastcall and thiscall attributes are not compatible");
7493 }
7494 }
7495
7496 /* Can combine stdcall with fastcall (redundant), regparm and
7497 sseregparm. */
7498 else if (is_attribute_p ("stdcall", name))
7499 {
7500 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
7501 {
7502 error ("stdcall and cdecl attributes are not compatible");
7503 }
7504 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
7505 {
7506 error ("stdcall and fastcall attributes are not compatible");
7507 }
7508 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
7509 {
7510 error ("stdcall and thiscall attributes are not compatible");
7511 }
7512 }
7513
7514 /* Can combine cdecl with regparm and sseregparm. */
7515 else if (is_attribute_p ("cdecl", name))
7516 {
7517 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
7518 {
7519 error ("stdcall and cdecl attributes are not compatible");
7520 }
7521 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
7522 {
7523 error ("fastcall and cdecl attributes are not compatible");
7524 }
7525 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
7526 {
7527 error ("cdecl and thiscall attributes are not compatible");
7528 }
7529 }
7530 else if (is_attribute_p ("thiscall", name))
7531 {
7532 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
7533 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
7534 name);
7535 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
7536 {
7537 error ("stdcall and thiscall attributes are not compatible");
7538 }
7539 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
7540 {
7541 error ("fastcall and thiscall attributes are not compatible");
7542 }
7543 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
7544 {
7545 error ("cdecl and thiscall attributes are not compatible");
7546 }
7547 }
7548
7549 /* Can combine sseregparm with all attributes. */
7550
7551 return NULL_TREE;
7552 }
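/* Illustrative example (editorial addition, not part of the original
   sources): combinations diagnosed by ix86_handle_cconv_attribute above
   on ia32.

       __attribute__((fastcall)) int ok (int a, int b);
       __attribute__((fastcall, regparm (3))) int bad (int a, int b);

   The first declaration is accepted; the second triggers the
   "fastcall and regparm attributes are not compatible" error.  */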
7553
7554 /* The transactional memory builtins are implicitly regparm or fastcall
7555 depending on the ABI. Override the generic do-nothing attribute that
7556 these builtins were declared with, and replace it with one of the two
7557 attributes that we expect elsewhere. */
7558
7559 static tree
7560 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
7561 int flags, bool *no_add_attrs)
7562 {
7563 tree alt;
7564
7565 /* In no case do we want to add the placeholder attribute. */
7566 *no_add_attrs = true;
7567
7568 /* The 64-bit ABI is unchanged for transactional memory. */
7569 if (TARGET_64BIT)
7570 return NULL_TREE;
7571
7572 /* ??? Is there a better way to validate 32-bit windows? We have
7573 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
7574 if (CHECK_STACK_LIMIT > 0)
7575 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
7576 else
7577 {
7578 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
7579 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
7580 }
7581 decl_attributes (node, alt, flags);
7582
7583 return NULL_TREE;
7584 }
7585
7586 /* This function determines from TYPE the calling-convention. */
7587
7588 unsigned int
7589 ix86_get_callcvt (const_tree type)
7590 {
7591 unsigned int ret = 0;
7592 bool is_stdarg;
7593 tree attrs;
7594
7595 if (TARGET_64BIT)
7596 return IX86_CALLCVT_CDECL;
7597
7598 attrs = TYPE_ATTRIBUTES (type);
7599 if (attrs != NULL_TREE)
7600 {
7601 if (lookup_attribute ("cdecl", attrs))
7602 ret |= IX86_CALLCVT_CDECL;
7603 else if (lookup_attribute ("stdcall", attrs))
7604 ret |= IX86_CALLCVT_STDCALL;
7605 else if (lookup_attribute ("fastcall", attrs))
7606 ret |= IX86_CALLCVT_FASTCALL;
7607 else if (lookup_attribute ("thiscall", attrs))
7608 ret |= IX86_CALLCVT_THISCALL;
7609
7610 /* Regparm isn't allowed for thiscall and fastcall. */
7611 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
7612 {
7613 if (lookup_attribute ("regparm", attrs))
7614 ret |= IX86_CALLCVT_REGPARM;
7615 if (lookup_attribute ("sseregparm", attrs))
7616 ret |= IX86_CALLCVT_SSEREGPARM;
7617 }
7618
7619 if (IX86_BASE_CALLCVT(ret) != 0)
7620 return ret;
7621 }
7622
7623 is_stdarg = stdarg_p (type);
7624 if (TARGET_RTD && !is_stdarg)
7625 return IX86_CALLCVT_STDCALL | ret;
7626
7627 if (ret != 0
7628 || is_stdarg
7629 || TREE_CODE (type) != METHOD_TYPE
7630 || ix86_function_type_abi (type) != MS_ABI)
7631 return IX86_CALLCVT_CDECL | ret;
7632
7633 return IX86_CALLCVT_THISCALL;
7634 }
7635
7636 /* Return 0 if the attributes for two types are incompatible, 1 if they
7637 are compatible, and 2 if they are nearly compatible (which causes a
7638 warning to be generated). */
7639
7640 static int
7641 ix86_comp_type_attributes (const_tree type1, const_tree type2)
7642 {
7643 unsigned int ccvt1, ccvt2;
7644
7645 if (TREE_CODE (type1) != FUNCTION_TYPE
7646 && TREE_CODE (type1) != METHOD_TYPE)
7647 return 1;
7648
7649 ccvt1 = ix86_get_callcvt (type1);
7650 ccvt2 = ix86_get_callcvt (type2);
7651 if (ccvt1 != ccvt2)
7652 return 0;
7653 if (ix86_function_regparm (type1, NULL)
7654 != ix86_function_regparm (type2, NULL))
7655 return 0;
7656
7657 return 1;
7658 }
7659 \f
7660 /* Return the regparm value for a function with the indicated TYPE and DECL.
7661 DECL may be NULL when calling function indirectly
7662 or considering a libcall. */
7663
7664 static int
7665 ix86_function_regparm (const_tree type, const_tree decl)
7666 {
7667 tree attr;
7668 int regparm;
7669 unsigned int ccvt;
7670
7671 if (TARGET_64BIT)
7672 return (ix86_function_type_abi (type) == SYSV_ABI
7673 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7674 ccvt = ix86_get_callcvt (type);
7675 regparm = ix86_regparm;
7676
7677 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7678 {
7679 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7680 if (attr)
7681 {
7682 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
7683 return regparm;
7684 }
7685 }
7686 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7687 return 2;
7688 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7689 return 1;
7690
7691 /* Use register calling convention for local functions when possible. */
7692 if (decl
7693 && TREE_CODE (decl) == FUNCTION_DECL)
7694 {
7695 cgraph_node *target = cgraph_node::get (decl);
7696 if (target)
7697 target = target->function_symbol ();
7698
7699 /* Caller and callee must agree on the calling convention, so
7700 checking just the optimize flag here would mean that with
7701 __attribute__((optimize (...))) the caller could use the regparm
7702 convention and the callee not, or vice versa. Instead look at
7703 whether the callee itself is optimized. */
7704 if (target && opt_for_fn (target->decl, optimize)
7705 && !(profile_flag && !flag_fentry))
7706 {
7707 cgraph_local_info *i = &target->local;
7708 if (i && i->local && i->can_change_signature)
7709 {
7710 int local_regparm, globals = 0, regno;
7711
7712 /* Make sure no regparm register is taken by a
7713 fixed register variable. */
7714 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7715 local_regparm++)
7716 if (fixed_regs[local_regparm])
7717 break;
7718
7719 /* We don't want to use regparm(3) for nested functions as
7720 these use a static chain pointer in the third argument. */
7721 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7722 local_regparm = 2;
7723
7724 /* Save a register for the split stack. */
7725 if (local_regparm == 3 && flag_split_stack)
7726 local_regparm = 2;
7727
7728 /* Each fixed register usage increases register pressure,
7729 so fewer registers should be used for argument passing.
7730 This functionality can be overridden by an explicit
7731 regparm value. */
7732 for (regno = AX_REG; regno <= DI_REG; regno++)
7733 if (fixed_regs[regno])
7734 globals++;
7735
7736 local_regparm
7737 = globals < local_regparm ? local_regparm - globals : 0;
7738
7739 if (local_regparm > regparm)
7740 regparm = local_regparm;
7741 }
7742 }
7743 }
7744
7745 return regparm;
7746 }
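/* Illustrative example (editorial addition, not part of the original
   sources): for the ia32 declaration

       __attribute__((regparm (3))) int sum3 (int a, int b, int c);

   ix86_function_regparm returns 3, so the three integer arguments are
   passed in %eax, %edx and %ecx rather than on the stack.  Fastcall
   yields 2 (%ecx, %edx) and thiscall 1 (%ecx), as returned above.  */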
7747
7748 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7749 DFmode (2) arguments in SSE registers for a function with the
7750 indicated TYPE and DECL. DECL may be NULL when calling a function
7751 indirectly or considering a libcall. Return -1 if any FP parameter
7752 should be rejected by error. This is used in situations where we imply
7753 the SSE calling convention but the function is called from another
7754 function with SSE disabled. Otherwise return 0. */
7755
7756 static int
7757 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
7758 {
7759 gcc_assert (!TARGET_64BIT);
7760
7761 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7762 by the sseregparm attribute. */
7763 if (TARGET_SSEREGPARM
7764 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
7765 {
7766 if (!TARGET_SSE)
7767 {
7768 if (warn)
7769 {
7770 if (decl)
7771 error ("calling %qD with attribute sseregparm without "
7772 "SSE/SSE2 enabled", decl);
7773 else
7774 error ("calling %qT with attribute sseregparm without "
7775 "SSE/SSE2 enabled", type);
7776 }
7777 return 0;
7778 }
7779
7780 return 2;
7781 }
7782
7783 if (!decl)
7784 return 0;
7785
7786 cgraph_node *target = cgraph_node::get (decl);
7787 if (target)
7788 target = target->function_symbol ();
7789
7790 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7791 (and DFmode for SSE2) arguments in SSE registers. */
7792 if (target
7793 /* TARGET_SSE_MATH */
7794 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7795 && opt_for_fn (target->decl, optimize)
7796 && !(profile_flag && !flag_fentry))
7797 {
7798 cgraph_local_info *i = &target->local;
7799 if (i && i->local && i->can_change_signature)
7800 {
7801 /* Refuse to produce wrong code when a local function with SSE enabled
7802 is called from an SSE-disabled function.
7803 FIXME: We need a way to detect these cases across ltrans partitions
7804 and avoid using SSE calling conventions on local functions called
7805 from functions with SSE disabled. For now at least delay the
7806 warning until we know we are going to produce wrong code.
7807 See PR66047. */
7808 if (!TARGET_SSE && warn)
7809 return -1;
7810 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7811 ->x_ix86_isa_flags) ? 2 : 1;
7812 }
7813 }
7814
7815 return 0;
7816 }
7817
7818 /* Return true if EAX is live at the start of the function. Used by
7819 ix86_expand_prologue to determine if we need special help before
7820 calling allocate_stack_worker. */
7821
7822 static bool
7823 ix86_eax_live_at_start_p (void)
7824 {
7825 /* Cheat. Don't bother working forward from ix86_function_regparm
7826 to the function type to whether an actual argument is located in
7827 eax. Instead just look at cfg info, which is still close enough
7828 to correct at this point. This gives false positives for broken
7829 functions that might use uninitialized data that happens to be
7830 allocated in eax, but who cares? */
7831 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
7832 }
7833
7834 static bool
7835 ix86_keep_aggregate_return_pointer (tree fntype)
7836 {
7837 tree attr;
7838
7839 if (!TARGET_64BIT)
7840 {
7841 attr = lookup_attribute ("callee_pop_aggregate_return",
7842 TYPE_ATTRIBUTES (fntype));
7843 if (attr)
7844 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7845
7846 /* For 32-bit MS-ABI the default is to keep aggregate
7847 return pointer. */
7848 if (ix86_function_type_abi (fntype) == MS_ABI)
7849 return true;
7850 }
7851 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7852 }
7853
7854 /* Value is the number of bytes of arguments automatically
7855 popped when returning from a subroutine call.
7856 FUNDECL is the declaration node of the function (as a tree),
7857 FUNTYPE is the data type of the function (as a tree),
7858 or for a library call it is an identifier node for the subroutine name.
7859 SIZE is the number of bytes of arguments passed on the stack.
7860
7861 On the 80386, the RTD insn may be used to pop them if the number
7862 of args is fixed, but if the number is variable then the caller
7863 must pop them all. RTD can't be used for library calls now
7864 because the library is compiled with the Unix compiler.
7865 Use of RTD is a selectable option, since it is incompatible with
7866 standard Unix calling sequences. If the option is not selected,
7867 the caller must always pop the args.
7868
7869 The attribute stdcall is equivalent to RTD on a per module basis. */
7870
7871 static int
7872 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7873 {
7874 unsigned int ccvt;
7875
7876 /* None of the 64-bit ABIs pop arguments. */
7877 if (TARGET_64BIT)
7878 return 0;
7879
7880 ccvt = ix86_get_callcvt (funtype);
7881
7882 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7883 | IX86_CALLCVT_THISCALL)) != 0
7884 && ! stdarg_p (funtype))
7885 return size;
7886
7887 /* Lose any fake structure return argument if it is passed on the stack. */
7888 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7889 && !ix86_keep_aggregate_return_pointer (funtype))
7890 {
7891 int nregs = ix86_function_regparm (funtype, fundecl);
7892 if (nregs == 0)
7893 return GET_MODE_SIZE (Pmode);
7894 }
7895
7896 return 0;
7897 }
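/* Illustrative example (editorial addition, not part of the original
   sources): for the ia32 declaration

       __attribute__((stdcall)) void f (int a, int b);

   ix86_return_pops_args returns 8, i.e. the callee pops its two 4-byte
   stack arguments with "ret $8".  A stdarg (variadic) function returns 0
   here, leaving the popping to the caller.  */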
7898
7899 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
7900
7901 static bool
7902 ix86_legitimate_combined_insn (rtx_insn *insn)
7903 {
7904 /* Check operand constraints in case hard registers were propagated
7905 into insn pattern. This check prevents combine pass from
7906 generating insn patterns with invalid hard register operands.
7907 These invalid insns can eventually confuse reload to error out
7908 with a spill failure. See also PRs 46829 and 46843. */
7909 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7910 {
7911 int i;
7912
7913 extract_insn (insn);
7914 preprocess_constraints (insn);
7915
7916 int n_operands = recog_data.n_operands;
7917 int n_alternatives = recog_data.n_alternatives;
7918 for (i = 0; i < n_operands; i++)
7919 {
7920 rtx op = recog_data.operand[i];
7921 machine_mode mode = GET_MODE (op);
7922 const operand_alternative *op_alt;
7923 int offset = 0;
7924 bool win;
7925 int j;
7926
7927 /* A unary operator may be accepted by the predicate, but it
7928 is irrelevant for matching constraints. */
7929 if (UNARY_P (op))
7930 op = XEXP (op, 0);
7931
7932 if (SUBREG_P (op))
7933 {
7934 if (REG_P (SUBREG_REG (op))
7935 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7936 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7937 GET_MODE (SUBREG_REG (op)),
7938 SUBREG_BYTE (op),
7939 GET_MODE (op));
7940 op = SUBREG_REG (op);
7941 }
7942
7943 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7944 continue;
7945
7946 op_alt = recog_op_alt;
7947
7948 /* Operand has no constraints, anything is OK. */
7949 win = !n_alternatives;
7950
7951 alternative_mask preferred = get_preferred_alternatives (insn);
7952 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7953 {
7954 if (!TEST_BIT (preferred, j))
7955 continue;
7956 if (op_alt[i].anything_ok
7957 || (op_alt[i].matches != -1
7958 && operands_match_p
7959 (recog_data.operand[i],
7960 recog_data.operand[op_alt[i].matches]))
7961 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7962 {
7963 win = true;
7964 break;
7965 }
7966 }
7967
7968 if (!win)
7969 return false;
7970 }
7971 }
7972
7973 return true;
7974 }
7975 \f
7976 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7977
7978 static unsigned HOST_WIDE_INT
7979 ix86_asan_shadow_offset (void)
7980 {
7981 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7982 : HOST_WIDE_INT_C (0x7fff8000))
7983 : (HOST_WIDE_INT_1 << 29);
7984 }
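/* Illustrative sketch (editorial addition, not part of the original
   sources): AddressSanitizer maps each 8 bytes of application memory to
   one shadow byte, so the offset returned above is used as

       shadow_addr = (addr >> 3) + ix86_asan_shadow_offset ();

   e.g. 0x7fff8000 for LP64 ELF targets, 1 << 44 for LP64 Mach-O, and
   1 << 29 for 32-bit.  */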
7985 \f
7986 /* Argument support functions. */
7987
7988 /* Return true when register may be used to pass function parameters. */
7989 bool
7990 ix86_function_arg_regno_p (int regno)
7991 {
7992 int i;
7993 enum calling_abi call_abi;
7994 const int *parm_regs;
7995
7996 if (TARGET_MPX && BND_REGNO_P (regno))
7997 return true;
7998
7999 if (!TARGET_64BIT)
8000 {
8001 if (TARGET_MACHO)
8002 return (regno < REGPARM_MAX
8003 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
8004 else
8005 return (regno < REGPARM_MAX
8006 || (TARGET_MMX && MMX_REGNO_P (regno)
8007 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
8008 || (TARGET_SSE && SSE_REGNO_P (regno)
8009 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
8010 }
8011
8012 if (TARGET_SSE && SSE_REGNO_P (regno)
8013 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
8014 return true;
8015
8016 /* TODO: The function should depend on current function ABI but
8017 builtins.c would need updating then. Therefore we use the
8018 default ABI. */
8019 call_abi = ix86_cfun_abi ();
8020
8021 /* RAX is used as hidden argument to va_arg functions. */
8022 if (call_abi == SYSV_ABI && regno == AX_REG)
8023 return true;
8024
8025 if (call_abi == MS_ABI)
8026 parm_regs = x86_64_ms_abi_int_parameter_registers;
8027 else
8028 parm_regs = x86_64_int_parameter_registers;
8029
8030 for (i = 0; i < (call_abi == MS_ABI
8031 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
8032 if (regno == parm_regs[i])
8033 return true;
8034 return false;
8035 }
8036
8037 /* Return true if we do not know how to pass TYPE solely in registers. */
8038
8039 static bool
8040 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
8041 {
8042 if (must_pass_in_stack_var_size_or_pad (mode, type))
8043 return true;
8044
8045 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
8046 The layout_type routine is crafty and tries to trick us into passing
8047 currently unsupported vector types on the stack by using TImode. */
8048 return (!TARGET_64BIT && mode == TImode
8049 && type && TREE_CODE (type) != VECTOR_TYPE);
8050 }
8051
8052 /* Return the size, in bytes, of the area reserved for arguments passed
8053 in registers for the function represented by FNDECL, depending on the
8054 ABI used. */
8055 int
8056 ix86_reg_parm_stack_space (const_tree fndecl)
8057 {
8058 enum calling_abi call_abi = SYSV_ABI;
8059 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
8060 call_abi = ix86_function_abi (fndecl);
8061 else
8062 call_abi = ix86_function_type_abi (fndecl);
8063 if (TARGET_64BIT && call_abi == MS_ABI)
8064 return 32;
8065 return 0;
8066 }
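
/* The 32 bytes returned for the 64-bit MS ABI correspond to the four
   register "home" slots (for RCX, RDX, R8 and R9) that the Microsoft x64
   calling convention requires the caller to reserve on the stack.  */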
8067
8068 /* We add this as a workaround in order to use the libc_has_function
8069 hook in i386.md. */
8070 bool
8071 ix86_libc_has_function (enum function_class fn_class)
8072 {
8073 return targetm.libc_has_function (fn_class);
8074 }
8075
8076 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
8077 specifying the call ABI used. */
8078 enum calling_abi
8079 ix86_function_type_abi (const_tree fntype)
8080 {
8081 enum calling_abi abi = ix86_abi;
8082
8083 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
8084 return abi;
8085
8086 if (abi == SYSV_ABI
8087 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
8088 {
8089 if (TARGET_X32)
8090 error ("X32 does not support ms_abi attribute");
8091
8092 abi = MS_ABI;
8093 }
8094 else if (abi == MS_ABI
8095 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
8096 abi = SYSV_ABI;
8097
8098 return abi;
8099 }
8100
8101 static enum calling_abi
8102 ix86_function_abi (const_tree fndecl)
8103 {
8104 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
8105 }
8106
8107 /* Return SYSV_ABI or MS_ABI, depending on cfun,
8108 specifying the call ABI used. */
8109 enum calling_abi
8110 ix86_cfun_abi (void)
8111 {
8112 return cfun ? cfun->machine->call_abi : ix86_abi;
8113 }
8114
8115 static bool
8116 ix86_function_ms_hook_prologue (const_tree fn)
8117 {
8118 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
8119 {
8120 if (decl_function_context (fn) != NULL_TREE)
8121 error_at (DECL_SOURCE_LOCATION (fn),
8122 "ms_hook_prologue is not compatible with nested function");
8123 else
8124 return true;
8125 }
8126 return false;
8127 }
8128
8129 /* Write the extra assembler code needed to declare a function properly. */
8130
8131 void
8132 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
8133 tree decl)
8134 {
8135 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
8136
8137 if (is_ms_hook)
8138 {
8139 int i, filler_count = (TARGET_64BIT ? 32 : 16);
8140 unsigned int filler_cc = 0xcccccccc;
8141
8142 for (i = 0; i < filler_count; i += 4)
8143 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
8144 }
8145
8146 #ifdef SUBTARGET_ASM_UNWIND_INIT
8147 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
8148 #endif
8149
8150 ASM_OUTPUT_LABEL (asm_out_file, fname);
8151
8152 /* Output magic byte marker, if hot-patch attribute is set. */
8153 if (is_ms_hook)
8154 {
8155 if (TARGET_64BIT)
8156 {
8157 /* leaq [%rsp + 0], %rsp */
8158 asm_fprintf (asm_out_file, ASM_BYTE
8159 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
8160 }
8161 else
8162 {
8163 /* movl.s %edi, %edi
8164 push %ebp
8165 movl.s %esp, %ebp */
8166 asm_fprintf (asm_out_file, ASM_BYTE
8167 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
8168 }
8169 }
8170 }
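
/* Note: the byte sequences emitted above follow the MS hot-patching
   convention: 0x8b 0xff 0x55 0x8b 0xec is "movl %edi, %edi; push %ebp;
   movl %esp, %ebp", a prologue whose first instruction is a 2-byte no-op
   that can later be overwritten with a short jump, while the 0xcc filler
   emitted before the label leaves room for a patch jump before the
   function entry.  */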
8171
8172 /* regclass.c */
8173 extern void init_regs (void);
8174
8175 /* Implementation of the call ABI switching target hook. Sets the call
8176 register sets specific to FNDECL. See also
8177 ix86_conditional_register_usage for more details. */
8178 void
8179 ix86_call_abi_override (const_tree fndecl)
8180 {
8181 cfun->machine->call_abi = ix86_function_abi (fndecl);
8182 }
8183
8184 /* Return true if a pseudo register should be created and used to hold
8185 the GOT address for PIC code. */
8186 bool
8187 ix86_use_pseudo_pic_reg (void)
8188 {
8189 if ((TARGET_64BIT
8190 && (ix86_cmodel == CM_SMALL_PIC
8191 || TARGET_PECOFF))
8192 || !flag_pic)
8193 return false;
8194 return true;
8195 }
8196
8197 /* Initialize large model PIC register. */
8198
8199 static void
8200 ix86_init_large_pic_reg (unsigned int tmp_regno)
8201 {
8202 rtx_code_label *label;
8203 rtx tmp_reg;
8204
8205 gcc_assert (Pmode == DImode);
8206 label = gen_label_rtx ();
8207 emit_label (label);
8208 LABEL_PRESERVE_P (label) = 1;
8209 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
8210 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
8211 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
8212 label));
8213 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8214 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
8215 pic_offset_table_rtx, tmp_reg));
8216 }
8217
8218 /* Create and initialize PIC register if required. */
8219 static void
8220 ix86_init_pic_reg (void)
8221 {
8222 edge entry_edge;
8223 rtx_insn *seq;
8224
8225 if (!ix86_use_pseudo_pic_reg ())
8226 return;
8227
8228 start_sequence ();
8229
8230 if (TARGET_64BIT)
8231 {
8232 if (ix86_cmodel == CM_LARGE_PIC)
8233 ix86_init_large_pic_reg (R11_REG);
8234 else
8235 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8236 }
8237 else
8238 {
8239 /* If there is a future mcount call in the function, it is more profitable
8240 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
8241 rtx reg = crtl->profile
8242 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
8243 : pic_offset_table_rtx;
8244 rtx_insn *insn = emit_insn (gen_set_got (reg));
8245 RTX_FRAME_RELATED_P (insn) = 1;
8246 if (crtl->profile)
8247 emit_move_insn (pic_offset_table_rtx, reg);
8248 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8249 }
8250
8251 seq = get_insns ();
8252 end_sequence ();
8253
8254 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8255 insert_insn_on_edge (seq, entry_edge);
8256 commit_one_edge_insertion (entry_edge);
8257 }
8258
8259 /* Initialize a variable CUM of type CUMULATIVE_ARGS
8260 for a call to a function whose data type is FNTYPE.
8261 For a library call, FNTYPE is 0. */
8262
8263 void
8264 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
8265 tree fntype, /* tree ptr for function decl */
8266 rtx libname, /* SYMBOL_REF of library name or 0 */
8267 tree fndecl,
8268 int caller)
8269 {
8270 struct cgraph_local_info *i = NULL;
8271 struct cgraph_node *target = NULL;
8272
8273 memset (cum, 0, sizeof (*cum));
8274
8275 if (fndecl)
8276 {
8277 target = cgraph_node::get (fndecl);
8278 if (target)
8279 {
8280 target = target->function_symbol ();
8281 i = cgraph_node::local_info (target->decl);
8282 cum->call_abi = ix86_function_abi (target->decl);
8283 }
8284 else
8285 cum->call_abi = ix86_function_abi (fndecl);
8286 }
8287 else
8288 cum->call_abi = ix86_function_type_abi (fntype);
8289
8290 cum->caller = caller;
8291
8292 /* Set up the number of registers to use for passing arguments. */
8293 cum->nregs = ix86_regparm;
8294 if (TARGET_64BIT)
8295 {
8296 cum->nregs = (cum->call_abi == SYSV_ABI
8297 ? X86_64_REGPARM_MAX
8298 : X86_64_MS_REGPARM_MAX);
8299 }
8300 if (TARGET_SSE)
8301 {
8302 cum->sse_nregs = SSE_REGPARM_MAX;
8303 if (TARGET_64BIT)
8304 {
8305 cum->sse_nregs = (cum->call_abi == SYSV_ABI
8306 ? X86_64_SSE_REGPARM_MAX
8307 : X86_64_MS_SSE_REGPARM_MAX);
8308 }
8309 }
8310 if (TARGET_MMX)
8311 cum->mmx_nregs = MMX_REGPARM_MAX;
8312 cum->warn_avx512f = true;
8313 cum->warn_avx = true;
8314 cum->warn_sse = true;
8315 cum->warn_mmx = true;
8316
8317 /* Because the type might mismatch between caller and callee, we need to
8318 use the actual type of the function for local calls.
8319 FIXME: cgraph_analyze can be told to actually record if the function uses
8320 va_start, so for local functions maybe_vaarg can be made more aggressive,
8321 helping K&R code.
8322 FIXME: once the type system is fixed, we won't need this code anymore. */
8323 if (i && i->local && i->can_change_signature)
8324 fntype = TREE_TYPE (target->decl);
8325 cum->stdarg = stdarg_p (fntype);
8326 cum->maybe_vaarg = (fntype
8327 ? (!prototype_p (fntype) || stdarg_p (fntype))
8328 : !libname);
8329
8330 cum->bnd_regno = FIRST_BND_REG;
8331 cum->bnds_in_bt = 0;
8332 cum->force_bnd_pass = 0;
8333 cum->decl = fndecl;
8334
8335 if (!TARGET_64BIT)
8336 {
8337 /* If there are variable arguments, then we won't pass anything
8338 in registers in 32-bit mode. */
8339 if (stdarg_p (fntype))
8340 {
8341 cum->nregs = 0;
8342 /* Since in 32-bit mode variable arguments are always passed on
8343 the stack, there is a scratch register available for an indirect
8344 sibcall. */
8345 cfun->machine->arg_reg_available = true;
8346 cum->sse_nregs = 0;
8347 cum->mmx_nregs = 0;
8348 cum->warn_avx512f = false;
8349 cum->warn_avx = false;
8350 cum->warn_sse = false;
8351 cum->warn_mmx = false;
8352 return;
8353 }
8354
8355 /* Use ecx and edx registers if function has fastcall attribute,
8356 else look for regparm information. */
8357 if (fntype)
8358 {
8359 unsigned int ccvt = ix86_get_callcvt (fntype);
8360 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
8361 {
8362 cum->nregs = 1;
8363 cum->fastcall = 1; /* Same first register as in fastcall. */
8364 }
8365 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
8366 {
8367 cum->nregs = 2;
8368 cum->fastcall = 1;
8369 }
8370 else
8371 cum->nregs = ix86_function_regparm (fntype, fndecl);
8372 }
8373
8374 /* Set up the number of SSE registers used for passing SFmode
8375 and DFmode arguments. Warn for mismatching ABI. */
8376 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
8377 }
8378
8379 cfun->machine->arg_reg_available = (cum->nregs > 0);
8380 }
8381
8382 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
8383 But in the case of vector types, it is some vector mode.
8384
8385 When we have only some of our vector isa extensions enabled, then there
8386 are some modes for which vector_mode_supported_p is false. For these
8387 modes, the generic vector support in gcc will choose some non-vector mode
8388 in order to implement the type. By computing the natural mode, we'll
8389 select the proper ABI location for the operand and not depend on whatever
8390 the middle-end decides to do with these vector types.
8391
8392 The middle-end can't deal with vector types larger than 16 bytes. In this
8393 case, we return the original mode and warn about the ABI change if CUM
8394 isn't NULL.
8395
8396 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
8397 available for the function return value. */
8398
8399 static machine_mode
8400 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
8401 bool in_return)
8402 {
8403 machine_mode mode = TYPE_MODE (type);
8404
8405 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
8406 {
8407 HOST_WIDE_INT size = int_size_in_bytes (type);
8408 if ((size == 8 || size == 16 || size == 32 || size == 64)
8409 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
8410 && TYPE_VECTOR_SUBPARTS (type) > 1)
8411 {
8412 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
8413
8414 /* There are no XFmode vector modes. */
8415 if (innermode == XFmode)
8416 return mode;
8417
8418 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
8419 mode = MIN_MODE_VECTOR_FLOAT;
8420 else
8421 mode = MIN_MODE_VECTOR_INT;
8422
8423 /* Get the mode which has this inner mode and number of units. */
8424 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
8425 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
8426 && GET_MODE_INNER (mode) == innermode)
8427 {
8428 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
8429 {
8430 static bool warnedavx512f;
8431 static bool warnedavx512f_ret;
8432
8433 if (cum && cum->warn_avx512f && !warnedavx512f)
8434 {
8435 if (warning (OPT_Wpsabi, "AVX512F vector argument "
8436 "without AVX512F enabled changes the ABI"))
8437 warnedavx512f = true;
8438 }
8439 else if (in_return && !warnedavx512f_ret)
8440 {
8441 if (warning (OPT_Wpsabi, "AVX512F vector return "
8442 "without AVX512F enabled changes the ABI"))
8443 warnedavx512f_ret = true;
8444 }
8445
8446 return TYPE_MODE (type);
8447 }
8448 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
8449 {
8450 static bool warnedavx;
8451 static bool warnedavx_ret;
8452
8453 if (cum && cum->warn_avx && !warnedavx)
8454 {
8455 if (warning (OPT_Wpsabi, "AVX vector argument "
8456 "without AVX enabled changes the ABI"))
8457 warnedavx = true;
8458 }
8459 else if (in_return && !warnedavx_ret)
8460 {
8461 if (warning (OPT_Wpsabi, "AVX vector return "
8462 "without AVX enabled changes the ABI"))
8463 warnedavx_ret = true;
8464 }
8465
8466 return TYPE_MODE (type);
8467 }
8468 else if (((size == 8 && TARGET_64BIT) || size == 16)
8469 && !TARGET_SSE
8470 && !TARGET_IAMCU)
8471 {
8472 static bool warnedsse;
8473 static bool warnedsse_ret;
8474
8475 if (cum && cum->warn_sse && !warnedsse)
8476 {
8477 if (warning (OPT_Wpsabi, "SSE vector argument "
8478 "without SSE enabled changes the ABI"))
8479 warnedsse = true;
8480 }
8481 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
8482 {
8483 if (warning (OPT_Wpsabi, "SSE vector return "
8484 "without SSE enabled changes the ABI"))
8485 warnedsse_ret = true;
8486 }
8487 }
8488 else if ((size == 8 && !TARGET_64BIT)
8489 && (!cfun
8490 || cfun->machine->func_type == TYPE_NORMAL)
8491 && !TARGET_MMX
8492 && !TARGET_IAMCU)
8493 {
8494 static bool warnedmmx;
8495 static bool warnedmmx_ret;
8496
8497 if (cum && cum->warn_mmx && !warnedmmx)
8498 {
8499 if (warning (OPT_Wpsabi, "MMX vector argument "
8500 "without MMX enabled changes the ABI"))
8501 warnedmmx = true;
8502 }
8503 else if (in_return && !warnedmmx_ret)
8504 {
8505 if (warning (OPT_Wpsabi, "MMX vector return "
8506 "without MMX enabled changes the ABI"))
8507 warnedmmx_ret = true;
8508 }
8509 }
8510 return mode;
8511 }
8512
8513 gcc_unreachable ();
8514 }
8515 }
8516
8517 return mode;
8518 }
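
/* For example, a type declared with __attribute__ ((vector_size (32)))
   has the natural mode V8SImode (or V8SFmode for floats) even when AVX
   is disabled; in that case the code above emits the -Wpsabi warning and
   falls back to whatever mode the middle-end chose for the type.  */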
8519
8520 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
8521 this may not agree with the mode that the type system has chosen for the
8522 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
8523 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
8524
8525 static rtx
8526 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
8527 unsigned int regno)
8528 {
8529 rtx tmp;
8530
8531 if (orig_mode != BLKmode)
8532 tmp = gen_rtx_REG (orig_mode, regno);
8533 else
8534 {
8535 tmp = gen_rtx_REG (mode, regno);
8536 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
8537 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
8538 }
8539
8540 return tmp;
8541 }
8542
8543 /* x86-64 register passing implementation. See the x86-64 ABI for details.
8544 The goal of this code is to classify each 8 bytes of the incoming argument
8545 by register class and assign registers accordingly. */
8546
8547 /* Return the union class of CLASS1 and CLASS2.
8548 See the x86-64 PS ABI for details. */
8549
8550 static enum x86_64_reg_class
8551 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
8552 {
8553 /* Rule #1: If both classes are equal, this is the resulting class. */
8554 if (class1 == class2)
8555 return class1;
8556
8557 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
8558 the other class. */
8559 if (class1 == X86_64_NO_CLASS)
8560 return class2;
8561 if (class2 == X86_64_NO_CLASS)
8562 return class1;
8563
8564 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
8565 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
8566 return X86_64_MEMORY_CLASS;
8567
8568 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
8569 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
8570 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
8571 return X86_64_INTEGERSI_CLASS;
8572 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
8573 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
8574 return X86_64_INTEGER_CLASS;
8575
8576 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
8577 MEMORY is used. */
8578 if (class1 == X86_64_X87_CLASS
8579 || class1 == X86_64_X87UP_CLASS
8580 || class1 == X86_64_COMPLEX_X87_CLASS
8581 || class2 == X86_64_X87_CLASS
8582 || class2 == X86_64_X87UP_CLASS
8583 || class2 == X86_64_COMPLEX_X87_CLASS)
8584 return X86_64_MEMORY_CLASS;
8585
8586 /* Rule #6: Otherwise class SSE is used. */
8587 return X86_64_SSE_CLASS;
8588 }
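
/* For instance, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS (rule #4 above), so an eightbyte that
   contains both an int and a float ends up in a general-purpose
   register.  */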
8589
8590 /* Classify the argument of type TYPE and mode MODE.
8591 CLASSES will be filled by the register class used to pass each word
8592 of the operand. The number of words is returned. In case the parameter
8593 should be passed in memory, 0 is returned. As a special case for zero
8594 sized containers, classes[0] will be NO_CLASS and 1 is returned.
8595
8596 BIT_OFFSET is used internally for handling records; it specifies the
8597 offset in bits modulo 512, to avoid overflow cases.
8598
8599 See the x86-64 PS ABI for details.
8600 */
8601
8602 static int
8603 classify_argument (machine_mode mode, const_tree type,
8604 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
8605 {
8606 HOST_WIDE_INT bytes =
8607 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8608 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
8609
8610 /* Variable sized entities are always passed/returned in memory. */
8611 if (bytes < 0)
8612 return 0;
8613
8614 if (mode != VOIDmode
8615 && targetm.calls.must_pass_in_stack (mode, type))
8616 return 0;
8617
8618 if (type && AGGREGATE_TYPE_P (type))
8619 {
8620 int i;
8621 tree field;
8622 enum x86_64_reg_class subclasses[MAX_CLASSES];
8623
8624 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
8625 if (bytes > 64)
8626 return 0;
8627
8628 for (i = 0; i < words; i++)
8629 classes[i] = X86_64_NO_CLASS;
8630
8631 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
8632 signal the memory class, so handle this as a special case. */
8633 if (!words)
8634 {
8635 classes[0] = X86_64_NO_CLASS;
8636 return 1;
8637 }
8638
8639 /* Classify each field of record and merge classes. */
8640 switch (TREE_CODE (type))
8641 {
8642 case RECORD_TYPE:
8643 /* And now merge the fields of structure. */
8644 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8645 {
8646 if (TREE_CODE (field) == FIELD_DECL)
8647 {
8648 int num;
8649
8650 if (TREE_TYPE (field) == error_mark_node)
8651 continue;
8652
8653 /* Bitfields are always classified as integer. Handle them
8654 early, since later code would consider them to be
8655 misaligned integers. */
8656 if (DECL_BIT_FIELD (field))
8657 {
8658 for (i = (int_bit_position (field)
8659 + (bit_offset % 64)) / 8 / 8;
8660 i < ((int_bit_position (field) + (bit_offset % 64))
8661 + tree_to_shwi (DECL_SIZE (field))
8662 + 63) / 8 / 8; i++)
8663 classes[i] =
8664 merge_classes (X86_64_INTEGER_CLASS,
8665 classes[i]);
8666 }
8667 else
8668 {
8669 int pos;
8670
8671 type = TREE_TYPE (field);
8672
8673 /* Flexible array member is ignored. */
8674 if (TYPE_MODE (type) == BLKmode
8675 && TREE_CODE (type) == ARRAY_TYPE
8676 && TYPE_SIZE (type) == NULL_TREE
8677 && TYPE_DOMAIN (type) != NULL_TREE
8678 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8679 == NULL_TREE))
8680 {
8681 static bool warned;
8682
8683 if (!warned && warn_psabi)
8684 {
8685 warned = true;
8686 inform (input_location,
8687 "the ABI of passing struct with"
8688 " a flexible array member has"
8689 " changed in GCC 4.4");
8690 }
8691 continue;
8692 }
8693 num = classify_argument (TYPE_MODE (type), type,
8694 subclasses,
8695 (int_bit_position (field)
8696 + bit_offset) % 512);
8697 if (!num)
8698 return 0;
8699 pos = (int_bit_position (field)
8700 + (bit_offset % 64)) / 8 / 8;
8701 for (i = 0; i < num && (i + pos) < words; i++)
8702 classes[i + pos] =
8703 merge_classes (subclasses[i], classes[i + pos]);
8704 }
8705 }
8706 }
8707 break;
8708
8709 case ARRAY_TYPE:
8710 /* Arrays are handled as small records. */
8711 {
8712 int num;
8713 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8714 TREE_TYPE (type), subclasses, bit_offset);
8715 if (!num)
8716 return 0;
8717
8718 /* The partial classes are now full classes. */
8719 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8720 subclasses[0] = X86_64_SSE_CLASS;
8721 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8722 && !((bit_offset % 64) == 0 && bytes == 4))
8723 subclasses[0] = X86_64_INTEGER_CLASS;
8724
8725 for (i = 0; i < words; i++)
8726 classes[i] = subclasses[i % num];
8727
8728 break;
8729 }
8730 case UNION_TYPE:
8731 case QUAL_UNION_TYPE:
8732 /* Unions are similar to RECORD_TYPE but offset is always 0.
8733 */
8734 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8735 {
8736 if (TREE_CODE (field) == FIELD_DECL)
8737 {
8738 int num;
8739
8740 if (TREE_TYPE (field) == error_mark_node)
8741 continue;
8742
8743 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8744 TREE_TYPE (field), subclasses,
8745 bit_offset);
8746 if (!num)
8747 return 0;
8748 for (i = 0; i < num && i < words; i++)
8749 classes[i] = merge_classes (subclasses[i], classes[i]);
8750 }
8751 }
8752 break;
8753
8754 default:
8755 gcc_unreachable ();
8756 }
8757
8758 if (words > 2)
8759 {
8760 /* When size > 16 bytes, if the first class isn't
8761 X86_64_SSE_CLASS or any of the remaining classes isn't
8762 X86_64_SSEUP_CLASS, everything should be passed in
8763 memory. */
8764 if (classes[0] != X86_64_SSE_CLASS)
8765 return 0;
8766
8767 for (i = 1; i < words; i++)
8768 if (classes[i] != X86_64_SSEUP_CLASS)
8769 return 0;
8770 }
8771
8772 /* Final merger cleanup. */
8773 for (i = 0; i < words; i++)
8774 {
8775 /* If one class is MEMORY, everything should be passed in
8776 memory. */
8777 if (classes[i] == X86_64_MEMORY_CLASS)
8778 return 0;
8779
8780 /* X86_64_SSEUP_CLASS should always be preceded by
8781 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8782 if (classes[i] == X86_64_SSEUP_CLASS
8783 && classes[i - 1] != X86_64_SSE_CLASS
8784 && classes[i - 1] != X86_64_SSEUP_CLASS)
8785 {
8786 /* The first one should never be X86_64_SSEUP_CLASS. */
8787 gcc_assert (i != 0);
8788 classes[i] = X86_64_SSE_CLASS;
8789 }
8790
8791 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8792 everything should be passed in memory. */
8793 if (classes[i] == X86_64_X87UP_CLASS
8794 && (classes[i - 1] != X86_64_X87_CLASS))
8795 {
8796 static bool warned;
8797
8798 /* The first one should never be X86_64_X87UP_CLASS. */
8799 gcc_assert (i != 0);
8800 if (!warned && warn_psabi)
8801 {
8802 warned = true;
8803 inform (input_location,
8804 "the ABI of passing union with long double"
8805 " has changed in GCC 4.4");
8806 }
8807 return 0;
8808 }
8809 }
8810 return words;
8811 }
8812
8813 /* Compute the alignment needed. We align all types to their natural
8814 boundaries, except XFmode, which the code below aligns to 128 bits. */
8815 if (mode != VOIDmode && mode != BLKmode)
8816 {
8817 int mode_alignment = GET_MODE_BITSIZE (mode);
8818
8819 if (mode == XFmode)
8820 mode_alignment = 128;
8821 else if (mode == XCmode)
8822 mode_alignment = 256;
8823 if (COMPLEX_MODE_P (mode))
8824 mode_alignment /= 2;
8825 /* Misaligned fields are always returned in memory. */
8826 if (bit_offset % mode_alignment)
8827 return 0;
8828 }
8829
8830 /* for V1xx modes, just use the base mode */
8831 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8832 && GET_MODE_UNIT_SIZE (mode) == bytes)
8833 mode = GET_MODE_INNER (mode);
8834
8835 /* Classification of atomic types. */
8836 switch (mode)
8837 {
8838 case SDmode:
8839 case DDmode:
8840 classes[0] = X86_64_SSE_CLASS;
8841 return 1;
8842 case TDmode:
8843 classes[0] = X86_64_SSE_CLASS;
8844 classes[1] = X86_64_SSEUP_CLASS;
8845 return 2;
8846 case DImode:
8847 case SImode:
8848 case HImode:
8849 case QImode:
8850 case CSImode:
8851 case CHImode:
8852 case CQImode:
8853 {
8854 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8855
8856 /* Analyze last 128 bits only. */
8857 size = (size - 1) & 0x7f;
8858
8859 if (size < 32)
8860 {
8861 classes[0] = X86_64_INTEGERSI_CLASS;
8862 return 1;
8863 }
8864 else if (size < 64)
8865 {
8866 classes[0] = X86_64_INTEGER_CLASS;
8867 return 1;
8868 }
8869 else if (size < 64+32)
8870 {
8871 classes[0] = X86_64_INTEGER_CLASS;
8872 classes[1] = X86_64_INTEGERSI_CLASS;
8873 return 2;
8874 }
8875 else if (size < 64+64)
8876 {
8877 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8878 return 2;
8879 }
8880 else
8881 gcc_unreachable ();
8882 }
8883 case CDImode:
8884 case TImode:
8885 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8886 return 2;
8887 case COImode:
8888 case OImode:
8889 /* OImode shouldn't be used directly. */
8890 gcc_unreachable ();
8891 case CTImode:
8892 return 0;
8893 case SFmode:
8894 if (!(bit_offset % 64))
8895 classes[0] = X86_64_SSESF_CLASS;
8896 else
8897 classes[0] = X86_64_SSE_CLASS;
8898 return 1;
8899 case DFmode:
8900 classes[0] = X86_64_SSEDF_CLASS;
8901 return 1;
8902 case XFmode:
8903 classes[0] = X86_64_X87_CLASS;
8904 classes[1] = X86_64_X87UP_CLASS;
8905 return 2;
8906 case TFmode:
8907 classes[0] = X86_64_SSE_CLASS;
8908 classes[1] = X86_64_SSEUP_CLASS;
8909 return 2;
8910 case SCmode:
8911 classes[0] = X86_64_SSE_CLASS;
8912 if (!(bit_offset % 64))
8913 return 1;
8914 else
8915 {
8916 static bool warned;
8917
8918 if (!warned && warn_psabi)
8919 {
8920 warned = true;
8921 inform (input_location,
8922 "the ABI of passing structure with complex float"
8923 " member has changed in GCC 4.4");
8924 }
8925 classes[1] = X86_64_SSESF_CLASS;
8926 return 2;
8927 }
8928 case DCmode:
8929 classes[0] = X86_64_SSEDF_CLASS;
8930 classes[1] = X86_64_SSEDF_CLASS;
8931 return 2;
8932 case XCmode:
8933 classes[0] = X86_64_COMPLEX_X87_CLASS;
8934 return 1;
8935 case TCmode:
8936 /* This mode is larger than 16 bytes. */
8937 return 0;
8938 case V8SFmode:
8939 case V8SImode:
8940 case V32QImode:
8941 case V16HImode:
8942 case V4DFmode:
8943 case V4DImode:
8944 classes[0] = X86_64_SSE_CLASS;
8945 classes[1] = X86_64_SSEUP_CLASS;
8946 classes[2] = X86_64_SSEUP_CLASS;
8947 classes[3] = X86_64_SSEUP_CLASS;
8948 return 4;
8949 case V8DFmode:
8950 case V16SFmode:
8951 case V8DImode:
8952 case V16SImode:
8953 case V32HImode:
8954 case V64QImode:
8955 classes[0] = X86_64_SSE_CLASS;
8956 classes[1] = X86_64_SSEUP_CLASS;
8957 classes[2] = X86_64_SSEUP_CLASS;
8958 classes[3] = X86_64_SSEUP_CLASS;
8959 classes[4] = X86_64_SSEUP_CLASS;
8960 classes[5] = X86_64_SSEUP_CLASS;
8961 classes[6] = X86_64_SSEUP_CLASS;
8962 classes[7] = X86_64_SSEUP_CLASS;
8963 return 8;
8964 case V4SFmode:
8965 case V4SImode:
8966 case V16QImode:
8967 case V8HImode:
8968 case V2DFmode:
8969 case V2DImode:
8970 classes[0] = X86_64_SSE_CLASS;
8971 classes[1] = X86_64_SSEUP_CLASS;
8972 return 2;
8973 case V1TImode:
8974 case V1DImode:
8975 case V2SFmode:
8976 case V2SImode:
8977 case V4HImode:
8978 case V8QImode:
8979 classes[0] = X86_64_SSE_CLASS;
8980 return 1;
8981 case BLKmode:
8982 case VOIDmode:
8983 return 0;
8984 default:
8985 gcc_assert (VECTOR_MODE_P (mode));
8986
8987 if (bytes > 16)
8988 return 0;
8989
8990 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8991
8992 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8993 classes[0] = X86_64_INTEGERSI_CLASS;
8994 else
8995 classes[0] = X86_64_INTEGER_CLASS;
8996 classes[1] = X86_64_INTEGER_CLASS;
8997 return 1 + (bytes > 8);
8998 }
8999 }
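
/* As an example of the classification above: for a 16-byte
   struct { double d; int a, b; } the first eightbyte (the double) is
   classified as X86_64_SSEDF_CLASS and the second (the two ints) as
   X86_64_INTEGER_CLASS, so classify_argument returns 2 and the struct
   is passed in one SSE register and one integer register.  */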
9000
9001 /* Examine the argument and set the number of registers required in each
9002 class. Return true iff the parameter should be passed in memory. */
9003
9004 static bool
9005 examine_argument (machine_mode mode, const_tree type, int in_return,
9006 int *int_nregs, int *sse_nregs)
9007 {
9008 enum x86_64_reg_class regclass[MAX_CLASSES];
9009 int n = classify_argument (mode, type, regclass, 0);
9010
9011 *int_nregs = 0;
9012 *sse_nregs = 0;
9013
9014 if (!n)
9015 return true;
9016 for (n--; n >= 0; n--)
9017 switch (regclass[n])
9018 {
9019 case X86_64_INTEGER_CLASS:
9020 case X86_64_INTEGERSI_CLASS:
9021 (*int_nregs)++;
9022 break;
9023 case X86_64_SSE_CLASS:
9024 case X86_64_SSESF_CLASS:
9025 case X86_64_SSEDF_CLASS:
9026 (*sse_nregs)++;
9027 break;
9028 case X86_64_NO_CLASS:
9029 case X86_64_SSEUP_CLASS:
9030 break;
9031 case X86_64_X87_CLASS:
9032 case X86_64_X87UP_CLASS:
9033 case X86_64_COMPLEX_X87_CLASS:
9034 if (!in_return)
9035 return true;
9036 break;
9037 case X86_64_MEMORY_CLASS:
9038 gcc_unreachable ();
9039 }
9040
9041 return false;
9042 }
9043
9044 /* Construct container for the argument used by GCC interface. See
9045 FUNCTION_ARG for the detailed description. */
9046
9047 static rtx
9048 construct_container (machine_mode mode, machine_mode orig_mode,
9049 const_tree type, int in_return, int nintregs, int nsseregs,
9050 const int *intreg, int sse_regno)
9051 {
9052 /* The following variables hold the static issued_error state. */
9053 static bool issued_sse_arg_error;
9054 static bool issued_sse_ret_error;
9055 static bool issued_x87_ret_error;
9056
9057 machine_mode tmpmode;
9058 int bytes =
9059 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
9060 enum x86_64_reg_class regclass[MAX_CLASSES];
9061 int n;
9062 int i;
9063 int nexps = 0;
9064 int needed_sseregs, needed_intregs;
9065 rtx exp[MAX_CLASSES];
9066 rtx ret;
9067
9068 n = classify_argument (mode, type, regclass, 0);
9069 if (!n)
9070 return NULL;
9071 if (examine_argument (mode, type, in_return, &needed_intregs,
9072 &needed_sseregs))
9073 return NULL;
9074 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
9075 return NULL;
9076
9077 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
9078 some less clueful developer tries to use floating-point anyway. */
9079 if (needed_sseregs && !TARGET_SSE)
9080 {
9081 if (in_return)
9082 {
9083 if (!issued_sse_ret_error)
9084 {
9085 error ("SSE register return with SSE disabled");
9086 issued_sse_ret_error = true;
9087 }
9088 }
9089 else if (!issued_sse_arg_error)
9090 {
9091 error ("SSE register argument with SSE disabled");
9092 issued_sse_arg_error = true;
9093 }
9094 return NULL;
9095 }
9096
9097 /* Likewise, error if the ABI requires us to return values in the
9098 x87 registers and the user specified -mno-80387. */
9099 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
9100 for (i = 0; i < n; i++)
9101 if (regclass[i] == X86_64_X87_CLASS
9102 || regclass[i] == X86_64_X87UP_CLASS
9103 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
9104 {
9105 if (!issued_x87_ret_error)
9106 {
9107 error ("x87 register return with x87 disabled");
9108 issued_x87_ret_error = true;
9109 }
9110 return NULL;
9111 }
9112
9113 /* First construct the simple cases. Avoid SCmode, since we want to use
9114 a single register to pass this type. */
9115 if (n == 1 && mode != SCmode)
9116 switch (regclass[0])
9117 {
9118 case X86_64_INTEGER_CLASS:
9119 case X86_64_INTEGERSI_CLASS:
9120 return gen_rtx_REG (mode, intreg[0]);
9121 case X86_64_SSE_CLASS:
9122 case X86_64_SSESF_CLASS:
9123 case X86_64_SSEDF_CLASS:
9124 if (mode != BLKmode)
9125 return gen_reg_or_parallel (mode, orig_mode,
9126 SSE_REGNO (sse_regno));
9127 break;
9128 case X86_64_X87_CLASS:
9129 case X86_64_COMPLEX_X87_CLASS:
9130 return gen_rtx_REG (mode, FIRST_STACK_REG);
9131 case X86_64_NO_CLASS:
9132 /* Zero sized array, struct or class. */
9133 return NULL;
9134 default:
9135 gcc_unreachable ();
9136 }
9137 if (n == 2
9138 && regclass[0] == X86_64_SSE_CLASS
9139 && regclass[1] == X86_64_SSEUP_CLASS
9140 && mode != BLKmode)
9141 return gen_reg_or_parallel (mode, orig_mode,
9142 SSE_REGNO (sse_regno));
9143 if (n == 4
9144 && regclass[0] == X86_64_SSE_CLASS
9145 && regclass[1] == X86_64_SSEUP_CLASS
9146 && regclass[2] == X86_64_SSEUP_CLASS
9147 && regclass[3] == X86_64_SSEUP_CLASS
9148 && mode != BLKmode)
9149 return gen_reg_or_parallel (mode, orig_mode,
9150 SSE_REGNO (sse_regno));
9151 if (n == 8
9152 && regclass[0] == X86_64_SSE_CLASS
9153 && regclass[1] == X86_64_SSEUP_CLASS
9154 && regclass[2] == X86_64_SSEUP_CLASS
9155 && regclass[3] == X86_64_SSEUP_CLASS
9156 && regclass[4] == X86_64_SSEUP_CLASS
9157 && regclass[5] == X86_64_SSEUP_CLASS
9158 && regclass[6] == X86_64_SSEUP_CLASS
9159 && regclass[7] == X86_64_SSEUP_CLASS
9160 && mode != BLKmode)
9161 return gen_reg_or_parallel (mode, orig_mode,
9162 SSE_REGNO (sse_regno));
9163 if (n == 2
9164 && regclass[0] == X86_64_X87_CLASS
9165 && regclass[1] == X86_64_X87UP_CLASS)
9166 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
9167
9168 if (n == 2
9169 && regclass[0] == X86_64_INTEGER_CLASS
9170 && regclass[1] == X86_64_INTEGER_CLASS
9171 && (mode == CDImode || mode == TImode)
9172 && intreg[0] + 1 == intreg[1])
9173 return gen_rtx_REG (mode, intreg[0]);
9174
9175 /* Otherwise figure out the entries of the PARALLEL. */
9176 for (i = 0; i < n; i++)
9177 {
9178 int pos;
9179
9180 switch (regclass[i])
9181 {
9182 case X86_64_NO_CLASS:
9183 break;
9184 case X86_64_INTEGER_CLASS:
9185 case X86_64_INTEGERSI_CLASS:
9186 /* Merge TImodes on aligned occasions here too. */
9187 if (i * 8 + 8 > bytes)
9188 tmpmode
9189 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
9190 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
9191 tmpmode = SImode;
9192 else
9193 tmpmode = DImode;
9194 /* We've requested 24 bytes that we
9195 don't have a mode for. Use DImode. */
9196 if (tmpmode == BLKmode)
9197 tmpmode = DImode;
9198 exp [nexps++]
9199 = gen_rtx_EXPR_LIST (VOIDmode,
9200 gen_rtx_REG (tmpmode, *intreg),
9201 GEN_INT (i*8));
9202 intreg++;
9203 break;
9204 case X86_64_SSESF_CLASS:
9205 exp [nexps++]
9206 = gen_rtx_EXPR_LIST (VOIDmode,
9207 gen_rtx_REG (SFmode,
9208 SSE_REGNO (sse_regno)),
9209 GEN_INT (i*8));
9210 sse_regno++;
9211 break;
9212 case X86_64_SSEDF_CLASS:
9213 exp [nexps++]
9214 = gen_rtx_EXPR_LIST (VOIDmode,
9215 gen_rtx_REG (DFmode,
9216 SSE_REGNO (sse_regno)),
9217 GEN_INT (i*8));
9218 sse_regno++;
9219 break;
9220 case X86_64_SSE_CLASS:
9221 pos = i;
9222 switch (n)
9223 {
9224 case 1:
9225 tmpmode = DImode;
9226 break;
9227 case 2:
9228 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
9229 {
9230 tmpmode = TImode;
9231 i++;
9232 }
9233 else
9234 tmpmode = DImode;
9235 break;
9236 case 4:
9237 gcc_assert (i == 0
9238 && regclass[1] == X86_64_SSEUP_CLASS
9239 && regclass[2] == X86_64_SSEUP_CLASS
9240 && regclass[3] == X86_64_SSEUP_CLASS);
9241 tmpmode = OImode;
9242 i += 3;
9243 break;
9244 case 8:
9245 gcc_assert (i == 0
9246 && regclass[1] == X86_64_SSEUP_CLASS
9247 && regclass[2] == X86_64_SSEUP_CLASS
9248 && regclass[3] == X86_64_SSEUP_CLASS
9249 && regclass[4] == X86_64_SSEUP_CLASS
9250 && regclass[5] == X86_64_SSEUP_CLASS
9251 && regclass[6] == X86_64_SSEUP_CLASS
9252 && regclass[7] == X86_64_SSEUP_CLASS);
9253 tmpmode = XImode;
9254 i += 7;
9255 break;
9256 default:
9257 gcc_unreachable ();
9258 }
9259 exp [nexps++]
9260 = gen_rtx_EXPR_LIST (VOIDmode,
9261 gen_rtx_REG (tmpmode,
9262 SSE_REGNO (sse_regno)),
9263 GEN_INT (pos*8));
9264 sse_regno++;
9265 break;
9266 default:
9267 gcc_unreachable ();
9268 }
9269 }
9270
9271 /* Empty aligned struct, union or class. */
9272 if (nexps == 0)
9273 return NULL;
9274
9275 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
9276 for (i = 0; i < nexps; i++)
9277 XVECEXP (ret, 0, i) = exp [i];
9278 return ret;
9279 }
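
/* Continuing the struct { double d; int a, b; } example, the PARALLEL
   built here for the first argument is roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   i.e. the double travels in %xmm0 and the two ints packed in %rdi.  */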
9280
9281 /* Update the data in CUM to advance over an argument of mode MODE
9282 and data type TYPE. (TYPE is null for libcalls where that information
9283 may not be available.)
9284
9285 Return the number of integer registers advanced over. */
9286
9287 static int
9288 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
9289 const_tree type, HOST_WIDE_INT bytes,
9290 HOST_WIDE_INT words)
9291 {
9292 int res = 0;
9293 bool error_p = false;
9294
9295 if (TARGET_IAMCU)
9296 {
9297 /* Intel MCU psABI passes scalars and aggregates no larger than 8
9298 bytes in registers. */
9299 if (!VECTOR_MODE_P (mode) && bytes <= 8)
9300 goto pass_in_reg;
9301 return res;
9302 }
9303
9304 switch (mode)
9305 {
9306 default:
9307 break;
9308
9309 case BLKmode:
9310 if (bytes < 0)
9311 break;
9312 /* FALLTHRU */
9313
9314 case DImode:
9315 case SImode:
9316 case HImode:
9317 case QImode:
9318 pass_in_reg:
9319 cum->words += words;
9320 cum->nregs -= words;
9321 cum->regno += words;
9322 if (cum->nregs >= 0)
9323 res = words;
9324 if (cum->nregs <= 0)
9325 {
9326 cum->nregs = 0;
9327 cfun->machine->arg_reg_available = false;
9328 cum->regno = 0;
9329 }
9330 break;
9331
9332 case OImode:
9333 /* OImode shouldn't be used directly. */
9334 gcc_unreachable ();
9335
9336 case DFmode:
9337 if (cum->float_in_sse == -1)
9338 error_p = 1;
9339 if (cum->float_in_sse < 2)
9340 break;
9341 /* FALLTHRU */
9342 case SFmode:
9343 if (cum->float_in_sse == -1)
9344 error_p = 1;
9345 if (cum->float_in_sse < 1)
9346 break;
9347 /* FALLTHRU */
9348
9349 case V8SFmode:
9350 case V8SImode:
9351 case V64QImode:
9352 case V32HImode:
9353 case V16SImode:
9354 case V8DImode:
9355 case V16SFmode:
9356 case V8DFmode:
9357 case V32QImode:
9358 case V16HImode:
9359 case V4DFmode:
9360 case V4DImode:
9361 case TImode:
9362 case V16QImode:
9363 case V8HImode:
9364 case V4SImode:
9365 case V2DImode:
9366 case V4SFmode:
9367 case V2DFmode:
9368 if (!type || !AGGREGATE_TYPE_P (type))
9369 {
9370 cum->sse_words += words;
9371 cum->sse_nregs -= 1;
9372 cum->sse_regno += 1;
9373 if (cum->sse_nregs <= 0)
9374 {
9375 cum->sse_nregs = 0;
9376 cum->sse_regno = 0;
9377 }
9378 }
9379 break;
9380
9381 case V8QImode:
9382 case V4HImode:
9383 case V2SImode:
9384 case V2SFmode:
9385 case V1TImode:
9386 case V1DImode:
9387 if (!type || !AGGREGATE_TYPE_P (type))
9388 {
9389 cum->mmx_words += words;
9390 cum->mmx_nregs -= 1;
9391 cum->mmx_regno += 1;
9392 if (cum->mmx_nregs <= 0)
9393 {
9394 cum->mmx_nregs = 0;
9395 cum->mmx_regno = 0;
9396 }
9397 }
9398 break;
9399 }
9400 if (error_p)
9401 {
9402 cum->float_in_sse = 0;
9403 error ("calling %qD with SSE calling convention without "
9404 "SSE/SSE2 enabled", cum->decl);
9405 sorry ("this is a GCC bug that can be worked around by adding "
9406 "attribute used to function called");
9407 }
9408
9409 return res;
9410 }
9411
9412 static int
9413 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
9414 const_tree type, HOST_WIDE_INT words, bool named)
9415 {
9416 int int_nregs, sse_nregs;
9417
9418 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
9419 if (!named && (VALID_AVX512F_REG_MODE (mode)
9420 || VALID_AVX256_REG_MODE (mode)))
9421 return 0;
9422
9423 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
9424 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
9425 {
9426 cum->nregs -= int_nregs;
9427 cum->sse_nregs -= sse_nregs;
9428 cum->regno += int_nregs;
9429 cum->sse_regno += sse_nregs;
9430 return int_nregs;
9431 }
9432 else
9433 {
9434 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
9435 cum->words = ROUND_UP (cum->words, align);
9436 cum->words += words;
9437 return 0;
9438 }
9439 }
9440
9441 static int
9442 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
9443 HOST_WIDE_INT words)
9444 {
9445 /* Otherwise, this should be passed indirectly. */
9446 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
9447
9448 cum->words += words;
9449 if (cum->nregs > 0)
9450 {
9451 cum->nregs -= 1;
9452 cum->regno += 1;
9453 return 1;
9454 }
9455 return 0;
9456 }
9457
9458 /* Update the data in CUM to advance over an argument of mode MODE and
9459 data type TYPE. (TYPE is null for libcalls where that information
9460 may not be available.) */
9461
9462 static void
9463 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9464 const_tree type, bool named)
9465 {
9466 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9467 HOST_WIDE_INT bytes, words;
9468 int nregs;
9469
9470 /* The argument of interrupt handler is a special case and is
9471 handled in ix86_function_arg. */
9472 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
9473 return;
9474
9475 if (mode == BLKmode)
9476 bytes = int_size_in_bytes (type);
9477 else
9478 bytes = GET_MODE_SIZE (mode);
9479 words = CEIL (bytes, UNITS_PER_WORD);
9480
9481 if (type)
9482 mode = type_natural_mode (type, NULL, false);
9483
9484 if ((type && POINTER_BOUNDS_TYPE_P (type))
9485 || POINTER_BOUNDS_MODE_P (mode))
9486 {
9487 /* If we pass bounds in the Bounds Table, just update the remaining bounds count. */
9488 if (cum->bnds_in_bt)
9489 {
9490 cum->bnds_in_bt--;
9491 return;
9492 }
9493
9494 /* Update the remaining number of bounds to force. */
9495 if (cum->force_bnd_pass)
9496 cum->force_bnd_pass--;
9497
9498 cum->bnd_regno++;
9499
9500 return;
9501 }
9502
9503 /* The first arg not going to Bounds Tables resets this counter. */
9504 cum->bnds_in_bt = 0;
9505 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
9506 the passed and received types do not match. If bounds do not follow an
9507 unnamed arg, still pretend the required number of bounds were passed. */
9508 if (cum->force_bnd_pass)
9509 {
9510 cum->bnd_regno += cum->force_bnd_pass;
9511 cum->force_bnd_pass = 0;
9512 }
9513
9514 if (TARGET_64BIT)
9515 {
9516 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9517
9518 if (call_abi == MS_ABI)
9519 nregs = function_arg_advance_ms_64 (cum, bytes, words);
9520 else
9521 nregs = function_arg_advance_64 (cum, mode, type, words, named);
9522 }
9523 else
9524 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
9525
9526 /* For stdarg we expect bounds to be passed for each value passed
9527 in a register. */
9528 if (cum->stdarg)
9529 cum->force_bnd_pass = nregs;
9530 /* For pointers passed in memory we expect bounds to be passed in the
9531 Bounds Table. */
9532 if (!nregs)
9533 cum->bnds_in_bt = chkp_type_bounds_count (type);
9534 }
9535
9536 /* Define where to put the arguments to a function.
9537 Value is zero to push the argument on the stack,
9538 or a hard register in which to store the argument.
9539
9540 MODE is the argument's machine mode.
9541 TYPE is the data type of the argument (as a tree).
9542 This is null for libcalls where that information may
9543 not be available.
9544 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9545 the preceding args and about the function being called.
9546 NAMED is nonzero if this argument is a named parameter
9547 (otherwise it is an extra parameter matching an ellipsis). */
9548
9549 static rtx
9550 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
9551 machine_mode orig_mode, const_tree type,
9552 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
9553 {
9554 bool error_p = false;
9555 /* Avoid the AL settings for the Unix64 ABI. */
9556 if (mode == VOIDmode)
9557 return constm1_rtx;
9558
9559 if (TARGET_IAMCU)
9560 {
9561 /* Intel MCU psABI passes scalars and aggregates no larger than 8
9562 bytes in registers. */
9563 if (!VECTOR_MODE_P (mode) && bytes <= 8)
9564 goto pass_in_reg;
9565 return NULL_RTX;
9566 }
9567
9568 switch (mode)
9569 {
9570 default:
9571 break;
9572
9573 case BLKmode:
9574 if (bytes < 0)
9575 break;
9576 /* FALLTHRU */
9577 case DImode:
9578 case SImode:
9579 case HImode:
9580 case QImode:
9581 pass_in_reg:
9582 if (words <= cum->nregs)
9583 {
9584 int regno = cum->regno;
9585
9586 /* Fastcall allocates the first two DWORD (SImode) or
9587 smaller arguments to ECX and EDX if the argument isn't
9588 an aggregate type. */
9589 if (cum->fastcall)
9590 {
9591 if (mode == BLKmode
9592 || mode == DImode
9593 || (type && AGGREGATE_TYPE_P (type)))
9594 break;
9595
9596 /* ECX, not EAX, is the first allocated register. */
9597 if (regno == AX_REG)
9598 regno = CX_REG;
9599 }
9600 return gen_rtx_REG (mode, regno);
9601 }
9602 break;
9603
9604 case DFmode:
9605 if (cum->float_in_sse == -1)
9606 error_p = 1;
9607 if (cum->float_in_sse < 2)
9608 break;
9609 /* FALLTHRU */
9610 case SFmode:
9611 if (cum->float_in_sse == -1)
9612 error_p = 1;
9613 if (cum->float_in_sse < 1)
9614 break;
9615 /* FALLTHRU */
9616 case TImode:
9617 /* In 32bit, we pass TImode in xmm registers. */
9618 case V16QImode:
9619 case V8HImode:
9620 case V4SImode:
9621 case V2DImode:
9622 case V4SFmode:
9623 case V2DFmode:
9624 if (!type || !AGGREGATE_TYPE_P (type))
9625 {
9626 if (cum->sse_nregs)
9627 return gen_reg_or_parallel (mode, orig_mode,
9628 cum->sse_regno + FIRST_SSE_REG);
9629 }
9630 break;
9631
9632 case OImode:
9633 case XImode:
9634 /* OImode and XImode shouldn't be used directly. */
9635 gcc_unreachable ();
9636
9637 case V64QImode:
9638 case V32HImode:
9639 case V16SImode:
9640 case V8DImode:
9641 case V16SFmode:
9642 case V8DFmode:
9643 case V8SFmode:
9644 case V8SImode:
9645 case V32QImode:
9646 case V16HImode:
9647 case V4DFmode:
9648 case V4DImode:
9649 if (!type || !AGGREGATE_TYPE_P (type))
9650 {
9651 if (cum->sse_nregs)
9652 return gen_reg_or_parallel (mode, orig_mode,
9653 cum->sse_regno + FIRST_SSE_REG);
9654 }
9655 break;
9656
9657 case V8QImode:
9658 case V4HImode:
9659 case V2SImode:
9660 case V2SFmode:
9661 case V1TImode:
9662 case V1DImode:
9663 if (!type || !AGGREGATE_TYPE_P (type))
9664 {
9665 if (cum->mmx_nregs)
9666 return gen_reg_or_parallel (mode, orig_mode,
9667 cum->mmx_regno + FIRST_MMX_REG);
9668 }
9669 break;
9670 }
9671 if (error_p)
9672 {
9673 cum->float_in_sse = 0;
9674 error ("calling %qD with SSE calling convention without "
9675 "SSE/SSE2 enabled", cum->decl);
9676 sorry ("this is a GCC bug that can be worked around by adding "
9677 "attribute used to function called");
9678 }
9679
9680 return NULL_RTX;
9681 }
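
/* For reference, in 32-bit mode cum->regno indexes the hard registers
   EAX, EDX and ECX (regnos 0, 1 and 2), so regparm arguments are
   allocated in that order; for fastcall the remapping above turns the
   pair into ECX and EDX.  */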
9682
9683 static rtx
9684 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9685 machine_mode orig_mode, const_tree type, bool named)
9686 {
9687 /* Handle a hidden AL argument containing number of registers
9688 for varargs x86-64 functions. */
9689 if (mode == VOIDmode)
9690 return GEN_INT (cum->maybe_vaarg
9691 ? (cum->sse_nregs < 0
9692 ? X86_64_SSE_REGPARM_MAX
9693 : cum->sse_regno)
9694 : -1);
9695
9696 switch (mode)
9697 {
9698 default:
9699 break;
9700
9701 case V8SFmode:
9702 case V8SImode:
9703 case V32QImode:
9704 case V16HImode:
9705 case V4DFmode:
9706 case V4DImode:
9707 case V16SFmode:
9708 case V16SImode:
9709 case V64QImode:
9710 case V32HImode:
9711 case V8DFmode:
9712 case V8DImode:
9713 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9714 if (!named)
9715 return NULL;
9716 break;
9717 }
9718
9719 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9720 cum->sse_nregs,
9721 &x86_64_int_parameter_registers [cum->regno],
9722 cum->sse_regno);
9723 }
9724
9725 static rtx
9726 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9727 machine_mode orig_mode, bool named,
9728 HOST_WIDE_INT bytes)
9729 {
9730 unsigned int regno;
9731
9732 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
9733 We use the value -2 to specify that the current function call uses the MS ABI. */
9734 if (mode == VOIDmode)
9735 return GEN_INT (-2);
9736
9737 /* If we've run out of registers, it goes on the stack. */
9738 if (cum->nregs == 0)
9739 return NULL_RTX;
9740
9741 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9742
9743 /* Only floating point modes are passed in anything but integer regs. */
9744 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9745 {
9746 if (named)
9747 regno = cum->regno + FIRST_SSE_REG;
9748 else
9749 {
9750 rtx t1, t2;
9751
9752 /* Unnamed floating parameters are passed in both the
9753 SSE and integer registers. */
9754 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9755 t2 = gen_rtx_REG (mode, regno);
9756 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9757 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9758 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9759 }
9760 }
9761 /* Handle aggregate types passed in registers. */
9762 if (orig_mode == BLKmode)
9763 {
9764 if (bytes > 0 && bytes <= 8)
9765 mode = (bytes > 4 ? DImode : SImode);
9766 if (mode == BLKmode)
9767 mode = DImode;
9768 }
9769
9770 return gen_reg_or_parallel (mode, orig_mode, regno);
9771 }
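
/* Unlike the SysV ABI, the MS ABI selects the register purely by
   argument position: the N-th argument goes in RCX, RDX, R8 or R9, or
   in XMM0-XMM3 when it is a named SFmode/DFmode value; integer and SSE
   registers are not allocated independently.  */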
9772
9773 /* Return where to put the arguments to a function.
9774 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9775
9776 MODE is the argument's machine mode. TYPE is the data type of the
9777 argument. It is null for libcalls where that information may not be
9778 available. CUM gives information about the preceding args and about
9779 the function being called. NAMED is nonzero if this argument is a
9780 named parameter (otherwise it is an extra parameter matching an
9781 ellipsis). */
9782
9783 static rtx
9784 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9785 const_tree type, bool named)
9786 {
9787 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9788 machine_mode mode = omode;
9789 HOST_WIDE_INT bytes, words;
9790 rtx arg;
9791
9792 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
9793 {
9794 gcc_assert (type != NULL_TREE);
9795 if (POINTER_TYPE_P (type))
9796 {
9797 /* This is the pointer argument. */
9798 gcc_assert (TYPE_MODE (type) == Pmode);
9799 if (cfun->machine->func_type == TYPE_INTERRUPT)
9800 /* -WORD(AP) in the current frame in interrupt handler. */
9801 arg = plus_constant (Pmode, arg_pointer_rtx,
9802 -UNITS_PER_WORD);
9803 else
9804 /* (AP) in the current frame in exception handler. */
9805 arg = arg_pointer_rtx;
9806 }
9807 else
9808 {
9809 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
9810 && TREE_CODE (type) == INTEGER_TYPE
9811 && TYPE_MODE (type) == word_mode);
9812 /* The integer argument is the error code at -WORD(AP) in
9813 the current frame in exception handler. */
9814 arg = gen_rtx_MEM (word_mode,
9815 plus_constant (Pmode,
9816 arg_pointer_rtx,
9817 -UNITS_PER_WORD));
9818 }
9819 return arg;
9820 }
9821
9822 /* All pointer bounds arguments are handled separately here. */
9823 if ((type && POINTER_BOUNDS_TYPE_P (type))
9824 || POINTER_BOUNDS_MODE_P (mode))
9825 {
9826 /* Return NULL if bounds are forced to go in Bounds Table. */
9827 if (cum->bnds_in_bt)
9828 arg = NULL;
9829 /* Return the next available bound reg if any. */
9830 else if (cum->bnd_regno <= LAST_BND_REG)
9831 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
9832 /* Return the next special slot number otherwise. */
9833 else
9834 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9835
9836 return arg;
9837 }
9838
9839 if (mode == BLKmode)
9840 bytes = int_size_in_bytes (type);
9841 else
9842 bytes = GET_MODE_SIZE (mode);
9843 words = CEIL (bytes, UNITS_PER_WORD);
9844
9845 /* To simplify the code below, represent vector types with a vector mode
9846 even if MMX/SSE are not active. */
9847 if (type && TREE_CODE (type) == VECTOR_TYPE)
9848 mode = type_natural_mode (type, cum, false);
9849
9850 if (TARGET_64BIT)
9851 {
9852 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9853
9854 if (call_abi == MS_ABI)
9855 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9856 else
9857 arg = function_arg_64 (cum, mode, omode, type, named);
9858 }
9859 else
9860 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9861
9862 return arg;
9863 }
9864
9865 /* Return true when an argument must be passed by reference. If true
9866 for an argument, a copy of that argument is made in memory and a
9867 pointer to the argument is passed instead of the argument itself.
9868 The pointer is passed in whatever way is appropriate for passing a
9869 pointer to that type. */
9870
9871 static bool
9872 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9873 const_tree type, bool)
9874 {
9875 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9876
9877 /* Bounds are never passed by reference. */
9878 if ((type && POINTER_BOUNDS_TYPE_P (type))
9879 || POINTER_BOUNDS_MODE_P (mode))
9880 return false;
9881
9882 if (TARGET_64BIT)
9883 {
9884 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9885
9886 /* See Windows x64 Software Convention. */
9887 if (call_abi == MS_ABI)
9888 {
9889 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9890
9891 if (type)
9892 {
9893 /* Arrays are passed by reference. */
9894 if (TREE_CODE (type) == ARRAY_TYPE)
9895 return true;
9896
9897 if (RECORD_OR_UNION_TYPE_P (type))
9898 {
9899 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9900 are passed by reference. */
9901 msize = int_size_in_bytes (type);
9902 }
9903 }
9904
9905 /* __m128 is passed by reference. */
9906 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
9907 }
9908 else if (type && int_size_in_bytes (type) == -1)
9909 return true;
9910 }
9911
9912 return false;
9913 }
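
/* For example, under the 64-bit MS ABI a 16-byte struct (or __m128) is
   passed by reference because its size is not 1, 2, 4 or 8 bytes,
   whereas under the SysV ABI only variably-sized types are forced by
   this hook to be passed by reference.  */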
9914
9915 /* Return true when TYPE should be 128bit aligned for 32bit argument
9916 passing ABI. XXX: This function is obsolete and is only used for
9917 checking psABI compatibility with previous versions of GCC. */
9918
9919 static bool
9920 ix86_compat_aligned_value_p (const_tree type)
9921 {
9922 machine_mode mode = TYPE_MODE (type);
9923 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9924 || mode == TDmode
9925 || mode == TFmode
9926 || mode == TCmode)
9927 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9928 return true;
9929 if (TYPE_ALIGN (type) < 128)
9930 return false;
9931
9932 if (AGGREGATE_TYPE_P (type))
9933 {
9934 /* Walk the aggregates recursively. */
9935 switch (TREE_CODE (type))
9936 {
9937 case RECORD_TYPE:
9938 case UNION_TYPE:
9939 case QUAL_UNION_TYPE:
9940 {
9941 tree field;
9942
9943 /* Walk all the structure fields. */
9944 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9945 {
9946 if (TREE_CODE (field) == FIELD_DECL
9947 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9948 return true;
9949 }
9950 break;
9951 }
9952
9953 case ARRAY_TYPE:
9954 /* Just for use if some language passes arrays by value. */
9955 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9956 return true;
9957 break;
9958
9959 default:
9960 gcc_unreachable ();
9961 }
9962 }
9963 return false;
9964 }
9965
9966 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9967 XXX: This function is obsolete and is only used for checking psABI
9968 compatibility with previous versions of GCC. */
9969
9970 static unsigned int
9971 ix86_compat_function_arg_boundary (machine_mode mode,
9972 const_tree type, unsigned int align)
9973 {
9974 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9975 natural boundaries. */
9976 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9977 {
9978 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9979 make an exception for SSE modes since these require 128bit
9980 alignment.
9981
9982 The handling here differs from field_alignment. ICC aligns MMX
9983 arguments to 4 byte boundaries, while structure fields are aligned
9984 to 8 byte boundaries. */
9985 if (!type)
9986 {
9987 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9988 align = PARM_BOUNDARY;
9989 }
9990 else
9991 {
9992 if (!ix86_compat_aligned_value_p (type))
9993 align = PARM_BOUNDARY;
9994 }
9995 }
9996 if (align > BIGGEST_ALIGNMENT)
9997 align = BIGGEST_ALIGNMENT;
9998 return align;
9999 }
10000
10001 /* Return true when TYPE should be 128bit aligned for 32bit argument
10002 passing ABI. */
10003
10004 static bool
10005 ix86_contains_aligned_value_p (const_tree type)
10006 {
10007 machine_mode mode = TYPE_MODE (type);
10008
10009 if (mode == XFmode || mode == XCmode)
10010 return false;
10011
10012 if (TYPE_ALIGN (type) < 128)
10013 return false;
10014
10015 if (AGGREGATE_TYPE_P (type))
10016 {
10017 /* Walk the aggregates recursively. */
10018 switch (TREE_CODE (type))
10019 {
10020 case RECORD_TYPE:
10021 case UNION_TYPE:
10022 case QUAL_UNION_TYPE:
10023 {
10024 tree field;
10025
10026 /* Walk all the structure fields. */
10027 for (field = TYPE_FIELDS (type);
10028 field;
10029 field = DECL_CHAIN (field))
10030 {
10031 if (TREE_CODE (field) == FIELD_DECL
10032 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
10033 return true;
10034 }
10035 break;
10036 }
10037
10038 case ARRAY_TYPE:
10039 /* Just for use if some language passes arrays by value. */
10040 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
10041 return true;
10042 break;
10043
10044 default:
10045 gcc_unreachable ();
10046 }
10047 }
10048 else
10049 return TYPE_ALIGN (type) >= 128;
10050
10051 return false;
10052 }
10053
10054 /* Gives the alignment boundary, in bits, of an argument with the
10055 specified mode and type. */
10056
10057 static unsigned int
10058 ix86_function_arg_boundary (machine_mode mode, const_tree type)
10059 {
10060 unsigned int align;
10061 if (type)
10062 {
10063 /* Since the main variant type is used for the call, convert the type
10064 to its main variant. */
10065 type = TYPE_MAIN_VARIANT (type);
10066 align = TYPE_ALIGN (type);
10067 }
10068 else
10069 align = GET_MODE_ALIGNMENT (mode);
10070 if (align < PARM_BOUNDARY)
10071 align = PARM_BOUNDARY;
10072 else
10073 {
10074 static bool warned;
10075 unsigned int saved_align = align;
10076
10077 if (!TARGET_64BIT)
10078 {
10079 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
10080 if (!type)
10081 {
10082 if (mode == XFmode || mode == XCmode)
10083 align = PARM_BOUNDARY;
10084 }
10085 else if (!ix86_contains_aligned_value_p (type))
10086 align = PARM_BOUNDARY;
10087
10088 if (align < 128)
10089 align = PARM_BOUNDARY;
10090 }
10091
10092 if (warn_psabi
10093 && !warned
10094 && align != ix86_compat_function_arg_boundary (mode, type,
10095 saved_align))
10096 {
10097 warned = true;
10098 inform (input_location,
10099 "The ABI for passing parameters with %d-byte"
10100 " alignment has changed in GCC 4.6",
10101 align / BITS_PER_UNIT);
10102 }
10103 }
10104
10105 return align;
10106 }
10107
10108 /* Return true if N is a possible register number of function value. */
10109
10110 static bool
10111 ix86_function_value_regno_p (const unsigned int regno)
10112 {
10113 switch (regno)
10114 {
10115 case AX_REG:
10116 return true;
10117 case DX_REG:
10118 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
10119 case DI_REG:
10120 case SI_REG:
10121 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
10122
10123 case BND0_REG:
10124 case BND1_REG:
10125 return chkp_function_instrumented_p (current_function_decl);
10126
10127 /* Complex values are returned in %st(0)/%st(1) pair. */
10128 case ST0_REG:
10129 case ST1_REG:
10130 /* TODO: The function should depend on current function ABI but
10131 builtins.c would need updating then. Therefore we use the
10132 default ABI. */
10133 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
10134 return false;
10135 return TARGET_FLOAT_RETURNS_IN_80387;
10136
10137 /* Complex values are returned in %xmm0/%xmm1 pair. */
10138 case XMM0_REG:
10139 case XMM1_REG:
10140 return TARGET_SSE;
10141
10142 case MM0_REG:
10143 if (TARGET_MACHO || TARGET_64BIT)
10144 return false;
10145 return TARGET_MMX;
10146 }
10147
10148 return false;
10149 }
10150
10151 /* Define how to find the value returned by a function.
10152 VALTYPE is the data type of the value (as a tree).
10153 If the precise function being called is known, FUNC is its FUNCTION_DECL;
10154 otherwise, FUNC is 0. */
10155
10156 static rtx
10157 function_value_32 (machine_mode orig_mode, machine_mode mode,
10158 const_tree fntype, const_tree fn)
10159 {
10160 unsigned int regno;
10161
10162 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
10163 we normally prevent this case when mmx is not available. However
10164 some ABIs may require the result to be returned like DImode. */
10165 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
10166 regno = FIRST_MMX_REG;
10167
10168 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
10169 we prevent this case when sse is not available. However some ABIs
10170 may require the result to be returned like integer TImode. */
10171 else if (mode == TImode
10172 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
10173 regno = FIRST_SSE_REG;
10174
10175 /* 32-byte vector modes in %ymm0. */
10176 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
10177 regno = FIRST_SSE_REG;
10178
10179 /* 64-byte vector modes in %zmm0. */
10180 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
10181 regno = FIRST_SSE_REG;
10182
10183 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
10184 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
10185 regno = FIRST_FLOAT_REG;
10186 else
10187 /* Most things go in %eax. */
10188 regno = AX_REG;
10189
10190 /* Override FP return register with %xmm0 for local functions when
10191 SSE math is enabled or for functions with sseregparm attribute. */
10192 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
10193 {
10194 int sse_level = ix86_function_sseregparm (fntype, fn, false);
10195 if (sse_level == -1)
10196 {
10197 error ("calling %qD with SSE caling convention without "
10198 "SSE/SSE2 enabled", fn);
10199 sorry ("this is a GCC bug that can be worked around by adding "
10200 "attribute used to function called");
10201 }
10202 else if ((sse_level >= 1 && mode == SFmode)
10203 || (sse_level == 2 && mode == DFmode))
10204 regno = FIRST_SSE_REG;
10205 }
10206
10207 /* OImode shouldn't be used directly. */
10208 gcc_assert (mode != OImode);
10209
10210 return gen_rtx_REG (orig_mode, regno);
10211 }
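
/* A sketch of the 32-bit return-register selection implemented above
   (illustrative, assuming default flags): an "int" or pointer comes back in
   %eax, "float"/"double" in %st(0) unless SSE math or the sseregparm
   attribute redirects them to %xmm0, and a 16-byte vector such as __m128 in
   %xmm0 when SSE is enabled.  */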
10212
10213 static rtx
10214 function_value_64 (machine_mode orig_mode, machine_mode mode,
10215 const_tree valtype)
10216 {
10217 rtx ret;
10218
10219 /* Handle libcalls, which don't provide a type node. */
10220 if (valtype == NULL)
10221 {
10222 unsigned int regno;
10223
10224 switch (mode)
10225 {
10226 case SFmode:
10227 case SCmode:
10228 case DFmode:
10229 case DCmode:
10230 case TFmode:
10231 case SDmode:
10232 case DDmode:
10233 case TDmode:
10234 regno = FIRST_SSE_REG;
10235 break;
10236 case XFmode:
10237 case XCmode:
10238 regno = FIRST_FLOAT_REG;
10239 break;
10240 case TCmode:
10241 return NULL;
10242 default:
10243 regno = AX_REG;
10244 }
10245
10246 return gen_rtx_REG (mode, regno);
10247 }
10248 else if (POINTER_TYPE_P (valtype))
10249 {
10250 /* Pointers are always returned in word_mode. */
10251 mode = word_mode;
10252 }
10253
10254 ret = construct_container (mode, orig_mode, valtype, 1,
10255 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
10256 x86_64_int_return_registers, 0);
10257
10258 /* For zero-sized structures, construct_container returns NULL, but we
10259 need to keep the rest of the compiler happy by returning a meaningful value. */
10260 if (!ret)
10261 ret = gen_rtx_REG (orig_mode, AX_REG);
10262
10263 return ret;
10264 }
10265
10266 static rtx
10267 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
10268 const_tree valtype)
10269 {
10270 unsigned int regno = AX_REG;
10271
10272 if (TARGET_SSE)
10273 {
10274 switch (GET_MODE_SIZE (mode))
10275 {
10276 case 16:
10277 if (valtype != NULL_TREE
10278 && !VECTOR_INTEGER_TYPE_P (valtype)
10280 && !INTEGRAL_TYPE_P (valtype)
10281 && !VECTOR_FLOAT_TYPE_P (valtype))
10282 break;
10283 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
10284 && !COMPLEX_MODE_P (mode))
10285 regno = FIRST_SSE_REG;
10286 break;
10287 case 8:
10288 case 4:
10289 if (mode == SFmode || mode == DFmode)
10290 regno = FIRST_SSE_REG;
10291 break;
10292 default:
10293 break;
10294 }
10295 }
10296 return gen_rtx_REG (orig_mode, regno);
10297 }
10298
10299 static rtx
10300 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
10301 machine_mode orig_mode, machine_mode mode)
10302 {
10303 const_tree fn, fntype;
10304
10305 fn = NULL_TREE;
10306 if (fntype_or_decl && DECL_P (fntype_or_decl))
10307 fn = fntype_or_decl;
10308 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
10309
10310 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
10311 || POINTER_BOUNDS_MODE_P (mode))
10312 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
10313 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
10314 return function_value_ms_64 (orig_mode, mode, valtype);
10315 else if (TARGET_64BIT)
10316 return function_value_64 (orig_mode, mode, valtype);
10317 else
10318 return function_value_32 (orig_mode, mode, fntype, fn);
10319 }
10320
10321 static rtx
10322 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
10323 {
10324 machine_mode mode, orig_mode;
10325
10326 orig_mode = TYPE_MODE (valtype);
10327 mode = type_natural_mode (valtype, NULL, true);
10328 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
10329 }
10330
10331 /* Return an RTX representing a place where a function returns
10332 or receives pointer bounds, or NULL if no bounds are returned.
10333
10334 VALTYPE is a data type of a value returned by the function.
10335
10336 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
10337 or FUNCTION_TYPE of the function.
10338
10339 If OUTGOING is false, return a place in which the caller will
10340 see the return value. Otherwise, return a place where a
10341 function returns a value. */
10342
10343 static rtx
10344 ix86_function_value_bounds (const_tree valtype,
10345 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
10346 bool outgoing ATTRIBUTE_UNUSED)
10347 {
10348 rtx res = NULL_RTX;
10349
10350 if (BOUNDED_TYPE_P (valtype))
10351 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
10352 else if (chkp_type_has_pointer (valtype))
10353 {
10354 bitmap slots;
10355 rtx bounds[2];
10356 bitmap_iterator bi;
10357 unsigned i, bnd_no = 0;
10358
10359 bitmap_obstack_initialize (NULL);
10360 slots = BITMAP_ALLOC (NULL);
10361 chkp_find_bound_slots (valtype, slots);
10362
10363 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
10364 {
10365 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
10366 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
10367 gcc_assert (bnd_no < 2);
10368 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
10369 }
10370
10371 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
10372
10373 BITMAP_FREE (slots);
10374 bitmap_obstack_release (NULL);
10375 }
10376 else
10377 res = NULL_RTX;
10378
10379 return res;
10380 }
10381
10382 /* Pointer function arguments and return values are promoted to
10383 word_mode for normal functions. */
10384
10385 static machine_mode
10386 ix86_promote_function_mode (const_tree type, machine_mode mode,
10387 int *punsignedp, const_tree fntype,
10388 int for_return)
10389 {
10390 if (cfun->machine->func_type == TYPE_NORMAL
10391 && type != NULL_TREE
10392 && POINTER_TYPE_P (type))
10393 {
10394 *punsignedp = POINTERS_EXTEND_UNSIGNED;
10395 return word_mode;
10396 }
10397 return default_promote_function_mode (type, mode, punsignedp, fntype,
10398 for_return);
10399 }
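
/* For illustration: on targets where word_mode is wider than the pointer
   (e.g. the x32 ABI, where pointers are 32 bits but word_mode is DImode),
   the hook above makes normal functions pass and return pointers
   zero-extended to a full word; interrupt handlers and other non-normal
   function types fall back to the default promotion.  */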
10400
10401 /* Return true if a structure, union or array with MODE containing FIELD
10402 should be accessed using BLKmode. */
10403
10404 static bool
10405 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
10406 {
10407 /* Union with XFmode must be in BLKmode. */
10408 return (mode == XFmode
10409 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
10410 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
10411 }
10412
10413 rtx
10414 ix86_libcall_value (machine_mode mode)
10415 {
10416 return ix86_function_value_1 (NULL, NULL, mode, mode);
10417 }
10418
10419 /* Return true iff type is returned in memory. */
10420
10421 static bool
10422 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10423 {
10424 #ifdef SUBTARGET_RETURN_IN_MEMORY
10425 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
10426 #else
10427 const machine_mode mode = type_natural_mode (type, NULL, true);
10428 HOST_WIDE_INT size;
10429
10430 if (POINTER_BOUNDS_TYPE_P (type))
10431 return false;
10432
10433 if (TARGET_64BIT)
10434 {
10435 if (ix86_function_type_abi (fntype) == MS_ABI)
10436 {
10437 size = int_size_in_bytes (type);
10438
10439 /* __m128 is returned in xmm0. */
10440 if ((!type || VECTOR_INTEGER_TYPE_P (type)
10441 || INTEGRAL_TYPE_P (type)
10442 || VECTOR_FLOAT_TYPE_P (type))
10443 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
10444 && !COMPLEX_MODE_P (mode)
10445 && (GET_MODE_SIZE (mode) == 16 || size == 16))
10446 return false;
10447
10448 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
10449 return size != 1 && size != 2 && size != 4 && size != 8;
10450 }
10451 else
10452 {
10453 int needed_intregs, needed_sseregs;
10454
10455 return examine_argument (mode, type, 1,
10456 &needed_intregs, &needed_sseregs);
10457 }
10458 }
10459 else
10460 {
10461 size = int_size_in_bytes (type);
10462
10463 /* Intel MCU psABI returns scalars and aggregates no larger than 8
10464 bytes in registers. */
10465 if (TARGET_IAMCU)
10466 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
10467
10468 if (mode == BLKmode)
10469 return true;
10470
10471 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
10472 return false;
10473
10474 if (VECTOR_MODE_P (mode) || mode == TImode)
10475 {
10476 /* User-created vectors small enough to fit in EAX. */
10477 if (size < 8)
10478 return false;
10479
10480 /* Unless the ABI prescribes otherwise,
10481 MMX/3dNow values are returned in MM0 if available. */
10482
10483 if (size == 8)
10484 return TARGET_VECT8_RETURNS || !TARGET_MMX;
10485
10486 /* SSE values are returned in XMM0 if available. */
10487 if (size == 16)
10488 return !TARGET_SSE;
10489
10490 /* AVX values are returned in YMM0 if available. */
10491 if (size == 32)
10492 return !TARGET_AVX;
10493
10494 /* AVX512F values are returned in ZMM0 if available. */
10495 if (size == 64)
10496 return !TARGET_AVX512F;
10497 }
10498
10499 if (mode == XFmode)
10500 return false;
10501
10502 if (size > 12)
10503 return true;
10504
10505 /* OImode shouldn't be used directly. */
10506 gcc_assert (mode != OImode);
10507
10508 return false;
10509 }
10510 #endif
10511 }
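
/* Illustrative examples for the default 32-bit rules above (a sketch, not
   exhaustive): "long double" (XFmode) is returned in registers (%st(0)),
   an __m128 value is returned in %xmm0 when SSE is enabled, while a plain
   16-byte struct exceeds the 12-byte limit and is returned in memory.  */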
10512
10513 \f
10514 /* Create the va_list data type. */
10515
10516 static tree
10517 ix86_build_builtin_va_list_64 (void)
10518 {
10519 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
10520
10521 record = lang_hooks.types.make_type (RECORD_TYPE);
10522 type_decl = build_decl (BUILTINS_LOCATION,
10523 TYPE_DECL, get_identifier ("__va_list_tag"), record);
10524
10525 f_gpr = build_decl (BUILTINS_LOCATION,
10526 FIELD_DECL, get_identifier ("gp_offset"),
10527 unsigned_type_node);
10528 f_fpr = build_decl (BUILTINS_LOCATION,
10529 FIELD_DECL, get_identifier ("fp_offset"),
10530 unsigned_type_node);
10531 f_ovf = build_decl (BUILTINS_LOCATION,
10532 FIELD_DECL, get_identifier ("overflow_arg_area"),
10533 ptr_type_node);
10534 f_sav = build_decl (BUILTINS_LOCATION,
10535 FIELD_DECL, get_identifier ("reg_save_area"),
10536 ptr_type_node);
10537
10538 va_list_gpr_counter_field = f_gpr;
10539 va_list_fpr_counter_field = f_fpr;
10540
10541 DECL_FIELD_CONTEXT (f_gpr) = record;
10542 DECL_FIELD_CONTEXT (f_fpr) = record;
10543 DECL_FIELD_CONTEXT (f_ovf) = record;
10544 DECL_FIELD_CONTEXT (f_sav) = record;
10545
10546 TYPE_STUB_DECL (record) = type_decl;
10547 TYPE_NAME (record) = type_decl;
10548 TYPE_FIELDS (record) = f_gpr;
10549 DECL_CHAIN (f_gpr) = f_fpr;
10550 DECL_CHAIN (f_fpr) = f_ovf;
10551 DECL_CHAIN (f_ovf) = f_sav;
10552
10553 layout_type (record);
10554
10555 /* The correct type is an array type of one element. */
10556 return build_array_type (record, build_index_type (size_zero_node));
10557 }
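
/* For reference, a sketch of the equivalent C declaration of the record
   built above (field names and order match the SysV AMD64 va_list):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/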
10558
10559 /* Setup the builtin va_list data type and for 64-bit the additional
10560 calling convention specific va_list data types. */
10561
10562 static tree
10563 ix86_build_builtin_va_list (void)
10564 {
10565 if (TARGET_64BIT)
10566 {
10567 /* Initialize ABI specific va_list builtin types. */
10568 tree sysv_va_list, ms_va_list;
10569
10570 sysv_va_list = ix86_build_builtin_va_list_64 ();
10571 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
10572
10573 /* For MS_ABI we use plain pointer to argument area. */
10574 ms_va_list = build_pointer_type (char_type_node);
10575 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
10576
10577 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
10578 }
10579 else
10580 {
10581 /* For i386 we use plain pointer to argument area. */
10582 return build_pointer_type (char_type_node);
10583 }
10584 }
10585
10586 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
10587
10588 static void
10589 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
10590 {
10591 rtx save_area, mem;
10592 alias_set_type set;
10593 int i, max;
10594
10595 /* GPR size of varargs save area. */
10596 if (cfun->va_list_gpr_size)
10597 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
10598 else
10599 ix86_varargs_gpr_size = 0;
10600
10601 /* FPR size of varargs save area. We don't need it if we don't pass
10602 anything in SSE registers. */
10603 if (TARGET_SSE && cfun->va_list_fpr_size)
10604 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
10605 else
10606 ix86_varargs_fpr_size = 0;
10607
10608 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
10609 return;
10610
10611 save_area = frame_pointer_rtx;
10612 set = get_varargs_alias_set ();
10613
10614 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10615 if (max > X86_64_REGPARM_MAX)
10616 max = X86_64_REGPARM_MAX;
10617
10618 for (i = cum->regno; i < max; i++)
10619 {
10620 mem = gen_rtx_MEM (word_mode,
10621 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
10622 MEM_NOTRAP_P (mem) = 1;
10623 set_mem_alias_set (mem, set);
10624 emit_move_insn (mem,
10625 gen_rtx_REG (word_mode,
10626 x86_64_int_parameter_registers[i]));
10627 }
10628
10629 if (ix86_varargs_fpr_size)
10630 {
10631 machine_mode smode;
10632 rtx_code_label *label;
10633 rtx test;
10634
10635 /* Now emit code to save SSE registers. The AX parameter contains number
10636 of SSE parameter registers used to call this function, though all we
10637 actually check here is the zero/non-zero status. */
10638
10639 label = gen_label_rtx ();
10640 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
10641 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
10642 label));
10643
10644 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
10645 we used movdqa (i.e. TImode) instead? Perhaps even better would
10646 be if we could determine the real mode of the data, via a hook
10647 into pass_stdarg. Ignore all that for now. */
10648 smode = V4SFmode;
10649 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
10650 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
10651
10652 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
10653 if (max > X86_64_SSE_REGPARM_MAX)
10654 max = X86_64_SSE_REGPARM_MAX;
10655
10656 for (i = cum->sse_regno; i < max; ++i)
10657 {
10658 mem = plus_constant (Pmode, save_area,
10659 i * 16 + ix86_varargs_gpr_size);
10660 mem = gen_rtx_MEM (smode, mem);
10661 MEM_NOTRAP_P (mem) = 1;
10662 set_mem_alias_set (mem, set);
10663 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
10664
10665 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
10666 }
10667
10668 emit_label (label);
10669 }
10670 }
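
/* A sketch of the register save area laid out by the code above, assuming
   the usual X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:

     offset   0 ..  47 : %rdi, %rsi, %rdx, %rcx, %r8, %r9  (8 bytes each)
     offset  48 .. 175 : %xmm0 .. %xmm7                     (16 bytes each)

   ix86_va_start later seeds gp_offset with n_gpr * 8 and fp_offset with
   n_fpr * 16 + 8 * X86_64_REGPARM_MAX so that va_arg indexes into this
   area.  */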
10671
10672 static void
10673 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
10674 {
10675 alias_set_type set = get_varargs_alias_set ();
10676 int i;
10677
10678 /* Reset to zero, as a SysV va_arg might have been used
10679 before. */
10680 ix86_varargs_gpr_size = 0;
10681 ix86_varargs_fpr_size = 0;
10682
10683 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
10684 {
10685 rtx reg, mem;
10686
10687 mem = gen_rtx_MEM (Pmode,
10688 plus_constant (Pmode, virtual_incoming_args_rtx,
10689 i * UNITS_PER_WORD));
10690 MEM_NOTRAP_P (mem) = 1;
10691 set_mem_alias_set (mem, set);
10692
10693 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10694 emit_move_insn (mem, reg);
10695 }
10696 }
10697
10698 static void
10699 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10700 tree type, int *, int no_rtl)
10701 {
10702 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10703 CUMULATIVE_ARGS next_cum;
10704 tree fntype;
10705
10706 /* This argument doesn't appear to be used anymore, which is good
10707 because the old code here didn't suppress rtl generation. */
10708 gcc_assert (!no_rtl);
10709
10710 if (!TARGET_64BIT)
10711 return;
10712
10713 fntype = TREE_TYPE (current_function_decl);
10714
10715 /* For varargs, we do not want to skip the dummy va_dcl argument.
10716 For stdargs, we do want to skip the last named argument. */
10717 next_cum = *cum;
10718 if (stdarg_p (fntype))
10719 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10720 true);
10721
10722 if (cum->call_abi == MS_ABI)
10723 setup_incoming_varargs_ms_64 (&next_cum);
10724 else
10725 setup_incoming_varargs_64 (&next_cum);
10726 }
10727
10728 static void
10729 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10730 enum machine_mode mode,
10731 tree type,
10732 int *pretend_size ATTRIBUTE_UNUSED,
10733 int no_rtl)
10734 {
10735 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10736 CUMULATIVE_ARGS next_cum;
10737 tree fntype;
10738 rtx save_area;
10739 int bnd_reg, i, max;
10740
10741 gcc_assert (!no_rtl);
10742
10743 /* Do nothing if we use plain pointer to argument area. */
10744 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10745 return;
10746
10747 fntype = TREE_TYPE (current_function_decl);
10748
10749 /* For varargs, we do not want to skip the dummy va_dcl argument.
10750 For stdargs, we do want to skip the last named argument. */
10751 next_cum = *cum;
10752 if (stdarg_p (fntype))
10753 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10754 true);
10755 save_area = frame_pointer_rtx;
10756
10757 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10758 if (max > X86_64_REGPARM_MAX)
10759 max = X86_64_REGPARM_MAX;
10760
10761 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10762 if (chkp_function_instrumented_p (current_function_decl))
10763 for (i = cum->regno; i < max; i++)
10764 {
10765 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10766 rtx ptr = gen_rtx_REG (Pmode,
10767 x86_64_int_parameter_registers[i]);
10768 rtx bounds;
10769
10770 if (bnd_reg <= LAST_BND_REG)
10771 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10772 else
10773 {
10774 rtx ldx_addr =
10775 plus_constant (Pmode, arg_pointer_rtx,
10776 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10777 bounds = gen_reg_rtx (BNDmode);
10778 emit_insn (BNDmode == BND64mode
10779 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10780 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10781 }
10782
10783 emit_insn (BNDmode == BND64mode
10784 ? gen_bnd64_stx (addr, ptr, bounds)
10785 : gen_bnd32_stx (addr, ptr, bounds));
10786
10787 bnd_reg++;
10788 }
10789 }
10790
10791
10792 /* Checks if TYPE is of kind va_list char *. */
10793
10794 static bool
10795 is_va_list_char_pointer (tree type)
10796 {
10797 tree canonic;
10798
10799 /* For 32-bit it is always true. */
10800 if (!TARGET_64BIT)
10801 return true;
10802 canonic = ix86_canonical_va_list_type (type);
10803 return (canonic == ms_va_list_type_node
10804 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10805 }
10806
10807 /* Implement va_start. */
10808
10809 static void
10810 ix86_va_start (tree valist, rtx nextarg)
10811 {
10812 HOST_WIDE_INT words, n_gpr, n_fpr;
10813 tree f_gpr, f_fpr, f_ovf, f_sav;
10814 tree gpr, fpr, ovf, sav, t;
10815 tree type;
10816 rtx ovf_rtx;
10817
10818 if (flag_split_stack
10819 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10820 {
10821 unsigned int scratch_regno;
10822
10823 /* When we are splitting the stack, we can't refer to the stack
10824 arguments using internal_arg_pointer, because they may be on
10825 the old stack. The split stack prologue will arrange to
10826 leave a pointer to the old stack arguments in a scratch
10827 register, which we here copy to a pseudo-register. The split
10828 stack prologue can't set the pseudo-register directly because
10829 it (the prologue) runs before any registers have been saved. */
10830
10831 scratch_regno = split_stack_prologue_scratch_regno ();
10832 if (scratch_regno != INVALID_REGNUM)
10833 {
10834 rtx reg;
10835 rtx_insn *seq;
10836
10837 reg = gen_reg_rtx (Pmode);
10838 cfun->machine->split_stack_varargs_pointer = reg;
10839
10840 start_sequence ();
10841 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10842 seq = get_insns ();
10843 end_sequence ();
10844
10845 push_topmost_sequence ();
10846 emit_insn_after (seq, entry_of_function ());
10847 pop_topmost_sequence ();
10848 }
10849 }
10850
10851 /* Only 64bit target needs something special. */
10852 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10853 {
10854 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10855 std_expand_builtin_va_start (valist, nextarg);
10856 else
10857 {
10858 rtx va_r, next;
10859
10860 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10861 next = expand_binop (ptr_mode, add_optab,
10862 cfun->machine->split_stack_varargs_pointer,
10863 crtl->args.arg_offset_rtx,
10864 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10865 convert_move (va_r, next, 0);
10866
10867 /* Store zero bounds for va_list. */
10868 if (chkp_function_instrumented_p (current_function_decl))
10869 chkp_expand_bounds_reset_for_mem (valist,
10870 make_tree (TREE_TYPE (valist),
10871 next));
10872
10873 }
10874 return;
10875 }
10876
10877 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10878 f_fpr = DECL_CHAIN (f_gpr);
10879 f_ovf = DECL_CHAIN (f_fpr);
10880 f_sav = DECL_CHAIN (f_ovf);
10881
10882 valist = build_simple_mem_ref (valist);
10883 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10884 /* The following should be folded into the MEM_REF offset. */
10885 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10886 f_gpr, NULL_TREE);
10887 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10888 f_fpr, NULL_TREE);
10889 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10890 f_ovf, NULL_TREE);
10891 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10892 f_sav, NULL_TREE);
10893
10894 /* Count number of gp and fp argument registers used. */
10895 words = crtl->args.info.words;
10896 n_gpr = crtl->args.info.regno;
10897 n_fpr = crtl->args.info.sse_regno;
10898
10899 if (cfun->va_list_gpr_size)
10900 {
10901 type = TREE_TYPE (gpr);
10902 t = build2 (MODIFY_EXPR, type,
10903 gpr, build_int_cst (type, n_gpr * 8));
10904 TREE_SIDE_EFFECTS (t) = 1;
10905 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10906 }
10907
10908 if (TARGET_SSE && cfun->va_list_fpr_size)
10909 {
10910 type = TREE_TYPE (fpr);
10911 t = build2 (MODIFY_EXPR, type, fpr,
10912 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10913 TREE_SIDE_EFFECTS (t) = 1;
10914 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10915 }
10916
10917 /* Find the overflow area. */
10918 type = TREE_TYPE (ovf);
10919 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10920 ovf_rtx = crtl->args.internal_arg_pointer;
10921 else
10922 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10923 t = make_tree (type, ovf_rtx);
10924 if (words != 0)
10925 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10926
10927 /* Store zero bounds for overflow area pointer. */
10928 if (chkp_function_instrumented_p (current_function_decl))
10929 chkp_expand_bounds_reset_for_mem (ovf, t);
10930
10931 t = build2 (MODIFY_EXPR, type, ovf, t);
10932 TREE_SIDE_EFFECTS (t) = 1;
10933 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10934
10935 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10936 {
10937 /* Find the register save area.
10938 The prologue of the function saves it right above the stack frame. */
10939 type = TREE_TYPE (sav);
10940 t = make_tree (type, frame_pointer_rtx);
10941 if (!ix86_varargs_gpr_size)
10942 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10943
10944 /* Store zero bounds for save area pointer. */
10945 if (chkp_function_instrumented_p (current_function_decl))
10946 chkp_expand_bounds_reset_for_mem (sav, t);
10947
10948 t = build2 (MODIFY_EXPR, type, sav, t);
10949 TREE_SIDE_EFFECTS (t) = 1;
10950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10951 }
10952 }
10953
10954 /* Implement va_arg. */
10955
10956 static tree
10957 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10958 gimple_seq *post_p)
10959 {
10960 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10961 tree f_gpr, f_fpr, f_ovf, f_sav;
10962 tree gpr, fpr, ovf, sav, t;
10963 int size, rsize;
10964 tree lab_false, lab_over = NULL_TREE;
10965 tree addr, t2;
10966 rtx container;
10967 int indirect_p = 0;
10968 tree ptrtype;
10969 machine_mode nat_mode;
10970 unsigned int arg_boundary;
10971
10972 /* Only 64bit target needs something special. */
10973 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10974 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10975
10976 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10977 f_fpr = DECL_CHAIN (f_gpr);
10978 f_ovf = DECL_CHAIN (f_fpr);
10979 f_sav = DECL_CHAIN (f_ovf);
10980
10981 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10982 valist, f_gpr, NULL_TREE);
10983
10984 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10985 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10986 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10987
10988 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10989 if (indirect_p)
10990 type = build_pointer_type (type);
10991 size = int_size_in_bytes (type);
10992 rsize = CEIL (size, UNITS_PER_WORD);
10993
10994 nat_mode = type_natural_mode (type, NULL, false);
10995 switch (nat_mode)
10996 {
10997 case V8SFmode:
10998 case V8SImode:
10999 case V32QImode:
11000 case V16HImode:
11001 case V4DFmode:
11002 case V4DImode:
11003 case V16SFmode:
11004 case V16SImode:
11005 case V64QImode:
11006 case V32HImode:
11007 case V8DFmode:
11008 case V8DImode:
11009 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
11010 if (!TARGET_64BIT_MS_ABI)
11011 {
11012 container = NULL;
11013 break;
11014 }
11015 /* FALLTHRU */
11016
11017 default:
11018 container = construct_container (nat_mode, TYPE_MODE (type),
11019 type, 0, X86_64_REGPARM_MAX,
11020 X86_64_SSE_REGPARM_MAX, intreg,
11021 0);
11022 break;
11023 }
11024
11025 /* Pull the value out of the saved registers. */
11026
11027 addr = create_tmp_var (ptr_type_node, "addr");
11028
11029 if (container)
11030 {
11031 int needed_intregs, needed_sseregs;
11032 bool need_temp;
11033 tree int_addr, sse_addr;
11034
11035 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11036 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11037
11038 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
11039
11040 need_temp = (!REG_P (container)
11041 && ((needed_intregs && TYPE_ALIGN (type) > 64)
11042 || TYPE_ALIGN (type) > 128));
11043
11044 /* If we are passing a structure, verify that it is a consecutive block
11045 in the register save area. If not, we need to do moves. */
11046 if (!need_temp && !REG_P (container))
11047 {
11048 /* Verify that all registers are strictly consecutive */
11049 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
11050 {
11051 int i;
11052
11053 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
11054 {
11055 rtx slot = XVECEXP (container, 0, i);
11056 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
11057 || INTVAL (XEXP (slot, 1)) != i * 16)
11058 need_temp = true;
11059 }
11060 }
11061 else
11062 {
11063 int i;
11064
11065 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
11066 {
11067 rtx slot = XVECEXP (container, 0, i);
11068 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
11069 || INTVAL (XEXP (slot, 1)) != i * 8)
11070 need_temp = true;
11071 }
11072 }
11073 }
11074 if (!need_temp)
11075 {
11076 int_addr = addr;
11077 sse_addr = addr;
11078 }
11079 else
11080 {
11081 int_addr = create_tmp_var (ptr_type_node, "int_addr");
11082 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
11083 }
11084
11085 /* First ensure that we fit completely in registers. */
11086 if (needed_intregs)
11087 {
11088 t = build_int_cst (TREE_TYPE (gpr),
11089 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
11090 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
11091 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
11092 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
11093 gimplify_and_add (t, pre_p);
11094 }
11095 if (needed_sseregs)
11096 {
11097 t = build_int_cst (TREE_TYPE (fpr),
11098 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
11099 + X86_64_REGPARM_MAX * 8);
11100 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
11101 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
11102 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
11103 gimplify_and_add (t, pre_p);
11104 }
11105
11106 /* Compute index to start of area used for integer regs. */
11107 if (needed_intregs)
11108 {
11109 /* int_addr = gpr + sav; */
11110 t = fold_build_pointer_plus (sav, gpr);
11111 gimplify_assign (int_addr, t, pre_p);
11112 }
11113 if (needed_sseregs)
11114 {
11115 /* sse_addr = fpr + sav; */
11116 t = fold_build_pointer_plus (sav, fpr);
11117 gimplify_assign (sse_addr, t, pre_p);
11118 }
11119 if (need_temp)
11120 {
11121 int i, prev_size = 0;
11122 tree temp = create_tmp_var (type, "va_arg_tmp");
11123
11124 /* addr = &temp; */
11125 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
11126 gimplify_assign (addr, t, pre_p);
11127
11128 for (i = 0; i < XVECLEN (container, 0); i++)
11129 {
11130 rtx slot = XVECEXP (container, 0, i);
11131 rtx reg = XEXP (slot, 0);
11132 machine_mode mode = GET_MODE (reg);
11133 tree piece_type;
11134 tree addr_type;
11135 tree daddr_type;
11136 tree src_addr, src;
11137 int src_offset;
11138 tree dest_addr, dest;
11139 int cur_size = GET_MODE_SIZE (mode);
11140
11141 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
11142 prev_size = INTVAL (XEXP (slot, 1));
11143 if (prev_size + cur_size > size)
11144 {
11145 cur_size = size - prev_size;
11146 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
11147 if (mode == BLKmode)
11148 mode = QImode;
11149 }
11150 piece_type = lang_hooks.types.type_for_mode (mode, 1);
11151 if (mode == GET_MODE (reg))
11152 addr_type = build_pointer_type (piece_type);
11153 else
11154 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
11155 true);
11156 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
11157 true);
11158
11159 if (SSE_REGNO_P (REGNO (reg)))
11160 {
11161 src_addr = sse_addr;
11162 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
11163 }
11164 else
11165 {
11166 src_addr = int_addr;
11167 src_offset = REGNO (reg) * 8;
11168 }
11169 src_addr = fold_convert (addr_type, src_addr);
11170 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
11171
11172 dest_addr = fold_convert (daddr_type, addr);
11173 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
11174 if (cur_size == GET_MODE_SIZE (mode))
11175 {
11176 src = build_va_arg_indirect_ref (src_addr);
11177 dest = build_va_arg_indirect_ref (dest_addr);
11178
11179 gimplify_assign (dest, src, pre_p);
11180 }
11181 else
11182 {
11183 tree copy
11184 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11185 3, dest_addr, src_addr,
11186 size_int (cur_size));
11187 gimplify_and_add (copy, pre_p);
11188 }
11189 prev_size += cur_size;
11190 }
11191 }
11192
11193 if (needed_intregs)
11194 {
11195 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
11196 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
11197 gimplify_assign (gpr, t, pre_p);
11198 }
11199
11200 if (needed_sseregs)
11201 {
11202 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
11203 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
11204 gimplify_assign (unshare_expr (fpr), t, pre_p);
11205 }
11206
11207 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11208
11209 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11210 }
11211
11212 /* ... otherwise out of the overflow area. */
11213
11214 /* When the caller aligns a parameter on the stack, an alignment beyond
11215 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
11216 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
11217 caller. */
11218 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
11219 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11220 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11221
11222 /* Care for on-stack alignment if needed. */
11223 if (arg_boundary <= 64 || size == 0)
11224 t = ovf;
11225 else
11226 {
11227 HOST_WIDE_INT align = arg_boundary / 8;
11228 t = fold_build_pointer_plus_hwi (ovf, align - 1);
11229 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11230 build_int_cst (TREE_TYPE (t), -align));
11231 }
11232
11233 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11234 gimplify_assign (addr, t, pre_p);
11235
11236 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
11237 gimplify_assign (unshare_expr (ovf), t, pre_p);
11238
11239 if (container)
11240 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11241
11242 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11243 addr = fold_convert (ptrtype, addr);
11244
11245 if (indirect_p)
11246 addr = build_va_arg_indirect_ref (addr);
11247 return build_va_arg_indirect_ref (addr);
11248 }
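
/* An illustrative sketch (not literal compiler output) of the GIMPLE the
   function above produces for va_arg (ap, int) on the SysV 64-bit ABI:

     if (ap.gp_offset >= 48) goto overflow;
     addr = ap.reg_save_area + ap.gp_offset;
     ap.gp_offset += 8;
     goto done;
   overflow:
     addr = ap.overflow_arg_area;          /* aligned first if needed */
     ap.overflow_arg_area += 8;
   done:
     result = *(int *) addr;
*/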
11249 \f
11250 /* Return true if OPNUM's MEM should be matched
11251 in movabs* patterns. */
11252
11253 bool
11254 ix86_check_movabs (rtx insn, int opnum)
11255 {
11256 rtx set, mem;
11257
11258 set = PATTERN (insn);
11259 if (GET_CODE (set) == PARALLEL)
11260 set = XVECEXP (set, 0, 0);
11261 gcc_assert (GET_CODE (set) == SET);
11262 mem = XEXP (set, opnum);
11263 while (SUBREG_P (mem))
11264 mem = SUBREG_REG (mem);
11265 gcc_assert (MEM_P (mem));
11266 return volatile_ok || !MEM_VOLATILE_P (mem);
11267 }
11268
11269 /* Return false if INSN contains a MEM with a non-default address space. */
11270 bool
11271 ix86_check_no_addr_space (rtx insn)
11272 {
11273 subrtx_var_iterator::array_type array;
11274 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
11275 {
11276 rtx x = *iter;
11277 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
11278 return false;
11279 }
11280 return true;
11281 }
11282 \f
11283 /* Initialize the table of extra 80387 mathematical constants. */
11284
11285 static void
11286 init_ext_80387_constants (void)
11287 {
11288 static const char * cst[5] =
11289 {
11290 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
11291 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
11292 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
11293 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
11294 "3.1415926535897932385128089594061862044", /* 4: fldpi */
11295 };
11296 int i;
11297
11298 for (i = 0; i < 5; i++)
11299 {
11300 real_from_string (&ext_80387_constants_table[i], cst[i]);
11301 /* Ensure each constant is rounded to XFmode precision. */
11302 real_convert (&ext_80387_constants_table[i],
11303 XFmode, &ext_80387_constants_table[i]);
11304 }
11305
11306 ext_80387_constants_init = 1;
11307 }
11308
11309 /* Return non-zero if the constant is something that
11310 can be loaded with a special instruction. */
11311
11312 int
11313 standard_80387_constant_p (rtx x)
11314 {
11315 machine_mode mode = GET_MODE (x);
11316
11317 const REAL_VALUE_TYPE *r;
11318
11319 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
11320 return -1;
11321
11322 if (x == CONST0_RTX (mode))
11323 return 1;
11324 if (x == CONST1_RTX (mode))
11325 return 2;
11326
11327 r = CONST_DOUBLE_REAL_VALUE (x);
11328
11329 /* For XFmode constants, try to find a special 80387 instruction when
11330 optimizing for size or on those CPUs that benefit from them. */
11331 if (mode == XFmode
11332 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
11333 {
11334 int i;
11335
11336 if (! ext_80387_constants_init)
11337 init_ext_80387_constants ();
11338
11339 for (i = 0; i < 5; i++)
11340 if (real_identical (r, &ext_80387_constants_table[i]))
11341 return i + 3;
11342 }
11343
11344 /* Load of the constant -0.0 or -1.0 will be split as
11345 fldz;fchs or fld1;fchs sequence. */
11346 if (real_isnegzero (r))
11347 return 8;
11348 if (real_identical (r, &dconstm1))
11349 return 9;
11350
11351 return 0;
11352 }
11353
11354 /* Return the opcode of the special instruction to be used to load
11355 the constant X. */
11356
11357 const char *
11358 standard_80387_constant_opcode (rtx x)
11359 {
11360 switch (standard_80387_constant_p (x))
11361 {
11362 case 1:
11363 return "fldz";
11364 case 2:
11365 return "fld1";
11366 case 3:
11367 return "fldlg2";
11368 case 4:
11369 return "fldln2";
11370 case 5:
11371 return "fldl2e";
11372 case 6:
11373 return "fldl2t";
11374 case 7:
11375 return "fldpi";
11376 case 8:
11377 case 9:
11378 return "#";
11379 default:
11380 gcc_unreachable ();
11381 }
11382 }
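
/* Example (illustrative): loading the XFmode constant pi is matched by
   standard_80387_constant_p as index 7 and emitted as a single "fldpi",
   while -1.0 yields index 9 ("#") and is later split into the
   "fld1; fchs" sequence mentioned above.  */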
11383
11384 /* Return the CONST_DOUBLE representing the 80387 constant that is
11385 loaded by the specified special instruction. The argument IDX
11386 matches the return value from standard_80387_constant_p. */
11387
11388 rtx
11389 standard_80387_constant_rtx (int idx)
11390 {
11391 int i;
11392
11393 if (! ext_80387_constants_init)
11394 init_ext_80387_constants ();
11395
11396 switch (idx)
11397 {
11398 case 3:
11399 case 4:
11400 case 5:
11401 case 6:
11402 case 7:
11403 i = idx - 3;
11404 break;
11405
11406 default:
11407 gcc_unreachable ();
11408 }
11409
11410 return const_double_from_real_value (ext_80387_constants_table[i],
11411 XFmode);
11412 }
11413
11414 /* Return 1 if X is all-bits-zero and 2 if X is all-bits-one
11415 in a supported SSE/AVX vector mode. */
11416
11417 int
11418 standard_sse_constant_p (rtx x, machine_mode pred_mode)
11419 {
11420 machine_mode mode;
11421
11422 if (!TARGET_SSE)
11423 return 0;
11424
11425 mode = GET_MODE (x);
11426
11427 if (x == const0_rtx || const0_operand (x, mode))
11428 return 1;
11429
11430 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
11431 {
11432 /* VOIDmode integer constant, get mode from the predicate. */
11433 if (mode == VOIDmode)
11434 mode = pred_mode;
11435
11436 switch (GET_MODE_SIZE (mode))
11437 {
11438 case 64:
11439 if (TARGET_AVX512F)
11440 return 2;
11441 break;
11442 case 32:
11443 if (TARGET_AVX2)
11444 return 2;
11445 break;
11446 case 16:
11447 if (TARGET_SSE2)
11448 return 2;
11449 break;
11450 case 0:
11451 /* VOIDmode */
11452 gcc_unreachable ();
11453 default:
11454 break;
11455 }
11456 }
11457
11458 return 0;
11459 }
11460
11461 /* Return the opcode of the special instruction to be used to load
11462 the constant X. */
11463
11464 const char *
11465 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
11466 {
11467 machine_mode mode;
11468
11469 gcc_assert (TARGET_SSE);
11470
11471 mode = GET_MODE (x);
11472
11473 if (x == const0_rtx || const0_operand (x, mode))
11474 {
11475 switch (get_attr_mode (insn))
11476 {
11477 case MODE_XI:
11478 return "vpxord\t%g0, %g0, %g0";
11479 case MODE_OI:
11480 return (TARGET_AVX512VL
11481 ? "vpxord\t%x0, %x0, %x0"
11482 : "vpxor\t%x0, %x0, %x0");
11483 case MODE_TI:
11484 return (TARGET_AVX512VL
11485 ? "vpxord\t%t0, %t0, %t0"
11486 : "%vpxor\t%0, %d0");
11487
11488 case MODE_V8DF:
11489 return (TARGET_AVX512DQ
11490 ? "vxorpd\t%g0, %g0, %g0"
11491 : "vpxorq\t%g0, %g0, %g0");
11492 case MODE_V4DF:
11493 return "vxorpd\t%x0, %x0, %x0";
11494 case MODE_V2DF:
11495 return "%vxorpd\t%0, %d0";
11496
11497 case MODE_V16SF:
11498 return (TARGET_AVX512DQ
11499 ? "vxorps\t%g0, %g0, %g0"
11500 : "vpxord\t%g0, %g0, %g0");
11501 case MODE_V8SF:
11502 return "vxorps\t%x0, %x0, %x0";
11503 case MODE_V4SF:
11504 return "%vxorps\t%0, %d0";
11505
11506 default:
11507 gcc_unreachable ();
11508 }
11509 }
11510 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
11511 {
11512 enum attr_mode insn_mode = get_attr_mode (insn);
11513
11514 switch (insn_mode)
11515 {
11516 case MODE_XI:
11517 case MODE_V8DF:
11518 case MODE_V16SF:
11519 gcc_assert (TARGET_AVX512F);
11520 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
11521
11522 case MODE_OI:
11523 case MODE_V4DF:
11524 case MODE_V8SF:
11525 gcc_assert (TARGET_AVX2);
11526 /* FALLTHRU */
11527 case MODE_TI:
11528 case MODE_V2DF:
11529 case MODE_V4SF:
11530 gcc_assert (TARGET_SSE2);
11531 return (TARGET_AVX
11532 ? "vpcmpeqd\t%0, %0, %0"
11533 : "pcmpeqd\t%0, %0");
11534
11535 default:
11536 gcc_unreachable ();
11537 }
11538 }
11539
11540 gcc_unreachable ();
11541 }
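
/* Illustrative output of the above on a plain SSE2 (non-AVX) target: an
   all-zeros vector in MODE_V4SF comes out as "xorps %xmm0, %xmm0" (the
   "%v" prefix adds the AVX "v" only when AVX is enabled), and an all-ones
   vector in MODE_TI as "pcmpeqd %xmm0, %xmm0".  */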
11542
11543 /* Returns true if INSN can be transformed from a memory load
11544 to a supported FP constant load. */
11545
11546 bool
11547 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
11548 {
11549 rtx src = find_constant_src (insn);
11550
11551 gcc_assert (REG_P (dst));
11552
11553 if (src == NULL
11554 || (SSE_REGNO_P (REGNO (dst))
11555 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
11556 || (STACK_REGNO_P (REGNO (dst))
11557 && standard_80387_constant_p (src) < 1))
11558 return false;
11559
11560 return true;
11561 }
11562
11563 /* Returns true if OP contains a symbol reference */
11564
11565 bool
11566 symbolic_reference_mentioned_p (rtx op)
11567 {
11568 const char *fmt;
11569 int i;
11570
11571 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
11572 return true;
11573
11574 fmt = GET_RTX_FORMAT (GET_CODE (op));
11575 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
11576 {
11577 if (fmt[i] == 'E')
11578 {
11579 int j;
11580
11581 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
11582 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
11583 return true;
11584 }
11585
11586 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
11587 return true;
11588 }
11589
11590 return false;
11591 }
11592
11593 /* Return true if it is appropriate to emit `ret' instructions in the
11594 body of a function. Do this only if the epilogue is simple, needing a
11595 couple of insns. Prior to reloading, we can't tell how many registers
11596 must be saved, so return false then. Return false if there is no frame
11597 marker to de-allocate. */
11598
11599 bool
11600 ix86_can_use_return_insn_p (void)
11601 {
11602 struct ix86_frame frame;
11603
11604 /* Don't use `ret' instruction in interrupt handler. */
11605 if (! reload_completed
11606 || frame_pointer_needed
11607 || cfun->machine->func_type != TYPE_NORMAL)
11608 return 0;
11609
11610 /* Don't allow more than 32k pop, since that's all we can do
11611 with one instruction. */
11612 if (crtl->args.pops_args && crtl->args.size >= 32768)
11613 return 0;
11614
11615 ix86_compute_frame_layout (&frame);
11616 return (frame.stack_pointer_offset == UNITS_PER_WORD
11617 && (frame.nregs + frame.nsseregs) == 0);
11618 }
11619 \f
11620 /* Value should be nonzero if functions must have frame pointers.
11621 Zero means the frame pointer need not be set up (and parms may
11622 be accessed via the stack pointer) in functions that seem suitable. */
11623
11624 static bool
11625 ix86_frame_pointer_required (void)
11626 {
11627 /* If we accessed previous frames, then the generated code expects
11628 to be able to access the saved ebp value in our frame. */
11629 if (cfun->machine->accesses_prev_frame)
11630 return true;
11631
11632 /* Several x86 OSes need a frame pointer for other reasons,
11633 usually pertaining to setjmp. */
11634 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11635 return true;
11636
11637 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
11638 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
11639 return true;
11640
11641 /* Win64 SEH: very large frames need a frame pointer, as the maximum
11642 stack allocation is 4GB. */
11643 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
11644 return true;
11645
11646 /* SSE saves require a frame pointer when the stack is misaligned. */
11647 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
11648 return true;
11649
11650 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
11651 turns off the frame pointer by default. Turn it back on now if
11652 we've not got a leaf function. */
11653 if (TARGET_OMIT_LEAF_FRAME_POINTER
11654 && (!crtl->is_leaf
11655 || ix86_current_function_calls_tls_descriptor))
11656 return true;
11657
11658 if (crtl->profile && !flag_fentry)
11659 return true;
11660
11661 return false;
11662 }
11663
11664 /* Record that the current function accesses previous call frames. */
11665
11666 void
11667 ix86_setup_frame_addresses (void)
11668 {
11669 cfun->machine->accesses_prev_frame = 1;
11670 }
11671 \f
11672 #ifndef USE_HIDDEN_LINKONCE
11673 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
11674 # define USE_HIDDEN_LINKONCE 1
11675 # else
11676 # define USE_HIDDEN_LINKONCE 0
11677 # endif
11678 #endif
11679
11680 static int pic_labels_used;
11681
11682 /* Fills in the label name that should be used for a pc thunk for
11683 the given register. */
11684
11685 static void
11686 get_pc_thunk_name (char name[32], unsigned int regno)
11687 {
11688 gcc_assert (!TARGET_64BIT);
11689
11690 if (USE_HIDDEN_LINKONCE)
11691 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
11692 else
11693 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
11694 }
11695
11696
11697 /* This function generates the pc thunks used for -fpic; each thunk loads
11698 its register with the return address of the caller and then returns. */
11699
11700 static void
11701 ix86_code_end (void)
11702 {
11703 rtx xops[2];
11704 int regno;
11705
11706 for (regno = AX_REG; regno <= SP_REG; regno++)
11707 {
11708 char name[32];
11709 tree decl;
11710
11711 if (!(pic_labels_used & (1 << regno)))
11712 continue;
11713
11714 get_pc_thunk_name (name, regno);
11715
11716 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11717 get_identifier (name),
11718 build_function_type_list (void_type_node, NULL_TREE));
11719 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11720 NULL_TREE, void_type_node);
11721 TREE_PUBLIC (decl) = 1;
11722 TREE_STATIC (decl) = 1;
11723 DECL_IGNORED_P (decl) = 1;
11724
11725 #if TARGET_MACHO
11726 if (TARGET_MACHO)
11727 {
11728 switch_to_section (darwin_sections[text_coal_section]);
11729 fputs ("\t.weak_definition\t", asm_out_file);
11730 assemble_name (asm_out_file, name);
11731 fputs ("\n\t.private_extern\t", asm_out_file);
11732 assemble_name (asm_out_file, name);
11733 putc ('\n', asm_out_file);
11734 ASM_OUTPUT_LABEL (asm_out_file, name);
11735 DECL_WEAK (decl) = 1;
11736 }
11737 else
11738 #endif
11739 if (USE_HIDDEN_LINKONCE)
11740 {
11741 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11742
11743 targetm.asm_out.unique_section (decl, 0);
11744 switch_to_section (get_named_section (decl, NULL, 0));
11745
11746 targetm.asm_out.globalize_label (asm_out_file, name);
11747 fputs ("\t.hidden\t", asm_out_file);
11748 assemble_name (asm_out_file, name);
11749 putc ('\n', asm_out_file);
11750 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11751 }
11752 else
11753 {
11754 switch_to_section (text_section);
11755 ASM_OUTPUT_LABEL (asm_out_file, name);
11756 }
11757
11758 DECL_INITIAL (decl) = make_node (BLOCK);
11759 current_function_decl = decl;
11760 allocate_struct_function (decl, false);
11761 init_function_start (decl);
11762 first_function_block_is_cold = false;
11763 /* Make sure unwind info is emitted for the thunk if needed. */
11764 final_start_function (emit_barrier (), asm_out_file, 1);
11765
11766 /* Pad stack IP move with 4 instructions (two NOPs count
11767 as one instruction). */
11768 if (TARGET_PAD_SHORT_FUNCTION)
11769 {
11770 int i = 8;
11771
11772 while (i--)
11773 fputs ("\tnop\n", asm_out_file);
11774 }
11775
11776 xops[0] = gen_rtx_REG (Pmode, regno);
11777 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11778 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11779 output_asm_insn ("%!ret", NULL);
11780 final_end_function ();
11781 init_insn_lengths ();
11782 free_after_compilation (cfun);
11783 set_cfun (NULL);
11784 current_function_decl = NULL;
11785 }
11786
11787 if (flag_split_stack)
11788 file_end_indicate_split_stack ();
11789 }
11790
11791 /* Emit code for the SET_GOT patterns. */
11792
11793 const char *
11794 output_set_got (rtx dest, rtx label)
11795 {
11796 rtx xops[3];
11797
11798 xops[0] = dest;
11799
11800 if (TARGET_VXWORKS_RTP && flag_pic)
11801 {
11802 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11803 xops[2] = gen_rtx_MEM (Pmode,
11804 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11805 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11806
11807 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11808 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11809 an unadorned address. */
11810 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11811 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11812 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11813 return "";
11814 }
11815
11816 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11817
11818 if (flag_pic)
11819 {
11820 char name[32];
11821 get_pc_thunk_name (name, REGNO (dest));
11822 pic_labels_used |= 1 << REGNO (dest);
11823
11824 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11825 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11826 output_asm_insn ("%!call\t%X2", xops);
11827
11828 #if TARGET_MACHO
11829 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11830 This is what will be referenced by the Mach-O PIC subsystem. */
11831 if (machopic_should_output_picbase_label () || !label)
11832 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11833
11834 /* When we are restoring the pic base at the site of a nonlocal label,
11835 and we decided to emit the pic base above, we will still output a
11836 local label used for calculating the correction offset (even though
11837 the offset will be 0 in that case). */
11838 if (label)
11839 targetm.asm_out.internal_label (asm_out_file, "L",
11840 CODE_LABEL_NUMBER (label));
11841 #endif
11842 }
11843 else
11844 {
11845 if (TARGET_MACHO)
11846 /* We don't need a pic base, we're not producing pic. */
11847 gcc_unreachable ();
11848
11849 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11850 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11851 targetm.asm_out.internal_label (asm_out_file, "L",
11852 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11853 }
11854
11855 if (!TARGET_MACHO)
11856 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11857
11858 return "";
11859 }
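
/* A sketch of the 32-bit PIC sequence this emits when flag_pic is set and
   the destination is %ebx (register choice illustrative):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body written out by ix86_code_end is simply:

       __x86.get_pc_thunk.bx:
         movl  (%esp), %ebx
         ret
*/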
11860
11861 /* Generate a "push" pattern for input ARG. */
11862
11863 static rtx
11864 gen_push (rtx arg)
11865 {
11866 struct machine_function *m = cfun->machine;
11867
11868 if (m->fs.cfa_reg == stack_pointer_rtx)
11869 m->fs.cfa_offset += UNITS_PER_WORD;
11870 m->fs.sp_offset += UNITS_PER_WORD;
11871
11872 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11873 arg = gen_rtx_REG (word_mode, REGNO (arg));
11874
11875 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11876 gen_rtx_PRE_DEC (Pmode,
11877 stack_pointer_rtx)),
11878 arg);
11879 }
11880
11881 /* Generate a "pop" pattern for input ARG. */
11882
11883 static rtx
11884 gen_pop (rtx arg)
11885 {
11886 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11887 arg = gen_rtx_REG (word_mode, REGNO (arg));
11888
11889 return gen_rtx_SET (arg,
11890 gen_rtx_MEM (word_mode,
11891 gen_rtx_POST_INC (Pmode,
11892 stack_pointer_rtx)));
11893 }
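
/* For illustration, gen_push on a 64-bit target with a DImode register
   argument builds the RTL

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

   which assembles to a plain "pushq"; gen_pop is the mirror image using
   post_inc.  gen_push additionally bumps the CFA and stack-pointer offsets
   tracked in cfun->machine->fs.  */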
11894
11895 /* Return >= 0 if there is an unused call-clobbered register available
11896 for the entire function. */
11897
11898 static unsigned int
11899 ix86_select_alt_pic_regnum (void)
11900 {
11901 if (ix86_use_pseudo_pic_reg ())
11902 return INVALID_REGNUM;
11903
11904 if (crtl->is_leaf
11905 && !crtl->profile
11906 && !ix86_current_function_calls_tls_descriptor)
11907 {
11908 int i, drap;
11909 /* Can't use the same register for both PIC and DRAP. */
11910 if (crtl->drap_reg)
11911 drap = REGNO (crtl->drap_reg);
11912 else
11913 drap = -1;
11914 for (i = 2; i >= 0; --i)
11915 if (i != drap && !df_regs_ever_live_p (i))
11916 return i;
11917 }
11918
11919 return INVALID_REGNUM;
11920 }
11921
11922 /* Return true if REGNO is used by the epilogue. */
11923
11924 bool
11925 ix86_epilogue_uses (int regno)
11926 {
11927 /* If there are no caller-saved registers, we preserve all registers,
11928 except for MMX and x87 registers which aren't supported when saving
11929 and restoring registers. Don't explicitly save SP register since
11930 it is always preserved. */
11931 return (epilogue_completed
11932 && cfun->machine->no_caller_saved_registers
11933 && !fixed_regs[regno]
11934 && !STACK_REGNO_P (regno)
11935 && !MMX_REGNO_P (regno));
11936 }
11937
11938 /* Return nonzero if register REGNO can be used as a scratch register
11939 in peephole2. */
11940
11941 static bool
11942 ix86_hard_regno_scratch_ok (unsigned int regno)
11943 {
11944 /* If there are no caller-saved registers, we can't use any register
11945 as a scratch register after epilogue and use REGNO as scratch
11946 register only if it has been used before to avoid saving and
11947 restoring it. */
11948 return (!cfun->machine->no_caller_saved_registers
11949 || (!epilogue_completed
11950 && df_regs_ever_live_p (regno)));
11951 }
11952
11953 /* Return TRUE if we need to save REGNO. */
11954
11955 static bool
11956 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
11957 {
11958 /* If there are no caller-saved registers, we preserve all registers,
11959 except for MMX and x87 registers which aren't supported when saving
11960 and restoring registers. Don't explicitly save SP register since
11961 it is always preserved. */
11962 if (cfun->machine->no_caller_saved_registers)
11963 {
11964 /* Don't preserve registers used for function return value. */
11965 rtx reg = crtl->return_rtx;
11966 if (reg)
11967 {
11968 unsigned int i = REGNO (reg);
11969 unsigned int nregs = hard_regno_nregs[i][GET_MODE (reg)];
11970 while (nregs-- > 0)
11971 if ((i + nregs) == regno)
11972 return false;
11973
11974 reg = crtl->return_bnd;
11975 if (reg)
11976 {
11977 i = REGNO (reg);
11978 nregs = hard_regno_nregs[i][GET_MODE (reg)];
11979 while (nregs-- > 0)
11980 if ((i + nregs) == regno)
11981 return false;
11982 }
11983 }
11984
11985 return (df_regs_ever_live_p (regno)
11986 && !fixed_regs[regno]
11987 && !STACK_REGNO_P (regno)
11988 && !MMX_REGNO_P (regno)
11989 && (regno != HARD_FRAME_POINTER_REGNUM
11990 || !frame_pointer_needed));
11991 }
11992
11993 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11994 && pic_offset_table_rtx)
11995 {
11996 if (ix86_use_pseudo_pic_reg ())
11997 {
11998 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11999 _mcount in prologue. */
12000 if (!TARGET_64BIT && flag_pic && crtl->profile)
12001 return true;
12002 }
12003 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
12004 || crtl->profile
12005 || crtl->calls_eh_return
12006 || crtl->uses_const_pool
12007 || cfun->has_nonlocal_label)
12008 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
12009 }
12010
12011 if (crtl->calls_eh_return && maybe_eh_return)
12012 {
12013 unsigned i;
12014 for (i = 0; ; i++)
12015 {
12016 unsigned test = EH_RETURN_DATA_REGNO (i);
12017 if (test == INVALID_REGNUM)
12018 break;
12019 if (test == regno)
12020 return true;
12021 }
12022 }
12023
12024 if (crtl->drap_reg
12025 && regno == REGNO (crtl->drap_reg)
12026 && !cfun->machine->no_drap_save_restore)
12027 return true;
12028
12029 return (df_regs_ever_live_p (regno)
12030 && !call_used_regs[regno]
12031 && !fixed_regs[regno]
12032 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
12033 }
12034
12035 /* Return number of saved general purpose registers. */
12036
12037 static int
12038 ix86_nsaved_regs (void)
12039 {
12040 int nregs = 0;
12041 int regno;
12042
12043 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12044 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
12045 nregs ++;
12046 return nregs;
12047 }
12048
12049 /* Return number of saved SSE registers. */
12050
12051 static int
12052 ix86_nsaved_sseregs (void)
12053 {
12054 int nregs = 0;
12055 int regno;
12056
12057 if (!TARGET_64BIT_MS_ABI)
12058 return 0;
12059 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12060 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
12061 nregs ++;
12062 return nregs;
12063 }
12064
12065 /* Given FROM and TO register numbers, say whether this elimination is
12066 allowed. If stack alignment is needed, we can only replace argument
12067 pointer with hard frame pointer, or replace frame pointer with stack
12068 pointer. Otherwise, frame pointer elimination is automatically
12069 handled and all other eliminations are valid. */
12070
12071 static bool
12072 ix86_can_eliminate (const int from, const int to)
12073 {
12074 if (stack_realign_fp)
12075 return ((from == ARG_POINTER_REGNUM
12076 && to == HARD_FRAME_POINTER_REGNUM)
12077 || (from == FRAME_POINTER_REGNUM
12078 && to == STACK_POINTER_REGNUM));
12079 else
12080 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
12081 }
12082
12083 /* Return the offset between two registers, one to be eliminated, and the other
12084 its replacement, at the start of a routine. */
12085
12086 HOST_WIDE_INT
12087 ix86_initial_elimination_offset (int from, int to)
12088 {
12089 struct ix86_frame frame;
12090 ix86_compute_frame_layout (&frame);
12091
12092 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
12093 return frame.hard_frame_pointer_offset;
12094 else if (from == FRAME_POINTER_REGNUM
12095 && to == HARD_FRAME_POINTER_REGNUM)
12096 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
12097 else
12098 {
12099 gcc_assert (to == STACK_POINTER_REGNUM);
12100
12101 if (from == ARG_POINTER_REGNUM)
12102 return frame.stack_pointer_offset;
12103
12104 gcc_assert (from == FRAME_POINTER_REGNUM);
12105 return frame.stack_pointer_offset - frame.frame_pointer_offset;
12106 }
12107 }
12108
12109 /* In a dynamically-aligned function, we can't know the offset from
12110 stack pointer to frame pointer, so we must ensure that setjmp
12111 eliminates fp against the hard fp (%ebp) rather than trying to
12112 index from %esp up to the top of the frame across a gap that is
12113 of unknown (at compile-time) size. */
12114 static rtx
12115 ix86_builtin_setjmp_frame_value (void)
12116 {
12117 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
12118 }
12119
12120 /* When using -fsplit-stack, the allocation routines set a field in
12121 the TCB to the bottom of the stack plus this much space, measured
12122 in bytes. */
12123
12124 #define SPLIT_STACK_AVAILABLE 256
12125
12126 /* Fill structure ix86_frame describing the frame of the current function. */
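 /* A rough sketch of the layout computed below, from the CFA at the top
    of the frame toward lower addresses:
 
 	return address
 	[pushed static chain]		(ix86_static_chain_on_stack)
 	[saved frame pointer]		(frame_pointer_needed)
 	GP register save area		(ends at reg_save_offset)
 	[SSE register save area]	(ends at sse_reg_save_offset)
 	[va_arg register save area]
 	local variables			(start at frame_pointer_offset)
 	[outgoing argument area]
 					(stack_pointer_offset)
 
    The exact offsets also depend on the alignment roundings applied
    below. */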
12127
12128 static void
12129 ix86_compute_frame_layout (struct ix86_frame *frame)
12130 {
12131 unsigned HOST_WIDE_INT stack_alignment_needed;
12132 HOST_WIDE_INT offset;
12133 unsigned HOST_WIDE_INT preferred_alignment;
12134 HOST_WIDE_INT size = get_frame_size ();
12135 HOST_WIDE_INT to_allocate;
12136
12137 frame->nregs = ix86_nsaved_regs ();
12138 frame->nsseregs = ix86_nsaved_sseregs ();
12139
12140 /* The 64-bit MS ABI seems to require stack alignment to be always 16,
12141 except for function prologues, leaf functions and when the default
12142 incoming stack boundary is overridden at the command line or via the
12143 force_align_arg_pointer attribute. */
12144 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
12145 && (!crtl->is_leaf || cfun->calls_alloca != 0
12146 || ix86_current_function_calls_tls_descriptor
12147 || ix86_incoming_stack_boundary < 128))
12148 {
12149 crtl->preferred_stack_boundary = 128;
12150 crtl->stack_alignment_needed = 128;
12151 }
12152
12153 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
12154 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
12155
12156 gcc_assert (!size || stack_alignment_needed);
12157 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
12158 gcc_assert (preferred_alignment <= stack_alignment_needed);
12159
12160 /* For SEH we have to limit the amount of code movement into the prologue.
12161 At present we do this via a BLOCKAGE, at which point there's very little
12162 scheduling that can be done, which means that there's very little point
12163 in doing anything except PUSHs. */
12164 if (TARGET_SEH)
12165 cfun->machine->use_fast_prologue_epilogue = false;
12166
12167 /* During reload iteration the number of registers saved can change.
12168 Recompute the value as needed. Do not recompute when the number of
12169 registers didn't change, as reload calls the function multiple times
12170 and does not expect the decision to change within a single iteration. */
12171 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
12172 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
12173 {
12174 int count = frame->nregs;
12175 struct cgraph_node *node = cgraph_node::get (current_function_decl);
12176
12177 cfun->machine->use_fast_prologue_epilogue_nregs = count;
12178
12179 /* The fast prologue uses move instead of push to save registers. This
12180 is significantly longer, but also executes faster as modern hardware
12181 can execute the moves in parallel, but can't do that for push/pop.
12182
12183 Be careful about choosing which prologue to emit: when the function
12184 takes many instructions to execute, we may use the slow version, as
12185 well as when the function is known to be outside a hot spot (this is
12186 known with feedback only). Weight the size of the function by the
12187 number of registers to save, as it is cheap to use one or two push
12188 instructions but very slow to use many of them. */
12189 if (count)
12190 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
12191 if (node->frequency < NODE_FREQUENCY_NORMAL
12192 || (flag_branch_probabilities
12193 && node->frequency < NODE_FREQUENCY_HOT))
12194 cfun->machine->use_fast_prologue_epilogue = false;
12195 else
12196 cfun->machine->use_fast_prologue_epilogue
12197 = !expensive_function_p (count);
12198 }
12199
12200 frame->save_regs_using_mov
12201 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
12202 /* If static stack checking is enabled and done with probes,
12203 the registers need to be saved before allocating the frame. */
12204 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
12205
12206 /* Skip return address. */
12207 offset = UNITS_PER_WORD;
12208
12209 /* Skip pushed static chain. */
12210 if (ix86_static_chain_on_stack)
12211 offset += UNITS_PER_WORD;
12212
12213 /* Skip saved base pointer. */
12214 if (frame_pointer_needed)
12215 offset += UNITS_PER_WORD;
12216 frame->hfp_save_offset = offset;
12217
12218 /* The traditional frame pointer location is at the top of the frame. */
12219 frame->hard_frame_pointer_offset = offset;
12220
12221 /* Register save area */
12222 offset += frame->nregs * UNITS_PER_WORD;
12223 frame->reg_save_offset = offset;
12224
12225 /* On SEH target, registers are pushed just before the frame pointer
12226 location. */
12227 if (TARGET_SEH)
12228 frame->hard_frame_pointer_offset = offset;
12229
12230 /* Align and set SSE register save area. */
12231 if (frame->nsseregs)
12232 {
12233 /* The only ABI that has saved SSE registers (Win64) also has a
12234 16-byte aligned default stack, and thus we don't need to be
12235 within the re-aligned local stack frame to save them. In case the
12236 incoming stack boundary is aligned to less than 16 bytes, an
12237 unaligned move of the SSE register will be emitted, so there is
12238 no point in rounding up the SSE register save area outside the
12239 re-aligned local stack frame to 16 bytes. */
12240 if (ix86_incoming_stack_boundary >= 128)
12241 offset = ROUND_UP (offset, 16);
12242 offset += frame->nsseregs * 16;
12243 }
12244 frame->sse_reg_save_offset = offset;
12245
12246 /* The re-aligned stack starts here. Values before this point are not
12247 directly comparable with values below this point. In order to make
12248 sure that no value happens to be the same before and after, force
12249 the alignment computation below to add a non-zero value. */
12250 if (stack_realign_fp)
12251 offset = ROUND_UP (offset, stack_alignment_needed);
12252
12253 /* Va-arg area */
12254 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
12255 offset += frame->va_arg_size;
12256
12257 /* Align start of frame for local function. */
12258 if (stack_realign_fp
12259 || offset != frame->sse_reg_save_offset
12260 || size != 0
12261 || !crtl->is_leaf
12262 || cfun->calls_alloca
12263 || ix86_current_function_calls_tls_descriptor)
12264 offset = ROUND_UP (offset, stack_alignment_needed);
12265
12266 /* Frame pointer points here. */
12267 frame->frame_pointer_offset = offset;
12268
12269 offset += size;
12270
12271 /* Add outgoing arguments area. Can be skipped if we eliminated
12272 all the function calls as dead code.
12273 Skipping is however impossible when function calls alloca. Alloca
12274 expander assumes that last crtl->outgoing_args_size
12275 of stack frame are unused. */
12276 if (ACCUMULATE_OUTGOING_ARGS
12277 && (!crtl->is_leaf || cfun->calls_alloca
12278 || ix86_current_function_calls_tls_descriptor))
12279 {
12280 offset += crtl->outgoing_args_size;
12281 frame->outgoing_arguments_size = crtl->outgoing_args_size;
12282 }
12283 else
12284 frame->outgoing_arguments_size = 0;
12285
12286 /* Align stack boundary. Only needed if we're calling another function
12287 or using alloca. */
12288 if (!crtl->is_leaf || cfun->calls_alloca
12289 || ix86_current_function_calls_tls_descriptor)
12290 offset = ROUND_UP (offset, preferred_alignment);
12291
12292 /* We've reached end of stack frame. */
12293 frame->stack_pointer_offset = offset;
12294
12295 /* Size prologue needs to allocate. */
12296 to_allocate = offset - frame->sse_reg_save_offset;
12297
12298 if ((!to_allocate && frame->nregs <= 1)
12299 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
12300 frame->save_regs_using_mov = false;
12301
12302 if (ix86_using_red_zone ()
12303 && crtl->sp_is_unchanging
12304 && crtl->is_leaf
12305 && !ix86_pc_thunk_call_expanded
12306 && !ix86_current_function_calls_tls_descriptor)
12307 {
12308 frame->red_zone_size = to_allocate;
12309 if (frame->save_regs_using_mov)
12310 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
12311 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
12312 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
12313 }
12314 else
12315 frame->red_zone_size = 0;
12316 frame->stack_pointer_offset -= frame->red_zone_size;
12317
12318 /* The SEH frame pointer location is near the bottom of the frame.
12319 This is enforced by the fact that the difference between the
12320 stack pointer and the frame pointer is limited to 240 bytes in
12321 the unwind data structure. */
12322 if (TARGET_SEH)
12323 {
12324 HOST_WIDE_INT diff;
12325
12326 /* If we can leave the frame pointer where it is, do so. Also, returns
12327 the establisher frame for __builtin_frame_address (0). */
12328 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
12329 if (diff <= SEH_MAX_FRAME_SIZE
12330 && (diff > 240 || (diff & 15) != 0)
12331 && !crtl->accesses_prior_frames)
12332 {
12333 /* Ideally we'd determine what portion of the local stack frame
12334 (within the constraint of the lowest 240) is most heavily used.
12335 But without that complication, simply bias the frame pointer
12336 by 128 bytes so as to maximize the amount of the local stack
12337 frame that is addressable with 8-bit offsets. */
12338 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
12339 }
12340 }
12341 }
12342
12343 /* This is semi-inlined memory_address_length, but simplified
12344 since we know that we're always dealing with reg+offset, and
12345 to avoid having to create and discard all that rtl. */
12346
12347 static inline int
12348 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
12349 {
12350 int len = 4;
12351
12352 if (offset == 0)
12353 {
12354 /* EBP and R13 cannot be encoded without an offset. */
12355 len = (regno == BP_REG || regno == R13_REG);
12356 }
12357 else if (IN_RANGE (offset, -128, 127))
12358 len = 1;
12359
12360 /* ESP and R12 must be encoded with a SIB byte. */
12361 if (regno == SP_REG || regno == R12_REG)
12362 len++;
12363
12364 return len;
12365 }
12366
12367 /* Return an RTX that points to CFA_OFFSET within the stack frame.
12368 The valid base registers are taken from CFUN->MACHINE->FS. */
12369
12370 static rtx
12371 choose_baseaddr (HOST_WIDE_INT cfa_offset)
12372 {
12373 const struct machine_function *m = cfun->machine;
12374 rtx base_reg = NULL;
12375 HOST_WIDE_INT base_offset = 0;
12376
12377 if (m->use_fast_prologue_epilogue)
12378 {
12379 /* Choose the base register most likely to allow the most scheduling
12380 opportunities. Generally FP is valid throughout the function,
12381 while DRAP must be reloaded within the epilogue. But choose either
12382 over the SP due to increased encoding size. */
12383
12384 if (m->fs.fp_valid)
12385 {
12386 base_reg = hard_frame_pointer_rtx;
12387 base_offset = m->fs.fp_offset - cfa_offset;
12388 }
12389 else if (m->fs.drap_valid)
12390 {
12391 base_reg = crtl->drap_reg;
12392 base_offset = 0 - cfa_offset;
12393 }
12394 else if (m->fs.sp_valid)
12395 {
12396 base_reg = stack_pointer_rtx;
12397 base_offset = m->fs.sp_offset - cfa_offset;
12398 }
12399 }
12400 else
12401 {
12402 HOST_WIDE_INT toffset;
12403 int len = 16, tlen;
12404
12405 /* Choose the base register with the smallest address encoding.
12406 With a tie, choose FP > DRAP > SP. */
12407 if (m->fs.sp_valid)
12408 {
12409 base_reg = stack_pointer_rtx;
12410 base_offset = m->fs.sp_offset - cfa_offset;
12411 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
12412 }
12413 if (m->fs.drap_valid)
12414 {
12415 toffset = 0 - cfa_offset;
12416 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
12417 if (tlen <= len)
12418 {
12419 base_reg = crtl->drap_reg;
12420 base_offset = toffset;
12421 len = tlen;
12422 }
12423 }
12424 if (m->fs.fp_valid)
12425 {
12426 toffset = m->fs.fp_offset - cfa_offset;
12427 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
12428 if (tlen <= len)
12429 {
12430 base_reg = hard_frame_pointer_rtx;
12431 base_offset = toffset;
12432 len = tlen;
12433 }
12434 }
12435 }
12436 gcc_assert (base_reg != NULL);
12437
12438 return plus_constant (Pmode, base_reg, base_offset);
12439 }
12440
12441 /* Emit code to save registers in the prologue. */
12442
12443 static void
12444 ix86_emit_save_regs (void)
12445 {
12446 unsigned int regno;
12447 rtx_insn *insn;
12448
12449 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
12450 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
12451 {
12452 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
12453 RTX_FRAME_RELATED_P (insn) = 1;
12454 }
12455 }
12456
12457 /* Emit a single register save at CFA - CFA_OFFSET. */
12458
12459 static void
12460 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
12461 HOST_WIDE_INT cfa_offset)
12462 {
12463 struct machine_function *m = cfun->machine;
12464 rtx reg = gen_rtx_REG (mode, regno);
12465 rtx mem, addr, base, insn;
12466 unsigned int align;
12467
12468 addr = choose_baseaddr (cfa_offset);
12469 mem = gen_frame_mem (mode, addr);
12470
12471 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
12472 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
12473 set_mem_align (mem, align);
12474
12475 insn = emit_insn (gen_rtx_SET (mem, reg));
12476 RTX_FRAME_RELATED_P (insn) = 1;
12477
12478 base = addr;
12479 if (GET_CODE (base) == PLUS)
12480 base = XEXP (base, 0);
12481 gcc_checking_assert (REG_P (base));
12482
12483 /* When saving registers into a re-aligned local stack frame, avoid
12484 any tricky guessing by dwarf2out. */
12485 if (m->fs.realigned)
12486 {
12487 gcc_checking_assert (stack_realign_drap);
12488
12489 if (regno == REGNO (crtl->drap_reg))
12490 {
12491 /* A bit of a hack. We force the DRAP register to be saved in
12492 the re-aligned stack frame, which provides us with a copy
12493 of the CFA that will last past the prologue. Install it. */
12494 gcc_checking_assert (cfun->machine->fs.fp_valid);
12495 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12496 cfun->machine->fs.fp_offset - cfa_offset);
12497 mem = gen_rtx_MEM (mode, addr);
12498 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
12499 }
12500 else
12501 {
12502 /* The frame pointer is a stable reference within the
12503 aligned frame. Use it. */
12504 gcc_checking_assert (cfun->machine->fs.fp_valid);
12505 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12506 cfun->machine->fs.fp_offset - cfa_offset);
12507 mem = gen_rtx_MEM (mode, addr);
12508 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12509 }
12510 }
12511
12512 /* The memory may not be relative to the current CFA register,
12513 which means that we may need to generate a new pattern for
12514 use by the unwind info. */
12515 else if (base != m->fs.cfa_reg)
12516 {
12517 addr = plus_constant (Pmode, m->fs.cfa_reg,
12518 m->fs.cfa_offset - cfa_offset);
12519 mem = gen_rtx_MEM (mode, addr);
12520 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
12521 }
12522 }
12523
12524 /* Emit code to save registers using MOV insns.
12525 First register is stored at CFA - CFA_OFFSET. */
12526 static void
12527 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
12528 {
12529 unsigned int regno;
12530
12531 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12532 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
12533 {
12534 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
12535 cfa_offset -= UNITS_PER_WORD;
12536 }
12537 }
12538
12539 /* Emit code to save SSE registers using MOV insns.
12540 First register is stored at CFA - CFA_OFFSET. */
12541 static void
12542 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
12543 {
12544 unsigned int regno;
12545
12546 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12547 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
12548 {
12549 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
12550 cfa_offset -= GET_MODE_SIZE (V4SFmode);
12551 }
12552 }
12553
12554 static GTY(()) rtx queued_cfa_restores;
12555
12556 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
12557 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
12558 Don't add the note if the previously saved value will be left untouched
12559 within stack red-zone till return, as unwinders can find the same value
12560 in the register and on the stack. */
12561
12562 static void
12563 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
12564 {
12565 if (!crtl->shrink_wrapped
12566 && cfa_offset <= cfun->machine->fs.red_zone_offset)
12567 return;
12568
12569 if (insn)
12570 {
12571 add_reg_note (insn, REG_CFA_RESTORE, reg);
12572 RTX_FRAME_RELATED_P (insn) = 1;
12573 }
12574 else
12575 queued_cfa_restores
12576 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
12577 }
12578
12579 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
12580
12581 static void
12582 ix86_add_queued_cfa_restore_notes (rtx insn)
12583 {
12584 rtx last;
12585 if (!queued_cfa_restores)
12586 return;
12587 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
12588 ;
12589 XEXP (last, 1) = REG_NOTES (insn);
12590 REG_NOTES (insn) = queued_cfa_restores;
12591 queued_cfa_restores = NULL_RTX;
12592 RTX_FRAME_RELATED_P (insn) = 1;
12593 }
12594
12595 /* Expand prologue or epilogue stack adjustment.
12596 The pattern exists to put a dependency on all ebp-based memory accesses.
12597 STYLE should be negative if instructions should be marked as frame related,
12598 zero if the %r11 register is live and cannot be freely used, and positive
12599 otherwise. */
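 /* For example, ix86_expand_prologue below calls
 
 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (m->fs.sp_offset
 					    - frame.sse_reg_save_offset),
 				   -1, false);
 
    with a STYLE of -1 so that the stack adjustment is marked as frame
    related. */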
12600
12601 static void
12602 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
12603 int style, bool set_cfa)
12604 {
12605 struct machine_function *m = cfun->machine;
12606 rtx insn;
12607 bool add_frame_related_expr = false;
12608
12609 if (Pmode == SImode)
12610 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
12611 else if (x86_64_immediate_operand (offset, DImode))
12612 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
12613 else
12614 {
12615 rtx tmp;
12616 /* r11 is used by indirect sibcall return as well, set before the
12617 epilogue and used after the epilogue. */
12618 if (style)
12619 tmp = gen_rtx_REG (DImode, R11_REG);
12620 else
12621 {
12622 gcc_assert (src != hard_frame_pointer_rtx
12623 && dest != hard_frame_pointer_rtx);
12624 tmp = hard_frame_pointer_rtx;
12625 }
12626 insn = emit_insn (gen_rtx_SET (tmp, offset));
12627 if (style < 0)
12628 add_frame_related_expr = true;
12629
12630 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
12631 }
12632
12633 insn = emit_insn (insn);
12634 if (style >= 0)
12635 ix86_add_queued_cfa_restore_notes (insn);
12636
12637 if (set_cfa)
12638 {
12639 rtx r;
12640
12641 gcc_assert (m->fs.cfa_reg == src);
12642 m->fs.cfa_offset += INTVAL (offset);
12643 m->fs.cfa_reg = dest;
12644
12645 r = gen_rtx_PLUS (Pmode, src, offset);
12646 r = gen_rtx_SET (dest, r);
12647 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
12648 RTX_FRAME_RELATED_P (insn) = 1;
12649 }
12650 else if (style < 0)
12651 {
12652 RTX_FRAME_RELATED_P (insn) = 1;
12653 if (add_frame_related_expr)
12654 {
12655 rtx r = gen_rtx_PLUS (Pmode, src, offset);
12656 r = gen_rtx_SET (dest, r);
12657 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
12658 }
12659 }
12660
12661 if (dest == stack_pointer_rtx)
12662 {
12663 HOST_WIDE_INT ooffset = m->fs.sp_offset;
12664 bool valid = m->fs.sp_valid;
12665
12666 if (src == hard_frame_pointer_rtx)
12667 {
12668 valid = m->fs.fp_valid;
12669 ooffset = m->fs.fp_offset;
12670 }
12671 else if (src == crtl->drap_reg)
12672 {
12673 valid = m->fs.drap_valid;
12674 ooffset = 0;
12675 }
12676 else
12677 {
12678 /* Else there are two possibilities: SP itself, which we set
12679 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
12680 taken care of by hand along the eh_return path. */
12681 gcc_checking_assert (src == stack_pointer_rtx
12682 || offset == const0_rtx);
12683 }
12684
12685 m->fs.sp_offset = ooffset - INTVAL (offset);
12686 m->fs.sp_valid = valid;
12687 }
12688 }
12689
12690 /* Find an available register to be used as the dynamic realign argument
12691 pointer register. Such a register will be written in the prologue and
12692 used at the beginning of the body, so it must not be
12693 1. a parameter passing register.
12694 2. the GOT pointer.
12695 We reuse the static-chain register if it is available. Otherwise, we
12696 use DI for i386 and R13 for x86-64. We chose R13 since it has a
12697 shorter encoding.
12698 
12699 Return: the regno of the chosen register. */
12700
12701 static unsigned int
12702 find_drap_reg (void)
12703 {
12704 tree decl = cfun->decl;
12705
12706 /* Always use callee-saved register if there are no caller-saved
12707 registers. */
12708 if (TARGET_64BIT)
12709 {
12710 /* Use R13 for a nested function or a function that needs a static
12711 chain. Since a function with a tail call may use any caller-saved
12712 register in the epilogue, DRAP must not use a caller-saved
12713 register in that case. */
12714 if (DECL_STATIC_CHAIN (decl)
12715 || cfun->machine->no_caller_saved_registers
12716 || crtl->tail_call_emit)
12717 return R13_REG;
12718
12719 return R10_REG;
12720 }
12721 else
12722 {
12723 /* Use DI for a nested function or a function that needs a static
12724 chain. Since a function with a tail call may use any caller-saved
12725 register in the epilogue, DRAP must not use a caller-saved
12726 register in that case. */
12727 if (DECL_STATIC_CHAIN (decl)
12728 || cfun->machine->no_caller_saved_registers
12729 || crtl->tail_call_emit)
12730 return DI_REG;
12731
12732 /* Reuse static chain register if it isn't used for parameter
12733 passing. */
12734 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
12735 {
12736 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
12737 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
12738 return CX_REG;
12739 }
12740 return DI_REG;
12741 }
12742 }
12743
12744 /* Handle a "force_align_arg_pointer" attribute. */
12745
12746 static tree
12747 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
12748 tree, int, bool *no_add_attrs)
12749 {
12750 if (TREE_CODE (*node) != FUNCTION_TYPE
12751 && TREE_CODE (*node) != METHOD_TYPE
12752 && TREE_CODE (*node) != FIELD_DECL
12753 && TREE_CODE (*node) != TYPE_DECL)
12754 {
12755 warning (OPT_Wattributes, "%qE attribute only applies to functions",
12756 name);
12757 *no_add_attrs = true;
12758 }
12759
12760 return NULL_TREE;
12761 }
12762
12763 /* Return minimum incoming stack alignment. */
12764
12765 static unsigned int
12766 ix86_minimum_incoming_stack_boundary (bool sibcall)
12767 {
12768 unsigned int incoming_stack_boundary;
12769
12770 /* The stack of an interrupt handler is always aligned to
12771 MIN_STACK_BOUNDARY. */
12772 if (cfun->machine->func_type != TYPE_NORMAL)
12773 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12774 /* Prefer the one specified at command line. */
12775 else if (ix86_user_incoming_stack_boundary)
12776 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12777 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
12778 boundary if -mstackrealign is used, this isn't a sibcall check, and
12779 the estimated stack alignment is 128 bits. */
12780 else if (!sibcall
12781 && ix86_force_align_arg_pointer
12782 && crtl->stack_alignment_estimated == 128)
12783 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12784 else
12785 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12786
12787 /* Incoming stack alignment can be changed on individual functions
12788 via force_align_arg_pointer attribute. We use the smallest
12789 incoming stack boundary. */
12790 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12791 && lookup_attribute (ix86_force_align_arg_pointer_string,
12792 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12793 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12794
12795 /* The incoming stack frame has to be aligned at least at
12796 parm_stack_boundary. */
12797 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12798 incoming_stack_boundary = crtl->parm_stack_boundary;
12799
12800 /* Stack at entrance of main is aligned by runtime. We use the
12801 smallest incoming stack boundary. */
12802 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12803 && DECL_NAME (current_function_decl)
12804 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12805 && DECL_FILE_SCOPE_P (current_function_decl))
12806 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12807
12808 return incoming_stack_boundary;
12809 }
12810
12811 /* Update incoming stack boundary and estimated stack alignment. */
12812
12813 static void
12814 ix86_update_stack_boundary (void)
12815 {
12816 ix86_incoming_stack_boundary
12817 = ix86_minimum_incoming_stack_boundary (false);
12818
12819 /* x86_64 vararg needs 16-byte stack alignment for the register save
12820 area. */
12821 if (TARGET_64BIT
12822 && cfun->stdarg
12823 && crtl->stack_alignment_estimated < 128)
12824 crtl->stack_alignment_estimated = 128;
12825
12826 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12827 if (ix86_tls_descriptor_calls_expanded_in_cfun
12828 && crtl->preferred_stack_boundary < 128)
12829 crtl->preferred_stack_boundary = 128;
12830 }
12831
12832 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12833 needed or an rtx for DRAP otherwise. */
12834
12835 static rtx
12836 ix86_get_drap_rtx (void)
12837 {
12838 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12839 crtl->need_drap = true;
12840
12841 if (stack_realign_drap)
12842 {
12843 /* Assign DRAP to vDRAP and return vDRAP. */
12844 unsigned int regno = find_drap_reg ();
12845 rtx drap_vreg;
12846 rtx arg_ptr;
12847 rtx_insn *seq, *insn;
12848
12849 arg_ptr = gen_rtx_REG (Pmode, regno);
12850 crtl->drap_reg = arg_ptr;
12851
12852 start_sequence ();
12853 drap_vreg = copy_to_reg (arg_ptr);
12854 seq = get_insns ();
12855 end_sequence ();
12856
12857 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12858 if (!optimize)
12859 {
12860 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12861 RTX_FRAME_RELATED_P (insn) = 1;
12862 }
12863 return drap_vreg;
12864 }
12865 else
12866 return NULL;
12867 }
12868
12869 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12870
12871 static rtx
12872 ix86_internal_arg_pointer (void)
12873 {
12874 return virtual_incoming_args_rtx;
12875 }
12876
12877 struct scratch_reg {
12878 rtx reg;
12879 bool saved;
12880 };
12881
12882 /* Return a short-lived scratch register for use on function entry.
12883 In 32-bit mode, it is valid only after the registers are saved
12884 in the prologue. This register must be released by means of
12885 release_scratch_register_on_entry once it is dead. */
12886
12887 static void
12888 get_scratch_register_on_entry (struct scratch_reg *sr)
12889 {
12890 int regno;
12891
12892 sr->saved = false;
12893
12894 if (TARGET_64BIT)
12895 {
12896 /* We always use R11 in 64-bit mode. */
12897 regno = R11_REG;
12898 }
12899 else
12900 {
12901 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12902 bool fastcall_p
12903 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12904 bool thiscall_p
12905 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12906 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12907 int regparm = ix86_function_regparm (fntype, decl);
12908 int drap_regno
12909 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12910
12911 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12912 for the static chain register. */
12913 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12914 && drap_regno != AX_REG)
12915 regno = AX_REG;
12916 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12917 for the static chain register. */
12918 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12919 regno = AX_REG;
12920 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12921 regno = DX_REG;
12922 /* ecx is the static chain register. */
12923 else if (regparm < 3 && !fastcall_p && !thiscall_p
12924 && !static_chain_p
12925 && drap_regno != CX_REG)
12926 regno = CX_REG;
12927 else if (ix86_save_reg (BX_REG, true))
12928 regno = BX_REG;
12929 /* esi is the static chain register. */
12930 else if (!(regparm == 3 && static_chain_p)
12931 && ix86_save_reg (SI_REG, true))
12932 regno = SI_REG;
12933 else if (ix86_save_reg (DI_REG, true))
12934 regno = DI_REG;
12935 else
12936 {
12937 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12938 sr->saved = true;
12939 }
12940 }
12941
12942 sr->reg = gen_rtx_REG (Pmode, regno);
12943 if (sr->saved)
12944 {
12945 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12946 RTX_FRAME_RELATED_P (insn) = 1;
12947 }
12948 }
12949
12950 /* Release a scratch register obtained from the preceding function. */
12951
12952 static void
12953 release_scratch_register_on_entry (struct scratch_reg *sr)
12954 {
12955 if (sr->saved)
12956 {
12957 struct machine_function *m = cfun->machine;
12958 rtx x, insn = emit_insn (gen_pop (sr->reg));
12959
12960 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12961 RTX_FRAME_RELATED_P (insn) = 1;
12962 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12963 x = gen_rtx_SET (stack_pointer_rtx, x);
12964 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12965 m->fs.sp_offset -= UNITS_PER_WORD;
12966 }
12967 }
12968
12969 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
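 /* STACK_CHECK_PROBE_INTERVAL_EXP normally defaults to 12, so PROBE_INTERVAL
    is typically 4096 bytes (one page). */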
12970
12971 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12972
12973 static void
12974 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12975 {
12976 /* We skip the probe for the first interval + a small dope of 4 words and
12977 probe that many bytes past the specified size to maintain a protection
12978 area at the bottom of the stack. */
12979 const int dope = 4 * UNITS_PER_WORD;
12980 rtx size_rtx = GEN_INT (size), last;
12981
12982 /* See if we have a constant small number of probes to generate. If so,
12983 that's the easy case. The run-time loop is made up of 9 insns in the
12984 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12985 for n # of intervals. */
12986 if (size <= 4 * PROBE_INTERVAL)
12987 {
12988 HOST_WIDE_INT i, adjust;
12989 bool first_probe = true;
12990
12991 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12992 values of N from 1 until it exceeds SIZE. If only one probe is
12993 needed, this will not generate any code. Then adjust and probe
12994 to PROBE_INTERVAL + SIZE. */
12995 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12996 {
12997 if (first_probe)
12998 {
12999 adjust = 2 * PROBE_INTERVAL + dope;
13000 first_probe = false;
13001 }
13002 else
13003 adjust = PROBE_INTERVAL;
13004
13005 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13006 plus_constant (Pmode, stack_pointer_rtx,
13007 -adjust)));
13008 emit_stack_probe (stack_pointer_rtx);
13009 }
13010
13011 if (first_probe)
13012 adjust = size + PROBE_INTERVAL + dope;
13013 else
13014 adjust = size + PROBE_INTERVAL - i;
13015
13016 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13017 plus_constant (Pmode, stack_pointer_rtx,
13018 -adjust)));
13019 emit_stack_probe (stack_pointer_rtx);
13020
13021 /* Adjust back to account for the additional first interval. */
13022 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
13023 plus_constant (Pmode, stack_pointer_rtx,
13024 PROBE_INTERVAL + dope)));
13025 }
13026
13027 /* Otherwise, do the same as above, but in a loop. Note that we must be
13028 extra careful with variables wrapping around because we might be at
13029 the very top (or the very bottom) of the address space and we have
13030 to be able to handle this case properly; in particular, we use an
13031 equality test for the loop condition. */
13032 else
13033 {
13034 HOST_WIDE_INT rounded_size;
13035 struct scratch_reg sr;
13036
13037 get_scratch_register_on_entry (&sr);
13038
13039
13040 /* Step 1: round SIZE to the previous multiple of the interval. */
13041
13042 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
13043
13044
13045 /* Step 2: compute initial and final value of the loop counter. */
13046
13047 /* SP = SP_0 + PROBE_INTERVAL. */
13048 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13049 plus_constant (Pmode, stack_pointer_rtx,
13050 - (PROBE_INTERVAL + dope))));
13051
13052 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
13053 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
13054 emit_insn (gen_rtx_SET (sr.reg,
13055 plus_constant (Pmode, stack_pointer_rtx,
13056 -rounded_size)));
13057 else
13058 {
13059 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
13060 emit_insn (gen_rtx_SET (sr.reg,
13061 gen_rtx_PLUS (Pmode, sr.reg,
13062 stack_pointer_rtx)));
13063 }
13064
13065
13066 /* Step 3: the loop
13067
13068 do
13069 {
13070 SP = SP + PROBE_INTERVAL
13071 probe at SP
13072 }
13073 while (SP != LAST_ADDR)
13074
13075 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
13076 values of N from 1 until it is equal to ROUNDED_SIZE. */
13077
13078 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
13079
13080
13081 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
13082 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
13083
13084 if (size != rounded_size)
13085 {
13086 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13087 plus_constant (Pmode, stack_pointer_rtx,
13088 rounded_size - size)));
13089 emit_stack_probe (stack_pointer_rtx);
13090 }
13091
13092 /* Adjust back to account for the additional first interval. */
13093 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
13094 plus_constant (Pmode, stack_pointer_rtx,
13095 PROBE_INTERVAL + dope)));
13096
13097 release_scratch_register_on_entry (&sr);
13098 }
13099
13100 /* Even if the stack pointer isn't the CFA register, we need to correctly
13101 describe the adjustments made to it, in particular differentiate the
13102 frame-related ones from the frame-unrelated ones. */
13103 if (size > 0)
13104 {
13105 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
13106 XVECEXP (expr, 0, 0)
13107 = gen_rtx_SET (stack_pointer_rtx,
13108 plus_constant (Pmode, stack_pointer_rtx, -size));
13109 XVECEXP (expr, 0, 1)
13110 = gen_rtx_SET (stack_pointer_rtx,
13111 plus_constant (Pmode, stack_pointer_rtx,
13112 PROBE_INTERVAL + dope + size));
13113 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
13114 RTX_FRAME_RELATED_P (last) = 1;
13115
13116 cfun->machine->fs.sp_offset += size;
13117 }
13118
13119 /* Make sure nothing is scheduled before we are done. */
13120 emit_insn (gen_blockage ());
13121 }
13122
13123 /* Adjust the stack pointer up to REG while probing it. */
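 /* Roughly, assuming the typical 4096-byte probe interval and 64-bit code,
    the loop emitted here looks like (AT&T syntax):
 
 	.LPSRL0:
 		subq	$4096, %rsp
 		orq	$0, (%rsp)
 		cmpq	%r11, %rsp
 		jne	.LPSRL0
 
    where %r11 is the scratch register holding LAST_ADDR. */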
13124
13125 const char *
13126 output_adjust_stack_and_probe (rtx reg)
13127 {
13128 static int labelno = 0;
13129 char loop_lab[32];
13130 rtx xops[2];
13131
13132 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13133
13134 /* Loop. */
13135 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13136
13137 /* SP = SP + PROBE_INTERVAL. */
13138 xops[0] = stack_pointer_rtx;
13139 xops[1] = GEN_INT (PROBE_INTERVAL);
13140 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13141
13142 /* Probe at SP. */
13143 xops[1] = const0_rtx;
13144 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
13145
13146 /* Test if SP == LAST_ADDR. */
13147 xops[0] = stack_pointer_rtx;
13148 xops[1] = reg;
13149 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13150
13151 /* Branch. */
13152 fputs ("\tjne\t", asm_out_file);
13153 assemble_name_raw (asm_out_file, loop_lab);
13154 fputc ('\n', asm_out_file);
13155
13156 return "";
13157 }
13158
13159 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
13160 inclusive. These are offsets from the current stack pointer. */
13161
13162 static void
13163 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
13164 {
13165 /* See if we have a constant small number of probes to generate. If so,
13166 that's the easy case. The run-time loop is made up of 6 insns in the
13167 generic case while the compile-time loop is made up of n insns for n #
13168 of intervals. */
13169 if (size <= 6 * PROBE_INTERVAL)
13170 {
13171 HOST_WIDE_INT i;
13172
13173 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
13174 it exceeds SIZE. If only one probe is needed, this will not
13175 generate any code. Then probe at FIRST + SIZE. */
13176 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
13177 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13178 -(first + i)));
13179
13180 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13181 -(first + size)));
13182 }
13183
13184 /* Otherwise, do the same as above, but in a loop. Note that we must be
13185 extra careful with variables wrapping around because we might be at
13186 the very top (or the very bottom) of the address space and we have
13187 to be able to handle this case properly; in particular, we use an
13188 equality test for the loop condition. */
13189 else
13190 {
13191 HOST_WIDE_INT rounded_size, last;
13192 struct scratch_reg sr;
13193
13194 get_scratch_register_on_entry (&sr);
13195
13196
13197 /* Step 1: round SIZE to the previous multiple of the interval. */
13198
13199 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
13200
13201
13202 /* Step 2: compute initial and final value of the loop counter. */
13203
13204 /* TEST_OFFSET = FIRST. */
13205 emit_move_insn (sr.reg, GEN_INT (-first));
13206
13207 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
13208 last = first + rounded_size;
13209
13210
13211 /* Step 3: the loop
13212
13213 do
13214 {
13215 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
13216 probe at TEST_ADDR
13217 }
13218 while (TEST_ADDR != LAST_ADDR)
13219
13220 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
13221 until it is equal to ROUNDED_SIZE. */
13222
13223 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
13224
13225
13226 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
13227 that SIZE is equal to ROUNDED_SIZE. */
13228
13229 if (size != rounded_size)
13230 emit_stack_probe (plus_constant (Pmode,
13231 gen_rtx_PLUS (Pmode,
13232 stack_pointer_rtx,
13233 sr.reg),
13234 rounded_size - size));
13235
13236 release_scratch_register_on_entry (&sr);
13237 }
13238
13239 /* Make sure nothing is scheduled before we are done. */
13240 emit_insn (gen_blockage ());
13241 }
13242
13243 /* Probe a range of stack addresses from REG to END, inclusive. These are
13244 offsets from the current stack pointer. */
13245
13246 const char *
13247 output_probe_stack_range (rtx reg, rtx end)
13248 {
13249 static int labelno = 0;
13250 char loop_lab[32];
13251 rtx xops[3];
13252
13253 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13254
13255 /* Loop. */
13256 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13257
13258 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
13259 xops[0] = reg;
13260 xops[1] = GEN_INT (PROBE_INTERVAL);
13261 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13262
13263 /* Probe at TEST_ADDR. */
13264 xops[0] = stack_pointer_rtx;
13265 xops[1] = reg;
13266 xops[2] = const0_rtx;
13267 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
13268
13269 /* Test if TEST_ADDR == LAST_ADDR. */
13270 xops[0] = reg;
13271 xops[1] = end;
13272 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13273
13274 /* Branch. */
13275 fputs ("\tjne\t", asm_out_file);
13276 assemble_name_raw (asm_out_file, loop_lab);
13277 fputc ('\n', asm_out_file);
13278
13279 return "";
13280 }
13281
13282 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
13283 to be generated in correct form. */
13284 static void
13285 ix86_finalize_stack_realign_flags (void)
13286 {
13287 /* Check if stack realignment is really needed after reload, and
13288 store the result in cfun. */
13289 unsigned int incoming_stack_boundary
13290 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
13291 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
13292 unsigned int stack_realign
13293 = (incoming_stack_boundary
13294 < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
13295 ? crtl->max_used_stack_slot_alignment
13296 : crtl->stack_alignment_needed));
13297
13298 if (crtl->stack_realign_finalized)
13299 {
13300 /* After stack_realign_needed is finalized, we can no longer
13301 change it. */
13302 gcc_assert (crtl->stack_realign_needed == stack_realign);
13303 return;
13304 }
13305
13306 /* If the only reason for frame_pointer_needed is that we conservatively
13307 assumed stack realignment might be needed, but in the end nothing that
13308 needed the stack alignment had been spilled, clear frame_pointer_needed
13309 and say we don't need stack realignment. */
13310 if (stack_realign
13311 && frame_pointer_needed
13312 && crtl->is_leaf
13313 && flag_omit_frame_pointer
13314 && crtl->sp_is_unchanging
13315 && !ix86_current_function_calls_tls_descriptor
13316 && !crtl->accesses_prior_frames
13317 && !cfun->calls_alloca
13318 && !crtl->calls_eh_return
13319 /* See ira_setup_eliminable_regset for the rationale. */
13320 && !(STACK_CHECK_MOVING_SP
13321 && flag_stack_check
13322 && flag_exceptions
13323 && cfun->can_throw_non_call_exceptions)
13324 && !ix86_frame_pointer_required ()
13325 && get_frame_size () == 0
13326 && ix86_nsaved_sseregs () == 0
13327 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
13328 {
13329 HARD_REG_SET set_up_by_prologue, prologue_used;
13330 basic_block bb;
13331
13332 CLEAR_HARD_REG_SET (prologue_used);
13333 CLEAR_HARD_REG_SET (set_up_by_prologue);
13334 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
13335 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
13336 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
13337 HARD_FRAME_POINTER_REGNUM);
13338 FOR_EACH_BB_FN (bb, cfun)
13339 {
13340 rtx_insn *insn;
13341 FOR_BB_INSNS (bb, insn)
13342 if (NONDEBUG_INSN_P (insn)
13343 && requires_stack_frame_p (insn, prologue_used,
13344 set_up_by_prologue))
13345 {
13346 crtl->stack_realign_needed = stack_realign;
13347 crtl->stack_realign_finalized = true;
13348 return;
13349 }
13350 }
13351
13352 /* If drap has been set, but it actually isn't live at the start
13353 of the function, there is no reason to set it up. */
13354 if (crtl->drap_reg)
13355 {
13356 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13357 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
13358 {
13359 crtl->drap_reg = NULL_RTX;
13360 crtl->need_drap = false;
13361 }
13362 }
13363 else
13364 cfun->machine->no_drap_save_restore = true;
13365
13366 frame_pointer_needed = false;
13367 stack_realign = false;
13368 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
13369 crtl->stack_alignment_needed = incoming_stack_boundary;
13370 crtl->stack_alignment_estimated = incoming_stack_boundary;
13371 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
13372 crtl->preferred_stack_boundary = incoming_stack_boundary;
13373 df_finish_pass (true);
13374 df_scan_alloc (NULL);
13375 df_scan_blocks ();
13376 df_compute_regs_ever_live (true);
13377 df_analyze ();
13378 }
13379
13380 crtl->stack_realign_needed = stack_realign;
13381 crtl->stack_realign_finalized = true;
13382 }
13383
13384 /* Delete SET_GOT right after entry block if it is allocated to reg. */
13385
13386 static void
13387 ix86_elim_entry_set_got (rtx reg)
13388 {
13389 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13390 rtx_insn *c_insn = BB_HEAD (bb);
13391 if (!NONDEBUG_INSN_P (c_insn))
13392 c_insn = next_nonnote_nondebug_insn (c_insn);
13393 if (c_insn && NONJUMP_INSN_P (c_insn))
13394 {
13395 rtx pat = PATTERN (c_insn);
13396 if (GET_CODE (pat) == PARALLEL)
13397 {
13398 rtx vec = XVECEXP (pat, 0, 0);
13399 if (GET_CODE (vec) == SET
13400 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
13401 && REGNO (XEXP (vec, 0)) == REGNO (reg))
13402 delete_insn (c_insn);
13403 }
13404 }
13405 }
13406
13407 /* Expand the prologue into a bunch of separate insns. */
13408
13409 void
13410 ix86_expand_prologue (void)
13411 {
13412 struct machine_function *m = cfun->machine;
13413 rtx insn, t;
13414 struct ix86_frame frame;
13415 HOST_WIDE_INT allocate;
13416 bool int_registers_saved;
13417 bool sse_registers_saved;
13418 rtx static_chain = NULL_RTX;
13419
13420 ix86_finalize_stack_realign_flags ();
13421
13422 /* DRAP should not coexist with stack_realign_fp */
13423 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
13424
13425 memset (&m->fs, 0, sizeof (m->fs));
13426
13427 /* Initialize CFA state for before the prologue. */
13428 m->fs.cfa_reg = stack_pointer_rtx;
13429 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
13430
13431 /* Track SP offset to the CFA. We continue tracking this after we've
13432 swapped the CFA register away from SP. In the case of re-alignment
13433 this is fudged; we're interested in offsets within the local frame. */
13434 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13435 m->fs.sp_valid = true;
13436
13437 ix86_compute_frame_layout (&frame);
13438
13439 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
13440 {
13441 /* We should have already generated an error for any use of
13442 ms_hook on a nested function. */
13443 gcc_checking_assert (!ix86_static_chain_on_stack);
13444
13445 /* Check if profiling is active and we shall use the profiling-before-
13446 prologue variant. If so, issue a sorry. */
13447 if (crtl->profile && flag_fentry != 0)
13448 sorry ("ms_hook_prologue attribute isn%'t compatible "
13449 "with -mfentry for 32-bit");
13450
13451 /* In ix86_asm_output_function_label we emitted:
13452 8b ff movl.s %edi,%edi
13453 55 push %ebp
13454 8b ec movl.s %esp,%ebp
13455
13456 This matches the hookable function prologue in Win32 API
13457 functions in Microsoft Windows XP Service Pack 2 and newer.
13458 Wine uses this to enable Windows apps to hook the Win32 API
13459 functions provided by Wine.
13460
13461 What that means is that we've already set up the frame pointer. */
13462
13463 if (frame_pointer_needed
13464 && !(crtl->drap_reg && crtl->stack_realign_needed))
13465 {
13466 rtx push, mov;
13467
13468 /* We've decided to use the frame pointer already set up.
13469 Describe this to the unwinder by pretending that both
13470 push and mov insns happen right here.
13471
13472 Putting the unwind info here at the end of the ms_hook
13473 is done so that we can make absolutely certain we get
13474 the required byte sequence at the start of the function,
13475 rather than relying on an assembler that can produce
13476 the exact encoding required.
13477
13478 However it does mean (in the unpatched case) that we have
13479 a 1 insn window where the asynchronous unwind info is
13480 incorrect. However, if we placed the unwind info at
13481 its correct location we would have incorrect unwind info
13482 in the patched case. Which is probably all moot since
13483 I don't expect Wine generates dwarf2 unwind info for the
13484 system libraries that use this feature. */
13485
13486 insn = emit_insn (gen_blockage ());
13487
13488 push = gen_push (hard_frame_pointer_rtx);
13489 mov = gen_rtx_SET (hard_frame_pointer_rtx,
13490 stack_pointer_rtx);
13491 RTX_FRAME_RELATED_P (push) = 1;
13492 RTX_FRAME_RELATED_P (mov) = 1;
13493
13494 RTX_FRAME_RELATED_P (insn) = 1;
13495 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13496 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13497
13498 /* Note that gen_push incremented m->fs.cfa_offset, even
13499 though we didn't emit the push insn here. */
13500 m->fs.cfa_reg = hard_frame_pointer_rtx;
13501 m->fs.fp_offset = m->fs.cfa_offset;
13502 m->fs.fp_valid = true;
13503 }
13504 else
13505 {
13506 /* The frame pointer is not needed so pop %ebp again.
13507 This leaves us with a pristine state. */
13508 emit_insn (gen_pop (hard_frame_pointer_rtx));
13509 }
13510 }
13511
13512 /* The first insn of a function that accepts its static chain on the
13513 stack is to push the register that would be filled in by a direct
13514 call. This insn will be skipped by the trampoline. */
13515 else if (ix86_static_chain_on_stack)
13516 {
13517 static_chain = ix86_static_chain (cfun->decl, false);
13518 insn = emit_insn (gen_push (static_chain));
13519 emit_insn (gen_blockage ());
13520
13521 /* We don't want to interpret this push insn as a register save,
13522 only as a stack adjustment. The real copy of the register as
13523 a save will be done later, if needed. */
13524 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13525 t = gen_rtx_SET (stack_pointer_rtx, t);
13526 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13527 RTX_FRAME_RELATED_P (insn) = 1;
13528 }
13529
13530 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
13531 DRAP is needed and stack realignment is really needed after reload. */
13532 if (stack_realign_drap)
13533 {
13534 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13535
13536 /* Can't use DRAP in interrupt function. */
13537 if (cfun->machine->func_type != TYPE_NORMAL)
13538 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13539 "in interrupt service routine. This may be worked "
13540 "around by avoiding functions with aggregate return.");
13541
13542 /* Only need to push parameter pointer reg if it is caller saved. */
13543 if (!call_used_regs[REGNO (crtl->drap_reg)])
13544 {
13545 /* Push arg pointer reg */
13546 insn = emit_insn (gen_push (crtl->drap_reg));
13547 RTX_FRAME_RELATED_P (insn) = 1;
13548 }
13549
13550 /* Grab the argument pointer. */
13551 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13552 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13553 RTX_FRAME_RELATED_P (insn) = 1;
13554 m->fs.cfa_reg = crtl->drap_reg;
13555 m->fs.cfa_offset = 0;
13556
13557 /* Align the stack. */
13558 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13559 stack_pointer_rtx,
13560 GEN_INT (-align_bytes)));
13561 RTX_FRAME_RELATED_P (insn) = 1;
13562
13563 /* Replicate the return address on the stack so that return
13564 address can be reached via (argp - 1) slot. This is needed
13565 to implement macro RETURN_ADDR_RTX and intrinsic function
13566 expand_builtin_return_addr etc. */
13567 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13568 t = gen_frame_mem (word_mode, t);
13569 insn = emit_insn (gen_push (t));
13570 RTX_FRAME_RELATED_P (insn) = 1;
13571
13572 /* For the purposes of frame and register save area addressing,
13573 we've started over with a new frame. */
13574 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13575 m->fs.realigned = true;
13576
13577 if (static_chain)
13578 {
13579 /* Replicate static chain on the stack so that static chain
13580 can be reached via (argp - 2) slot. This is needed for
13581 nested function with stack realignment. */
13582 insn = emit_insn (gen_push (static_chain));
13583 RTX_FRAME_RELATED_P (insn) = 1;
13584 }
13585 }
13586
13587 int_registers_saved = (frame.nregs == 0);
13588 sse_registers_saved = (frame.nsseregs == 0);
13589
13590 if (frame_pointer_needed && !m->fs.fp_valid)
13591 {
13592 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13593 slower on all targets. Also sdb doesn't like it. */
13594 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13595 RTX_FRAME_RELATED_P (insn) = 1;
13596
13597 /* Push registers now, before setting the frame pointer
13598 on SEH target. */
13599 if (!int_registers_saved
13600 && TARGET_SEH
13601 && !frame.save_regs_using_mov)
13602 {
13603 ix86_emit_save_regs ();
13604 int_registers_saved = true;
13605 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13606 }
13607
13608 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13609 {
13610 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13611 RTX_FRAME_RELATED_P (insn) = 1;
13612
13613 if (m->fs.cfa_reg == stack_pointer_rtx)
13614 m->fs.cfa_reg = hard_frame_pointer_rtx;
13615 m->fs.fp_offset = m->fs.sp_offset;
13616 m->fs.fp_valid = true;
13617 }
13618 }
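/* Illustratively, when the frame pointer can be set up immediately this
   is the classic "push %rbp; mov %rsp, %rbp" pair (%ebp/%esp in 32-bit
   mode); on SEH targets the integer register pushes may have been
   emitted in between.  */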
13619
13620 if (!int_registers_saved)
13621 {
13622 /* If saving registers via PUSH, do so now. */
13623 if (!frame.save_regs_using_mov)
13624 {
13625 ix86_emit_save_regs ();
13626 int_registers_saved = true;
13627 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13628 }
13629
13630 /* When using the red zone we may start register saving before allocating
13631 the stack frame, saving one cycle of the prologue. However, avoid
13632 doing this if we have to probe the stack; at least on x86_64 the
13633 stack probe can turn into a call that clobbers a red zone location. */
13634 else if (ix86_using_red_zone ()
13635 && (! TARGET_STACK_PROBE
13636 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13637 {
13638 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13639 int_registers_saved = true;
13640 }
13641 }
13642
13643 if (stack_realign_fp)
13644 {
13645 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13646 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13647
13648 /* The computation of the size of the re-aligned stack frame means
13649 that we must allocate the size of the register save area before
13650 performing the actual alignment. Otherwise we cannot guarantee
13651 that there's enough storage above the realignment point. */
13652 if (m->fs.sp_offset != frame.sse_reg_save_offset)
13653 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13654 GEN_INT (m->fs.sp_offset
13655 - frame.sse_reg_save_offset),
13656 -1, false);
13657
13658 /* Align the stack. */
13659 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13660 stack_pointer_rtx,
13661 GEN_INT (-align_bytes)));
13662
13663 /* For the purposes of register save area addressing, the stack
13664 pointer is no longer valid. As for the value of sp_offset,
13665 see ix86_compute_frame_layout, which we need to match in order
13666 to pass verification of stack_pointer_offset at the end. */
13667 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13668 m->fs.sp_valid = false;
13669 }
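/* As an illustration (not exact output), a 32-byte alignment requirement
   makes ix86_gen_andsp above emit roughly "andq $-32, %rsp" in 64-bit
   mode, or "andl $-32, %esp" in 32-bit mode.  */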
13670
13671 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13672
13673 if (flag_stack_usage_info)
13674 {
13675 /* We start to count from ARG_POINTER. */
13676 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13677
13678 /* If it was realigned, take into account the fake frame. */
13679 if (stack_realign_drap)
13680 {
13681 if (ix86_static_chain_on_stack)
13682 stack_size += UNITS_PER_WORD;
13683
13684 if (!call_used_regs[REGNO (crtl->drap_reg)])
13685 stack_size += UNITS_PER_WORD;
13686
13687 /* This over-estimates by 1 minimal-stack-alignment-unit but
13688 mitigates that by counting in the new return address slot. */
13689 current_function_dynamic_stack_size
13690 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13691 }
13692
13693 current_function_static_stack_size = stack_size;
13694 }
13695
13696 /* On SEH targets with a very large frame size, allocate an area to save
13697 SSE registers (as the very large allocation won't be described). */
13698 if (TARGET_SEH
13699 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13700 && !sse_registers_saved)
13701 {
13702 HOST_WIDE_INT sse_size =
13703 frame.sse_reg_save_offset - frame.reg_save_offset;
13704
13705 gcc_assert (int_registers_saved);
13706
13707 /* No need to do stack checking as the area will be immediately
13708 written. */
13709 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13710 GEN_INT (-sse_size), -1,
13711 m->fs.cfa_reg == stack_pointer_rtx);
13712 allocate -= sse_size;
13713 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13714 sse_registers_saved = true;
13715 }
13716
13717 /* The stack has already been decremented by the instruction calling us,
13718 so probe if the size is non-negative to preserve the protection area. */
13719 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
13720 {
13721 /* We expect the registers to be saved when probes are used. */
13722 gcc_assert (int_registers_saved);
13723
13724 if (STACK_CHECK_MOVING_SP)
13725 {
13726 if (!(crtl->is_leaf && !cfun->calls_alloca
13727 && allocate <= PROBE_INTERVAL))
13728 {
13729 ix86_adjust_stack_and_probe (allocate);
13730 allocate = 0;
13731 }
13732 }
13733 else
13734 {
13735 HOST_WIDE_INT size = allocate;
13736
13737 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13738 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
13739
13740 if (TARGET_STACK_PROBE)
13741 {
13742 if (crtl->is_leaf && !cfun->calls_alloca)
13743 {
13744 if (size > PROBE_INTERVAL)
13745 ix86_emit_probe_stack_range (0, size);
13746 }
13747 else
13748 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
13749 }
13750 else
13751 {
13752 if (crtl->is_leaf && !cfun->calls_alloca)
13753 {
13754 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
13755 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
13756 size - STACK_CHECK_PROTECT);
13757 }
13758 else
13759 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
13760 }
13761 }
13762 }
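/* Roughly: when probing is needed, ix86_adjust_stack_and_probe both moves
   the stack pointer and touches the pages, so ALLOCATE is cleared; in the
   non-moving case ix86_emit_probe_stack_range only probes the range in
   PROBE_INTERVAL steps and the actual allocation still happens below.  */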
13763
13764 if (allocate == 0)
13765 ;
13766 else if (!ix86_target_stack_probe ()
13767 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13768 {
13769 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13770 GEN_INT (-allocate), -1,
13771 m->fs.cfa_reg == stack_pointer_rtx);
13772 }
13773 else
13774 {
13775 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13776 rtx r10 = NULL;
13777 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13778 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13779 bool eax_live = ix86_eax_live_at_start_p ();
13780 bool r10_live = false;
13781
13782 if (TARGET_64BIT)
13783 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13784
13785 if (eax_live)
13786 {
13787 insn = emit_insn (gen_push (eax));
13788 allocate -= UNITS_PER_WORD;
13789 /* Note that SEH directives need to continue tracking the stack
13790 pointer even after the frame pointer has been set up. */
13791 if (sp_is_cfa_reg || TARGET_SEH)
13792 {
13793 if (sp_is_cfa_reg)
13794 m->fs.cfa_offset += UNITS_PER_WORD;
13795 RTX_FRAME_RELATED_P (insn) = 1;
13796 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13797 gen_rtx_SET (stack_pointer_rtx,
13798 plus_constant (Pmode, stack_pointer_rtx,
13799 -UNITS_PER_WORD)));
13800 }
13801 }
13802
13803 if (r10_live)
13804 {
13805 r10 = gen_rtx_REG (Pmode, R10_REG);
13806 insn = emit_insn (gen_push (r10));
13807 allocate -= UNITS_PER_WORD;
13808 if (sp_is_cfa_reg || TARGET_SEH)
13809 {
13810 if (sp_is_cfa_reg)
13811 m->fs.cfa_offset += UNITS_PER_WORD;
13812 RTX_FRAME_RELATED_P (insn) = 1;
13813 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13814 gen_rtx_SET (stack_pointer_rtx,
13815 plus_constant (Pmode, stack_pointer_rtx,
13816 -UNITS_PER_WORD)));
13817 }
13818 }
13819
13820 emit_move_insn (eax, GEN_INT (allocate));
13821 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13822
13823 /* Use the fact that AX still contains ALLOCATE. */
13824 adjust_stack_insn = (Pmode == DImode
13825 ? gen_pro_epilogue_adjust_stack_di_sub
13826 : gen_pro_epilogue_adjust_stack_si_sub);
13827
13828 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13829 stack_pointer_rtx, eax));
13830
13831 if (sp_is_cfa_reg || TARGET_SEH)
13832 {
13833 if (sp_is_cfa_reg)
13834 m->fs.cfa_offset += allocate;
13835 RTX_FRAME_RELATED_P (insn) = 1;
13836 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13837 gen_rtx_SET (stack_pointer_rtx,
13838 plus_constant (Pmode, stack_pointer_rtx,
13839 -allocate)));
13840 }
13841 m->fs.sp_offset += allocate;
13842
13843 /* Use stack_pointer_rtx for relative addressing so that code
13844 works for realigned stack, too. */
13845 if (r10_live && eax_live)
13846 {
13847 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13848 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13849 gen_frame_mem (word_mode, t));
13850 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13851 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13852 gen_frame_mem (word_mode, t));
13853 }
13854 else if (eax_live || r10_live)
13855 {
13856 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13857 emit_move_insn (gen_rtx_REG (word_mode,
13858 (eax_live ? AX_REG : R10_REG)),
13859 gen_frame_mem (word_mode, t));
13860 }
13861 }
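/* Roughly (an illustrative sketch, not exact output), this path emits
       mov  $ALLOCATE, %rax
       call <stack allocation / probe worker>
       sub  %rax, %rsp
   (or the %eax/%esp equivalents in 32-bit mode), with %eax and/or %r10
   pushed beforehand and reloaded from the stack afterwards when they
   were live on entry.  */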
13862 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13863
13864 /* If we haven't already set up the frame pointer, do so now. */
13865 if (frame_pointer_needed && !m->fs.fp_valid)
13866 {
13867 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13868 GEN_INT (frame.stack_pointer_offset
13869 - frame.hard_frame_pointer_offset));
13870 insn = emit_insn (insn);
13871 RTX_FRAME_RELATED_P (insn) = 1;
13872 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13873
13874 if (m->fs.cfa_reg == stack_pointer_rtx)
13875 m->fs.cfa_reg = hard_frame_pointer_rtx;
13876 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13877 m->fs.fp_valid = true;
13878 }
13879
13880 if (!int_registers_saved)
13881 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13882 if (!sse_registers_saved)
13883 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13884
13885 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
13886 in the prologue. */
13887 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13888 {
13889 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13890 insn = emit_insn (gen_set_got (pic));
13891 RTX_FRAME_RELATED_P (insn) = 1;
13892 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13893 emit_insn (gen_prologue_use (pic));
13894 /* Delete an already emitted SET_GOT if it exists and is allocated to
13895 REAL_PIC_OFFSET_TABLE_REGNUM. */
13896 ix86_elim_entry_set_got (pic);
13897 }
13898
13899 if (crtl->drap_reg && !crtl->stack_realign_needed)
13900 {
13901 /* vDRAP is set up, but after reload it turns out stack realignment
13902 isn't necessary; here we emit prologue code to set up DRAP
13903 without the stack realignment adjustment. */
13904 t = choose_baseaddr (0);
13905 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13906 }
13907
13908 /* Prevent instructions from being scheduled into the register save push
13909 sequence when access to the red zone area is done through the frame pointer.
13910 The offset between the frame pointer and the stack pointer is calculated
13911 relative to the value of the stack pointer at the end of the function
13912 prologue, and moving instructions that access the red zone area via the frame
13913 pointer inside the push sequence violates this assumption. */
13914 if (frame_pointer_needed && frame.red_zone_size)
13915 emit_insn (gen_memory_blockage ());
13916
13917 /* SEH requires that the prologue end within 256 bytes of the start of
13918 the function. Prevent instruction schedules that would extend that.
13919 Further, prevent alloca modifications to the stack pointer from being
13920 combined with prologue modifications. */
13921 if (TARGET_SEH)
13922 emit_insn (gen_prologue_use (stack_pointer_rtx));
13923 }
13924
13925 /* Emit code to restore REG using a POP insn. */
13926
13927 static void
13928 ix86_emit_restore_reg_using_pop (rtx reg)
13929 {
13930 struct machine_function *m = cfun->machine;
13931 rtx_insn *insn = emit_insn (gen_pop (reg));
13932
13933 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13934 m->fs.sp_offset -= UNITS_PER_WORD;
13935
13936 if (m->fs.cfa_reg == crtl->drap_reg
13937 && REGNO (reg) == REGNO (crtl->drap_reg))
13938 {
13939 /* Previously we'd represented the CFA as an expression
13940 like *(%ebp - 8). We've just popped that value from
13941 the stack, which means we need to reset the CFA to
13942 the drap register. This will remain until we restore
13943 the stack pointer. */
13944 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13945 RTX_FRAME_RELATED_P (insn) = 1;
13946
13947 /* This means that the DRAP register is valid for addressing too. */
13948 m->fs.drap_valid = true;
13949 return;
13950 }
13951
13952 if (m->fs.cfa_reg == stack_pointer_rtx)
13953 {
13954 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13955 x = gen_rtx_SET (stack_pointer_rtx, x);
13956 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13957 RTX_FRAME_RELATED_P (insn) = 1;
13958
13959 m->fs.cfa_offset -= UNITS_PER_WORD;
13960 }
13961
13962 /* When the frame pointer is the CFA, and we pop it, we are
13963 swapping back to the stack pointer as the CFA. This happens
13964 for stack frames that don't allocate other data, so we assume
13965 the stack pointer is now pointing at the return address, i.e.
13966 the function entry state, which makes the offset be 1 word. */
13967 if (reg == hard_frame_pointer_rtx)
13968 {
13969 m->fs.fp_valid = false;
13970 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13971 {
13972 m->fs.cfa_reg = stack_pointer_rtx;
13973 m->fs.cfa_offset -= UNITS_PER_WORD;
13974
13975 add_reg_note (insn, REG_CFA_DEF_CFA,
13976 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13977 GEN_INT (m->fs.cfa_offset)));
13978 RTX_FRAME_RELATED_P (insn) = 1;
13979 }
13980 }
13981 }
13982
13983 /* Emit code to restore saved registers using POP insns. */
13984
13985 static void
13986 ix86_emit_restore_regs_using_pop (void)
13987 {
13988 unsigned int regno;
13989
13990 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13991 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13992 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13993 }
13994
13995 /* Emit code and notes for the LEAVE instruction. */
13996
13997 static void
13998 ix86_emit_leave (void)
13999 {
14000 struct machine_function *m = cfun->machine;
14001 rtx_insn *insn = emit_insn (ix86_gen_leave ());
14002
14003 ix86_add_queued_cfa_restore_notes (insn);
14004
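/* The "leave" insn is equivalent to "mov %ebp, %esp; pop %ebp": it makes
   the stack pointer valid again (pointing just above the slot that held
   the saved frame pointer) and invalidates the frame pointer, which is
   what the frame-state updates below record.  */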
14005 gcc_assert (m->fs.fp_valid);
14006 m->fs.sp_valid = true;
14007 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
14008 m->fs.fp_valid = false;
14009
14010 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14011 {
14012 m->fs.cfa_reg = stack_pointer_rtx;
14013 m->fs.cfa_offset = m->fs.sp_offset;
14014
14015 add_reg_note (insn, REG_CFA_DEF_CFA,
14016 plus_constant (Pmode, stack_pointer_rtx,
14017 m->fs.sp_offset));
14018 RTX_FRAME_RELATED_P (insn) = 1;
14019 }
14020 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
14021 m->fs.fp_offset);
14022 }
14023
14024 /* Emit code to restore saved registers using MOV insns.
14025 First register is restored from CFA - CFA_OFFSET. */
14026 static void
14027 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
14028 bool maybe_eh_return)
14029 {
14030 struct machine_function *m = cfun->machine;
14031 unsigned int regno;
14032
14033 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14034 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
14035 {
14036 rtx reg = gen_rtx_REG (word_mode, regno);
14037 rtx mem;
14038 rtx_insn *insn;
14039
14040 mem = choose_baseaddr (cfa_offset);
14041 mem = gen_frame_mem (word_mode, mem);
14042 insn = emit_move_insn (reg, mem);
14043
14044 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
14045 {
14046 /* Previously we'd represented the CFA as an expression
14047 like *(%ebp - 8). We've just popped that value from
14048 the stack, which means we need to reset the CFA to
14049 the drap register. This will remain until we restore
14050 the stack pointer. */
14051 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14052 RTX_FRAME_RELATED_P (insn) = 1;
14053
14054 /* This means that the DRAP register is valid for addressing. */
14055 m->fs.drap_valid = true;
14056 }
14057 else
14058 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14059
14060 cfa_offset -= UNITS_PER_WORD;
14061 }
14062 }
14063
14064 /* Emit code to restore saved SSE registers using MOV insns.
14065 First register is restored from CFA - CFA_OFFSET. */
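/* V4SFmode is used here purely as a convenient 16-byte container: the
   full XMM register is reloaded bit-for-bit regardless of the mode in
   which it was actually used.  */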
14066 static void
14067 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
14068 bool maybe_eh_return)
14069 {
14070 unsigned int regno;
14071
14072 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14073 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
14074 {
14075 rtx reg = gen_rtx_REG (V4SFmode, regno);
14076 rtx mem;
14077 unsigned int align;
14078
14079 mem = choose_baseaddr (cfa_offset);
14080 mem = gen_rtx_MEM (V4SFmode, mem);
14081
14082 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
14083 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
14084 set_mem_align (mem, align);
14085 emit_insn (gen_rtx_SET (reg, mem));
14086
14087 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14088
14089 cfa_offset -= GET_MODE_SIZE (V4SFmode);
14090 }
14091 }
14092
14093 /* Restore function stack, frame, and registers. */
14094
14095 void
14096 ix86_expand_epilogue (int style)
14097 {
14098 struct machine_function *m = cfun->machine;
14099 struct machine_frame_state frame_state_save = m->fs;
14100 struct ix86_frame frame;
14101 bool restore_regs_via_mov;
14102 bool using_drap;
14103
14104 ix86_finalize_stack_realign_flags ();
14105 ix86_compute_frame_layout (&frame);
14106
14107 m->fs.sp_valid = (!frame_pointer_needed
14108 || (crtl->sp_is_unchanging
14109 && !stack_realign_fp));
14110 gcc_assert (!m->fs.sp_valid
14111 || m->fs.sp_offset == frame.stack_pointer_offset);
14112
14113 /* The FP must be valid if the frame pointer is present. */
14114 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
14115 gcc_assert (!m->fs.fp_valid
14116 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
14117
14118 /* We must have *some* valid pointer to the stack frame. */
14119 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
14120
14121 /* The DRAP is never valid at this point. */
14122 gcc_assert (!m->fs.drap_valid);
14123
14124 /* See the comment about red zone and frame
14125 pointer usage in ix86_expand_prologue. */
14126 if (frame_pointer_needed && frame.red_zone_size)
14127 emit_insn (gen_memory_blockage ());
14128
14129 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
14130 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
14131
14132 /* Determine the CFA offset of the end of the red-zone. */
14133 m->fs.red_zone_offset = 0;
14134 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
14135 {
14136 /* The red-zone begins below the return address. */
14137 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
14138
14139 /* When the register save area is in the aligned portion of
14140 the stack, determine the maximum runtime displacement that
14141 matches up with the aligned frame. */
14142 if (stack_realign_drap)
14143 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
14144 + UNITS_PER_WORD);
14145 }
14146
14147 /* Special care must be taken for the normal return case of a function
14148 using eh_return: the eax and edx registers are marked as saved, but
14149 not restored along this path. Adjust the save location to match. */
14150 if (crtl->calls_eh_return && style != 2)
14151 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
14152
14153 /* EH_RETURN requires the use of moves to function properly. */
14154 if (crtl->calls_eh_return)
14155 restore_regs_via_mov = true;
14156 /* SEH requires the use of pops to identify the epilogue. */
14157 else if (TARGET_SEH)
14158 restore_regs_via_mov = false;
14159 /* If we're only restoring one register and sp is not valid, then
14160 use a move instruction to restore the register, since it's
14161 less work than reloading sp and popping the register. */
14162 else if (!m->fs.sp_valid && frame.nregs <= 1)
14163 restore_regs_via_mov = true;
14164 else if (TARGET_EPILOGUE_USING_MOVE
14165 && cfun->machine->use_fast_prologue_epilogue
14166 && (frame.nregs > 1
14167 || m->fs.sp_offset != frame.reg_save_offset))
14168 restore_regs_via_mov = true;
14169 else if (frame_pointer_needed
14170 && !frame.nregs
14171 && m->fs.sp_offset != frame.reg_save_offset)
14172 restore_regs_via_mov = true;
14173 else if (frame_pointer_needed
14174 && TARGET_USE_LEAVE
14175 && cfun->machine->use_fast_prologue_epilogue
14176 && frame.nregs == 1)
14177 restore_regs_via_mov = true;
14178 else
14179 restore_regs_via_mov = false;
14180
14181 if (restore_regs_via_mov || frame.nsseregs)
14182 {
14183 /* Ensure that the entire register save area is addressable via
14184 the stack pointer, if we will restore via sp. */
14185 if (TARGET_64BIT
14186 && m->fs.sp_offset > 0x7fffffff
14187 && !(m->fs.fp_valid || m->fs.drap_valid)
14188 && (frame.nsseregs + frame.nregs) != 0)
14189 {
14190 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14191 GEN_INT (m->fs.sp_offset
14192 - frame.sse_reg_save_offset),
14193 style,
14194 m->fs.cfa_reg == stack_pointer_rtx);
14195 }
14196 }
14197
14198 /* If there are any SSE registers to restore, then we have to do it
14199 via moves, since there's obviously no pop for SSE regs. */
14200 if (frame.nsseregs)
14201 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
14202 style == 2);
14203
14204 if (restore_regs_via_mov)
14205 {
14206 rtx t;
14207
14208 if (frame.nregs)
14209 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
14210
14211 /* eh_return epilogues need %ecx added to the stack pointer. */
14212 if (style == 2)
14213 {
14214 rtx sa = EH_RETURN_STACKADJ_RTX;
14215 rtx_insn *insn;
14216
14217 /* %ecx can't be used for both DRAP register and eh_return. */
14218 if (crtl->drap_reg)
14219 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14220
14221 /* regparm nested functions don't work with eh_return. */
14222 gcc_assert (!ix86_static_chain_on_stack);
14223
14224 if (frame_pointer_needed)
14225 {
14226 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14227 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14228 emit_insn (gen_rtx_SET (sa, t));
14229
14230 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14231 insn = emit_move_insn (hard_frame_pointer_rtx, t);
14232
14233 /* Note that we use SA as a temporary CFA, as the return
14234 address is at the proper place relative to it. We
14235 pretend this happens at the FP restore insn because
14236 prior to this insn the FP would be stored at the wrong
14237 offset relative to SA, and after this insn we have no
14238 other reasonable register to use for the CFA. We don't
14239 bother resetting the CFA to the SP for the duration of
14240 the return insn. */
14241 add_reg_note (insn, REG_CFA_DEF_CFA,
14242 plus_constant (Pmode, sa, UNITS_PER_WORD));
14243 ix86_add_queued_cfa_restore_notes (insn);
14244 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14245 RTX_FRAME_RELATED_P (insn) = 1;
14246
14247 m->fs.cfa_reg = sa;
14248 m->fs.cfa_offset = UNITS_PER_WORD;
14249 m->fs.fp_valid = false;
14250
14251 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14252 const0_rtx, style, false);
14253 }
14254 else
14255 {
14256 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14257 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14258 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14259 ix86_add_queued_cfa_restore_notes (insn);
14260
14261 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14262 if (m->fs.cfa_offset != UNITS_PER_WORD)
14263 {
14264 m->fs.cfa_offset = UNITS_PER_WORD;
14265 add_reg_note (insn, REG_CFA_DEF_CFA,
14266 plus_constant (Pmode, stack_pointer_rtx,
14267 UNITS_PER_WORD));
14268 RTX_FRAME_RELATED_P (insn) = 1;
14269 }
14270 }
14271 m->fs.sp_offset = UNITS_PER_WORD;
14272 m->fs.sp_valid = true;
14273 }
14274 }
14275 else
14276 {
14277 /* SEH requires that the function end with (1) a stack adjustment
14278 if necessary, (2) a sequence of pops, and (3) a return or
14279 jump instruction. Prevent insns from the function body from
14280 being scheduled into this sequence. */
14281 if (TARGET_SEH)
14282 {
14283 /* Prevent a catch region from being adjacent to the standard
14284 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
14285 several other flags that would be interesting to test are
14286 set up yet. */
14287 if (flag_non_call_exceptions)
14288 emit_insn (gen_nops (const1_rtx));
14289 else
14290 emit_insn (gen_blockage ());
14291 }
14292
14293 /* The first step is to deallocate the stack frame so that we can
14294 pop the registers. Also do this on SEH targets with a very large
14295 frame, as the emitted instructions aren't allowed by the ABI in
14296 epilogues. */
14297 if (!m->fs.sp_valid
14298 || (TARGET_SEH
14299 && (m->fs.sp_offset - frame.reg_save_offset
14300 >= SEH_MAX_FRAME_SIZE)))
14301 {
14302 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14303 GEN_INT (m->fs.fp_offset
14304 - frame.reg_save_offset),
14305 style, false);
14306 }
14307 else if (m->fs.sp_offset != frame.reg_save_offset)
14308 {
14309 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14310 GEN_INT (m->fs.sp_offset
14311 - frame.reg_save_offset),
14312 style,
14313 m->fs.cfa_reg == stack_pointer_rtx);
14314 }
14315
14316 ix86_emit_restore_regs_using_pop ();
14317 }
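/* Illustrative only: this path typically produces something like
       add  $LOCALS, %rsp
       pop  %rbx
       pop  %r12
   with the frame pointer restore and the return emitted further below.  */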
14318
14319 /* If we used a frame pointer and haven't already got rid of it,
14320 then do so now. */
14321 if (m->fs.fp_valid)
14322 {
14323 /* If the stack pointer is valid and pointing at the frame
14324 pointer store address, then we only need a pop. */
14325 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
14326 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14327 /* Leave results in shorter dependency chains on CPUs that are
14328 able to grok it fast. */
14329 else if (TARGET_USE_LEAVE
14330 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14331 || !cfun->machine->use_fast_prologue_epilogue)
14332 ix86_emit_leave ();
14333 else
14334 {
14335 pro_epilogue_adjust_stack (stack_pointer_rtx,
14336 hard_frame_pointer_rtx,
14337 const0_rtx, style, !using_drap);
14338 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14339 }
14340 }
14341
14342 if (using_drap)
14343 {
14344 int param_ptr_offset = UNITS_PER_WORD;
14345 rtx_insn *insn;
14346
14347 gcc_assert (stack_realign_drap);
14348
14349 if (ix86_static_chain_on_stack)
14350 param_ptr_offset += UNITS_PER_WORD;
14351 if (!call_used_regs[REGNO (crtl->drap_reg)])
14352 param_ptr_offset += UNITS_PER_WORD;
14353
14354 insn = emit_insn (gen_rtx_SET
14355 (stack_pointer_rtx,
14356 gen_rtx_PLUS (Pmode,
14357 crtl->drap_reg,
14358 GEN_INT (-param_ptr_offset))));
14359 m->fs.cfa_reg = stack_pointer_rtx;
14360 m->fs.cfa_offset = param_ptr_offset;
14361 m->fs.sp_offset = param_ptr_offset;
14362 m->fs.realigned = false;
14363
14364 add_reg_note (insn, REG_CFA_DEF_CFA,
14365 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14366 GEN_INT (param_ptr_offset)));
14367 RTX_FRAME_RELATED_P (insn) = 1;
14368
14369 if (!call_used_regs[REGNO (crtl->drap_reg)])
14370 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14371 }
14372
14373 /* At this point the stack pointer must be valid, and we must have
14374 restored all of the registers. We may not have deallocated the
14375 entire stack frame. We've delayed this until now because it may
14376 be possible to merge the local stack deallocation with the
14377 deallocation forced by ix86_static_chain_on_stack. */
14378 gcc_assert (m->fs.sp_valid);
14379 gcc_assert (!m->fs.fp_valid);
14380 gcc_assert (!m->fs.realigned);
14381 if (m->fs.sp_offset != UNITS_PER_WORD)
14382 {
14383 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14384 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14385 style, true);
14386 }
14387 else
14388 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14389
14390 /* Sibcall epilogues don't want a return instruction. */
14391 if (style == 0)
14392 {
14393 m->fs = frame_state_save;
14394 return;
14395 }
14396
14397 if (cfun->machine->func_type != TYPE_NORMAL)
14398 {
14399 /* Return with the "IRET" instruction from interrupt handler.
14400 Pop the 'ERROR_CODE' off the stack before the 'IRET'
14401 instruction in exception handler. */
14402 if (cfun->machine->func_type == TYPE_EXCEPTION)
14403 {
14404 rtx r = plus_constant (Pmode, stack_pointer_rtx,
14405 UNITS_PER_WORD);
14406 emit_insn (gen_rtx_SET (stack_pointer_rtx, r));
14407 }
14408 emit_jump_insn (gen_interrupt_return ());
14409 }
14410 else if (crtl->args.pops_args && crtl->args.size)
14411 {
14412 rtx popc = GEN_INT (crtl->args.pops_args);
14413
14414 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
14415 address, do an explicit add, and jump indirectly to the caller. */
14416
14417 if (crtl->args.pops_args >= 65536)
14418 {
14419 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14420 rtx_insn *insn;
14421
14422 /* There is no "pascal" calling convention in any 64bit ABI. */
14423 gcc_assert (!TARGET_64BIT);
14424
14425 insn = emit_insn (gen_pop (ecx));
14426 m->fs.cfa_offset -= UNITS_PER_WORD;
14427 m->fs.sp_offset -= UNITS_PER_WORD;
14428
14429 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14430 x = gen_rtx_SET (stack_pointer_rtx, x);
14431 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14432 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14433 RTX_FRAME_RELATED_P (insn) = 1;
14434
14435 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14436 popc, -1, true);
14437 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14438 }
14439 else
14440 emit_jump_insn (gen_simple_return_pop_internal (popc));
14441 }
14442 else
14443 emit_jump_insn (gen_simple_return_internal ());
14444
14445 /* Restore the state back to the state from the prologue,
14446 so that it's correct for the next epilogue. */
14447 m->fs = frame_state_save;
14448 }
14449
14450 /* Reset from the function's potential modifications. */
14451
14452 static void
14453 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
14454 {
14455 if (pic_offset_table_rtx
14456 && !ix86_use_pseudo_pic_reg ())
14457 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14458 #if TARGET_MACHO
14459 /* Mach-O doesn't support labels at the end of objects, so if
14460 it looks like we might want one, insert a NOP. */
14461 {
14462 rtx_insn *insn = get_last_insn ();
14463 rtx_insn *deleted_debug_label = NULL;
14464 while (insn
14465 && NOTE_P (insn)
14466 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14467 {
14468 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14469 notes only; instead set their CODE_LABEL_NUMBER to -1,
14470 otherwise there would be code generation differences
14471 between -g and -g0. */
14472 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14473 deleted_debug_label = insn;
14474 insn = PREV_INSN (insn);
14475 }
14476 if (insn
14477 && (LABEL_P (insn)
14478 || (NOTE_P (insn)
14479 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
14480 fputs ("\tnop\n", file);
14481 else if (deleted_debug_label)
14482 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14483 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14484 CODE_LABEL_NUMBER (insn) = -1;
14485 }
14486 #endif
14487
14488 }
14489
14490 /* Return a scratch register to use in the split stack prologue. The
14491 split stack prologue is used for -fsplit-stack. These are the first
14492 instructions in the function, even before the regular prologue.
14493 The scratch register can be any caller-saved register which is not
14494 used for parameters or for the static chain. */
14495
14496 static unsigned int
14497 split_stack_prologue_scratch_regno (void)
14498 {
14499 if (TARGET_64BIT)
14500 return R11_REG;
14501 else
14502 {
14503 bool is_fastcall, is_thiscall;
14504 int regparm;
14505
14506 is_fastcall = (lookup_attribute ("fastcall",
14507 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14508 != NULL);
14509 is_thiscall = (lookup_attribute ("thiscall",
14510 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14511 != NULL);
14512 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14513
14514 if (is_fastcall)
14515 {
14516 if (DECL_STATIC_CHAIN (cfun->decl))
14517 {
14518 sorry ("-fsplit-stack does not support fastcall with "
14519 "nested function");
14520 return INVALID_REGNUM;
14521 }
14522 return AX_REG;
14523 }
14524 else if (is_thiscall)
14525 {
14526 if (!DECL_STATIC_CHAIN (cfun->decl))
14527 return DX_REG;
14528 return AX_REG;
14529 }
14530 else if (regparm < 3)
14531 {
14532 if (!DECL_STATIC_CHAIN (cfun->decl))
14533 return CX_REG;
14534 else
14535 {
14536 if (regparm >= 2)
14537 {
14538 sorry ("-fsplit-stack does not support 2 register "
14539 "parameters for a nested function");
14540 return INVALID_REGNUM;
14541 }
14542 return DX_REG;
14543 }
14544 }
14545 else
14546 {
14547 /* FIXME: We could make this work by pushing a register
14548 around the addition and comparison. */
14549 sorry ("-fsplit-stack does not support 3 register parameters");
14550 return INVALID_REGNUM;
14551 }
14552 }
14553 }
14554
14555 /* A SYMBOL_REF for the function which allocates new stack space for
14556 -fsplit-stack. */
14557
14558 static GTY(()) rtx split_stack_fn;
14559
14560 /* A SYMBOL_REF for the __morestack function to use when using the large
14561 model. */
14562
14563 static GTY(()) rtx split_stack_fn_large;
14564
14565 /* Handle -fsplit-stack. These are the first instructions in the
14566 function, even before the regular prologue. */
14567
14568 void
14569 ix86_expand_split_stack_prologue (void)
14570 {
14571 struct ix86_frame frame;
14572 HOST_WIDE_INT allocate;
14573 unsigned HOST_WIDE_INT args_size;
14574 rtx_code_label *label;
14575 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
14576 rtx scratch_reg = NULL_RTX;
14577 rtx_code_label *varargs_label = NULL;
14578 rtx fn;
14579
14580 gcc_assert (flag_split_stack && reload_completed);
14581
14582 ix86_finalize_stack_realign_flags ();
14583 ix86_compute_frame_layout (&frame);
14584 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14585
14586 /* This is the label we will branch to if we have enough stack
14587 space. We expect the basic block reordering pass to reverse this
14588 branch if optimizing, so that we branch in the unlikely case. */
14589 label = gen_label_rtx ();
14590
14591 /* We need to compare the stack pointer minus the frame size with
14592 the stack boundary in the TCB. The stack boundary always gives
14593 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14594 can compare directly. Otherwise we need to do an addition. */
14595
14596 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14597 UNSPEC_STACK_CHECK);
14598 limit = gen_rtx_CONST (Pmode, limit);
14599 limit = gen_rtx_MEM (Pmode, limit);
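/* This MEM reads the current stack boundary out of the TCB as described
   above; the UNSPEC_STACK_CHECK address is what makes it addressable
   relative to the thread pointer (typically via the %fs/%gs segment).  */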
14600 if (allocate < SPLIT_STACK_AVAILABLE)
14601 current = stack_pointer_rtx;
14602 else
14603 {
14604 unsigned int scratch_regno;
14605 rtx offset;
14606
14607 /* We need a scratch register to hold the stack pointer minus
14608 the required frame size. Since this is the very start of the
14609 function, the scratch register can be any caller-saved
14610 register which is not used for parameters. */
14611 offset = GEN_INT (- allocate);
14612 scratch_regno = split_stack_prologue_scratch_regno ();
14613 if (scratch_regno == INVALID_REGNUM)
14614 return;
14615 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14616 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14617 {
14618 /* We don't use ix86_gen_add3 in this case because it will
14619 want to split to lea, but when not optimizing the insn
14620 will not be split after this point. */
14621 emit_insn (gen_rtx_SET (scratch_reg,
14622 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14623 offset)));
14624 }
14625 else
14626 {
14627 emit_move_insn (scratch_reg, offset);
14628 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14629 stack_pointer_rtx));
14630 }
14631 current = scratch_reg;
14632 }
14633
14634 ix86_expand_branch (GEU, current, limit, label);
14635 jump_insn = get_last_insn ();
14636 JUMP_LABEL (jump_insn) = label;
14637
14638 /* Mark the jump as very likely to be taken. */
14639 add_int_reg_note (jump_insn, REG_BR_PROB,
14640 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
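/* The emitted check is roughly of the shape (illustrative, 64-bit):
       cmp  <stack boundary in the TCB>, %rsp
       jae  .Lhave_enough_stack
   (comparing a scratch register instead of %rsp for large frames), with
   the unlikely fall-through path calling __morestack below.  */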
14641
14642 if (split_stack_fn == NULL_RTX)
14643 {
14644 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14645 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14646 }
14647 fn = split_stack_fn;
14648
14649 /* Get more stack space. We pass in the desired stack space and the
14650 size of the arguments to copy to the new stack. In 32-bit mode
14651 we push the parameters; __morestack will return on a new stack
14652 anyhow. In 64-bit mode we pass the parameters in r10 and
14653 r11. */
14654 allocate_rtx = GEN_INT (allocate);
14655 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
14656 call_fusage = NULL_RTX;
14657 if (TARGET_64BIT)
14658 {
14659 rtx reg10, reg11;
14660
14661 reg10 = gen_rtx_REG (Pmode, R10_REG);
14662 reg11 = gen_rtx_REG (Pmode, R11_REG);
14663
14664 /* If this function uses a static chain, it will be in %r10.
14665 Preserve it across the call to __morestack. */
14666 if (DECL_STATIC_CHAIN (cfun->decl))
14667 {
14668 rtx rax;
14669
14670 rax = gen_rtx_REG (word_mode, AX_REG);
14671 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14672 use_reg (&call_fusage, rax);
14673 }
14674
14675 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14676 && !TARGET_PECOFF)
14677 {
14678 HOST_WIDE_INT argval;
14679
14680 gcc_assert (Pmode == DImode);
14681 /* When using the large model we need to load the address
14682 into a register, and we've run out of registers. So we
14683 switch to a different calling convention, and we call a
14684 different function: __morestack_large. We pass the
14685 argument size in the upper 32 bits of r10 and pass the
14686 frame size in the lower 32 bits. */
14687 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14688 gcc_assert ((args_size & 0xffffffff) == args_size);
14689
14690 if (split_stack_fn_large == NULL_RTX)
14691 {
14692 split_stack_fn_large =
14693 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14694 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14695 }
14696 if (ix86_cmodel == CM_LARGE_PIC)
14697 {
14698 rtx_code_label *label;
14699 rtx x;
14700
14701 label = gen_label_rtx ();
14702 emit_label (label);
14703 LABEL_PRESERVE_P (label) = 1;
14704 emit_insn (gen_set_rip_rex64 (reg10, label));
14705 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14706 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14707 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14708 UNSPEC_GOT);
14709 x = gen_rtx_CONST (Pmode, x);
14710 emit_move_insn (reg11, x);
14711 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14712 x = gen_const_mem (Pmode, x);
14713 emit_move_insn (reg11, x);
14714 }
14715 else
14716 emit_move_insn (reg11, split_stack_fn_large);
14717
14718 fn = reg11;
14719
14720 argval = ((args_size << 16) << 16) + allocate;
14721 emit_move_insn (reg10, GEN_INT (argval));
14722 }
14723 else
14724 {
14725 emit_move_insn (reg10, allocate_rtx);
14726 emit_move_insn (reg11, GEN_INT (args_size));
14727 use_reg (&call_fusage, reg11);
14728 }
14729
14730 use_reg (&call_fusage, reg10);
14731 }
14732 else
14733 {
14734 emit_insn (gen_push (GEN_INT (args_size)));
14735 emit_insn (gen_push (allocate_rtx));
14736 }
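/* So, illustratively, the 32-bit call sequence is roughly
       push $ARGS_SIZE
       push $ALLOCATE
       call __morestack
   while in 64-bit mode the two values travel in %r10 and %r11 as set up
   in the branch above.  */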
14737 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14738 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14739 NULL_RTX, false);
14740 add_function_usage_to (call_insn, call_fusage);
14741
14742 /* In order to make call/return prediction work right, we now need
14743 to execute a return instruction. See
14744 libgcc/config/i386/morestack.S for the details on how this works.
14745
14746 For flow purposes gcc must not see this as a return
14747 instruction--we need control flow to continue at the subsequent
14748 label. Therefore, we use an unspec. */
14749 gcc_assert (crtl->args.pops_args < 65536);
14750 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14751
14752 /* If we are in 64-bit mode and this function uses a static chain,
14753 we saved %r10 in %rax before calling __morestack. */
14754 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14755 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14756 gen_rtx_REG (word_mode, AX_REG));
14757
14758 /* If this function calls va_start, we need to store a pointer to
14759 the arguments on the old stack, because they may not all have been
14760 copied to the new stack. At this point the old stack can be
14761 found at the frame pointer value used by __morestack, because
14762 __morestack has set that up before calling back to us. Here we
14763 store that pointer in a scratch register, and in
14764 ix86_expand_prologue we store the scratch register in a stack
14765 slot. */
14766 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14767 {
14768 unsigned int scratch_regno;
14769 rtx frame_reg;
14770 int words;
14771
14772 scratch_regno = split_stack_prologue_scratch_regno ();
14773 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14774 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14775
14776 /* 64-bit:
14777 fp -> old fp value
14778 return address within this function
14779 return address of caller of this function
14780 stack arguments
14781 So we add three words to get to the stack arguments.
14782
14783 32-bit:
14784 fp -> old fp value
14785 return address within this function
14786 first argument to __morestack
14787 second argument to __morestack
14788 return address of caller of this function
14789 stack arguments
14790 So we add five words to get to the stack arguments.
14791 */
14792 words = TARGET_64BIT ? 3 : 5;
14793 emit_insn (gen_rtx_SET (scratch_reg,
14794 gen_rtx_PLUS (Pmode, frame_reg,
14795 GEN_INT (words * UNITS_PER_WORD))));
14796
14797 varargs_label = gen_label_rtx ();
14798 emit_jump_insn (gen_jump (varargs_label));
14799 JUMP_LABEL (get_last_insn ()) = varargs_label;
14800
14801 emit_barrier ();
14802 }
14803
14804 emit_label (label);
14805 LABEL_NUSES (label) = 1;
14806
14807 /* If this function calls va_start, we now have to set the scratch
14808 register for the case where we do not call __morestack. In this
14809 case we need to set it based on the stack pointer. */
14810 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14811 {
14812 emit_insn (gen_rtx_SET (scratch_reg,
14813 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14814 GEN_INT (UNITS_PER_WORD))));
14815
14816 emit_label (varargs_label);
14817 LABEL_NUSES (varargs_label) = 1;
14818 }
14819 }
14820
14821 /* We may have to tell the dataflow pass that the split stack prologue
14822 is initializing a scratch register. */
14823
14824 static void
14825 ix86_live_on_entry (bitmap regs)
14826 {
14827 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14828 {
14829 gcc_assert (flag_split_stack);
14830 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14831 }
14832 }
14833 \f
14834 /* Extract the parts of an RTL expression that is a valid memory address
14835 for an instruction. Return 0 if the structure of the address is
14836 grossly off. Return -1 if the address contains ASHIFT, so it is not
14837 strictly valid, but is still used for computing the length of an lea instruction. */
14838
14839 int
14840 ix86_decompose_address (rtx addr, struct ix86_address *out)
14841 {
14842 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14843 rtx base_reg, index_reg;
14844 HOST_WIDE_INT scale = 1;
14845 rtx scale_rtx = NULL_RTX;
14846 rtx tmp;
14847 int retval = 1;
14848 addr_space_t seg = ADDR_SPACE_GENERIC;
14849
14850 /* Allow zero-extended SImode addresses;
14851 they will be emitted with an addr32 prefix. */
14852 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14853 {
14854 if (GET_CODE (addr) == ZERO_EXTEND
14855 && GET_MODE (XEXP (addr, 0)) == SImode)
14856 {
14857 addr = XEXP (addr, 0);
14858 if (CONST_INT_P (addr))
14859 return 0;
14860 }
14861 else if (GET_CODE (addr) == AND
14862 && const_32bit_mask (XEXP (addr, 1), DImode))
14863 {
14864 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14865 if (addr == NULL_RTX)
14866 return 0;
14867
14868 if (CONST_INT_P (addr))
14869 return 0;
14870 }
14871 }
14872
14873 /* Allow SImode subregs of DImode addresses;
14874 they will be emitted with an addr32 prefix. */
14875 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14876 {
14877 if (SUBREG_P (addr)
14878 && GET_MODE (SUBREG_REG (addr)) == DImode)
14879 {
14880 addr = SUBREG_REG (addr);
14881 if (CONST_INT_P (addr))
14882 return 0;
14883 }
14884 }
14885
14886 if (REG_P (addr))
14887 base = addr;
14888 else if (SUBREG_P (addr))
14889 {
14890 if (REG_P (SUBREG_REG (addr)))
14891 base = addr;
14892 else
14893 return 0;
14894 }
14895 else if (GET_CODE (addr) == PLUS)
14896 {
14897 rtx addends[4], op;
14898 int n = 0, i;
14899
14900 op = addr;
14901 do
14902 {
14903 if (n >= 4)
14904 return 0;
14905 addends[n++] = XEXP (op, 1);
14906 op = XEXP (op, 0);
14907 }
14908 while (GET_CODE (op) == PLUS);
14909 if (n >= 4)
14910 return 0;
14911 addends[n] = op;
14912
14913 for (i = n; i >= 0; --i)
14914 {
14915 op = addends[i];
14916 switch (GET_CODE (op))
14917 {
14918 case MULT:
14919 if (index)
14920 return 0;
14921 index = XEXP (op, 0);
14922 scale_rtx = XEXP (op, 1);
14923 break;
14924
14925 case ASHIFT:
14926 if (index)
14927 return 0;
14928 index = XEXP (op, 0);
14929 tmp = XEXP (op, 1);
14930 if (!CONST_INT_P (tmp))
14931 return 0;
14932 scale = INTVAL (tmp);
14933 if ((unsigned HOST_WIDE_INT) scale > 3)
14934 return 0;
14935 scale = 1 << scale;
14936 break;
14937
14938 case ZERO_EXTEND:
14939 op = XEXP (op, 0);
14940 if (GET_CODE (op) != UNSPEC)
14941 return 0;
14942 /* FALLTHRU */
14943
14944 case UNSPEC:
14945 if (XINT (op, 1) == UNSPEC_TP
14946 && TARGET_TLS_DIRECT_SEG_REFS
14947 && seg == ADDR_SPACE_GENERIC)
14948 seg = DEFAULT_TLS_SEG_REG;
14949 else
14950 return 0;
14951 break;
14952
14953 case SUBREG:
14954 if (!REG_P (SUBREG_REG (op)))
14955 return 0;
14956 /* FALLTHRU */
14957
14958 case REG:
14959 if (!base)
14960 base = op;
14961 else if (!index)
14962 index = op;
14963 else
14964 return 0;
14965 break;
14966
14967 case CONST:
14968 case CONST_INT:
14969 case SYMBOL_REF:
14970 case LABEL_REF:
14971 if (disp)
14972 return 0;
14973 disp = op;
14974 break;
14975
14976 default:
14977 return 0;
14978 }
14979 }
14980 }
14981 else if (GET_CODE (addr) == MULT)
14982 {
14983 index = XEXP (addr, 0); /* index*scale */
14984 scale_rtx = XEXP (addr, 1);
14985 }
14986 else if (GET_CODE (addr) == ASHIFT)
14987 {
14988 /* We're called for lea too, which implements ashift on occasion. */
14989 index = XEXP (addr, 0);
14990 tmp = XEXP (addr, 1);
14991 if (!CONST_INT_P (tmp))
14992 return 0;
14993 scale = INTVAL (tmp);
14994 if ((unsigned HOST_WIDE_INT) scale > 3)
14995 return 0;
14996 scale = 1 << scale;
14997 retval = -1;
14998 }
14999 else
15000 disp = addr; /* displacement */
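/* Illustrative example: an address of the form
       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   is decomposed above into base = A, index = B, scale = 4 and disp = 12,
   i.e. the components of a "12(%A,%B,4)" style operand.  */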
15001
15002 if (index)
15003 {
15004 if (REG_P (index))
15005 ;
15006 else if (SUBREG_P (index)
15007 && REG_P (SUBREG_REG (index)))
15008 ;
15009 else
15010 return 0;
15011 }
15012
15013 /* Extract the integral value of scale. */
15014 if (scale_rtx)
15015 {
15016 if (!CONST_INT_P (scale_rtx))
15017 return 0;
15018 scale = INTVAL (scale_rtx);
15019 }
15020
15021 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
15022 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
15023
15024 /* Avoid useless 0 displacement. */
15025 if (disp == const0_rtx && (base || index))
15026 disp = NULL_RTX;
15027
15028 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
15029 if (base_reg && index_reg && scale == 1
15030 && (index_reg == arg_pointer_rtx
15031 || index_reg == frame_pointer_rtx
15032 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
15033 {
15034 std::swap (base, index);
15035 std::swap (base_reg, index_reg);
15036 }
15037
15038 /* Special case: %ebp cannot be encoded as a base without a displacement.
15039 Similarly %r13. */
15040 if (!disp
15041 && base_reg
15042 && (base_reg == hard_frame_pointer_rtx
15043 || base_reg == frame_pointer_rtx
15044 || base_reg == arg_pointer_rtx
15045 || (REG_P (base_reg)
15046 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
15047 || REGNO (base_reg) == R13_REG))))
15048 disp = const0_rtx;
15049
15050 /* Special case: on K6, [%esi] makes the instruction vector decoded.
15051 Avoid this by transforming to [%esi+0].
15052 Reload calls address legitimization without cfun defined, so we need
15053 to test cfun for being non-NULL. */
15054 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
15055 && base_reg && !index_reg && !disp
15056 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
15057 disp = const0_rtx;
15058
15059 /* Special case: encode reg+reg instead of reg*2. */
15060 if (!base && index && scale == 2)
15061 base = index, base_reg = index_reg, scale = 1;
15062
15063 /* Special case: scaling cannot be encoded without base or displacement. */
15064 if (!base && !disp && index && scale != 1)
15065 disp = const0_rtx;
15066
15067 out->base = base;
15068 out->index = index;
15069 out->disp = disp;
15070 out->scale = scale;
15071 out->seg = seg;
15072
15073 return retval;
15074 }
15075 \f
15076 /* Return cost of the memory address x.
15077 For i386, it is better to use a complex address than let gcc copy
15078 the address into a reg and make a new pseudo. But not if the address
15079 requires two regs - that would mean more pseudos with longer
15080 lifetimes. */
15081 static int
15082 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
15083 {
15084 struct ix86_address parts;
15085 int cost = 1;
15086 int ok = ix86_decompose_address (x, &parts);
15087
15088 gcc_assert (ok);
15089
15090 if (parts.base && SUBREG_P (parts.base))
15091 parts.base = SUBREG_REG (parts.base);
15092 if (parts.index && SUBREG_P (parts.index))
15093 parts.index = SUBREG_REG (parts.index);
15094
15095 /* Attempt to minimize number of registers in the address by increasing
15096 address cost for each used register. We don't increase address cost
15097 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
15098 is not invariant itself it most likely means that base or index is not
15099 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
15100 which is not profitable for x86. */
15101 if (parts.base
15102 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
15103 && (current_pass->type == GIMPLE_PASS
15104 || !pic_offset_table_rtx
15105 || !REG_P (parts.base)
15106 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
15107 cost++;
15108
15109 if (parts.index
15110 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
15111 && (current_pass->type == GIMPLE_PASS
15112 || !pic_offset_table_rtx
15113 || !REG_P (parts.index)
15114 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
15115 cost++;
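/* E.g. (roughly): an address using two pseudo registers ends up with a
   cost of 3 here, one pseudo register gives 2, and a single hard register
   or a bare displacement stays at the base cost of 1.  */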
15116
15117 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
15118 since its predecode logic can't detect the length of instructions
15119 and decoding degenerates to vector decoding. Increase the cost of such
15120 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
15121 to split such addresses or even refuse such addresses at all.
15122
15123 Following addressing modes are affected:
15124 [base+scale*index]
15125 [scale*index+disp]
15126 [base+index]
15127
15128 The first and last case may be avoidable by explicitly coding the zero in
15129 the memory address, but I don't have an AMD-K6 machine handy to check this
15130 theory. */
15131
15132 if (TARGET_K6
15133 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
15134 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
15135 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
15136 cost += 10;
15137
15138 return cost;
15139 }
15140 \f
15141 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
15142 this is used to form addresses to local data when -fPIC is in
15143 use. */
15144
15145 static bool
15146 darwin_local_data_pic (rtx disp)
15147 {
15148 return (GET_CODE (disp) == UNSPEC
15149 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15150 }
15151
15152 /* True if operand X should be loaded from GOT. */
15153
15154 bool
15155 ix86_force_load_from_GOT_p (rtx x)
15156 {
15157 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15158 && !TARGET_PECOFF && !TARGET_MACHO
15159 && !flag_plt && !flag_pic
15160 && ix86_cmodel != CM_LARGE
15161 && GET_CODE (x) == SYMBOL_REF
15162 && SYMBOL_REF_FUNCTION_P (x)
15163 && !SYMBOL_REF_LOCAL_P (x));
15164 }
15165
15166 /* Determine if a given RTX is a valid constant. We already know this
15167 satisfies CONSTANT_P. */
15168
15169 static bool
15170 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15171 {
15172 /* Pointer bounds constants are not valid. */
15173 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
15174 return false;
15175
15176 switch (GET_CODE (x))
15177 {
15178 case CONST:
15179 x = XEXP (x, 0);
15180
15181 if (GET_CODE (x) == PLUS)
15182 {
15183 if (!CONST_INT_P (XEXP (x, 1)))
15184 return false;
15185 x = XEXP (x, 0);
15186 }
15187
15188 if (TARGET_MACHO && darwin_local_data_pic (x))
15189 return true;
15190
15191 /* Only some unspecs are valid as "constants". */
15192 if (GET_CODE (x) == UNSPEC)
15193 switch (XINT (x, 1))
15194 {
15195 case UNSPEC_GOT:
15196 case UNSPEC_GOTOFF:
15197 case UNSPEC_PLTOFF:
15198 return TARGET_64BIT;
15199 case UNSPEC_TPOFF:
15200 case UNSPEC_NTPOFF:
15201 x = XVECEXP (x, 0, 0);
15202 return (GET_CODE (x) == SYMBOL_REF
15203 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15204 case UNSPEC_DTPOFF:
15205 x = XVECEXP (x, 0, 0);
15206 return (GET_CODE (x) == SYMBOL_REF
15207 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15208 default:
15209 return false;
15210 }
15211
15212 /* We must have drilled down to a symbol. */
15213 if (GET_CODE (x) == LABEL_REF)
15214 return true;
15215 if (GET_CODE (x) != SYMBOL_REF)
15216 return false;
15217 /* FALLTHRU */
15218
15219 case SYMBOL_REF:
15220 /* TLS symbols are never valid. */
15221 if (SYMBOL_REF_TLS_MODEL (x))
15222 return false;
15223
15224 /* DLLIMPORT symbols are never valid. */
15225 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15226 && SYMBOL_REF_DLLIMPORT_P (x))
15227 return false;
15228
15229 #if TARGET_MACHO
15230 /* mdynamic-no-pic */
15231 if (MACHO_DYNAMIC_NO_PIC_P)
15232 return machopic_symbol_defined_p (x);
15233 #endif
15234
15235 /* External function address should be loaded
15236 via the GOT slot to avoid PLT. */
15237 if (ix86_force_load_from_GOT_p (x))
15238 return false;
15239
15240 break;
15241
15242 CASE_CONST_SCALAR_INT:
15243 switch (mode)
15244 {
15245 case TImode:
15246 if (TARGET_64BIT)
15247 return true;
15248 /* FALLTHRU */
15249 case OImode:
15250 case XImode:
15251 if (!standard_sse_constant_p (x, mode))
15252 return false;
15253 default:
15254 break;
15255 }
15256 break;
15257
15258 case CONST_VECTOR:
15259 if (!standard_sse_constant_p (x, mode))
15260 return false;
15261
15262 default:
15263 break;
15264 }
15265
15266 /* Otherwise we handle everything else in the move patterns. */
15267 return true;
15268 }
15269
15270 /* Determine if it's legal to put X into the constant pool. This
15271 is not possible for the address of thread-local symbols, which
15272 is checked above. */
15273
15274 static bool
15275 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15276 {
15277 /* We can put any immediate constant in memory. */
15278 switch (GET_CODE (x))
15279 {
15280 CASE_CONST_ANY:
15281 return false;
15282
15283 default:
15284 break;
15285 }
15286
15287 return !ix86_legitimate_constant_p (mode, x);
15288 }
15289
15290 /* Return true if the symbol is marked as dllimport or as a stub variable,
15291 otherwise false. */
15292
15293 static bool
15294 is_imported_p (rtx x)
15295 {
15296 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15297 || GET_CODE (x) != SYMBOL_REF)
15298 return false;
15299
15300 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15301 }
15302
15303
15304 /* Nonzero if the constant value X is a legitimate general operand
15305 when generating PIC code. It is given that flag_pic is on and
15306 that X satisfies CONSTANT_P. */
15307
15308 bool
15309 legitimate_pic_operand_p (rtx x)
15310 {
15311 rtx inner;
15312
15313 switch (GET_CODE (x))
15314 {
15315 case CONST:
15316 inner = XEXP (x, 0);
15317 if (GET_CODE (inner) == PLUS
15318 && CONST_INT_P (XEXP (inner, 1)))
15319 inner = XEXP (inner, 0);
15320
15321 /* Only some unspecs are valid as "constants". */
15322 if (GET_CODE (inner) == UNSPEC)
15323 switch (XINT (inner, 1))
15324 {
15325 case UNSPEC_GOT:
15326 case UNSPEC_GOTOFF:
15327 case UNSPEC_PLTOFF:
15328 return TARGET_64BIT;
15329 case UNSPEC_TPOFF:
15330 x = XVECEXP (inner, 0, 0);
15331 return (GET_CODE (x) == SYMBOL_REF
15332 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15333 case UNSPEC_MACHOPIC_OFFSET:
15334 return legitimate_pic_address_disp_p (x);
15335 default:
15336 return false;
15337 }
15338 /* FALLTHRU */
15339
15340 case SYMBOL_REF:
15341 case LABEL_REF:
15342 return legitimate_pic_address_disp_p (x);
15343
15344 default:
15345 return true;
15346 }
15347 }
15348
15349 /* Determine if a given CONST RTX is a valid memory displacement
15350 in PIC mode. */
15351
15352 bool
15353 legitimate_pic_address_disp_p (rtx disp)
15354 {
15355 bool saw_plus;
15356
15357 /* In 64bit mode we can allow direct addresses of symbols and labels
15358 when they are not dynamic symbols. */
15359 if (TARGET_64BIT)
15360 {
15361 rtx op0 = disp, op1;
15362
15363 switch (GET_CODE (disp))
15364 {
15365 case LABEL_REF:
15366 return true;
15367
15368 case CONST:
15369 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15370 break;
15371 op0 = XEXP (XEXP (disp, 0), 0);
15372 op1 = XEXP (XEXP (disp, 0), 1);
15373 if (!CONST_INT_P (op1)
15374 || INTVAL (op1) >= 16*1024*1024
15375 || INTVAL (op1) < -16*1024*1024)
15376 break;
15377 if (GET_CODE (op0) == LABEL_REF)
15378 return true;
15379 if (GET_CODE (op0) == CONST
15380 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15381 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15382 return true;
15383 if (GET_CODE (op0) == UNSPEC
15384 && XINT (op0, 1) == UNSPEC_PCREL)
15385 return true;
15386 if (GET_CODE (op0) != SYMBOL_REF)
15387 break;
15388 /* FALLTHRU */
15389
15390 case SYMBOL_REF:
15391 /* TLS references should always be enclosed in UNSPEC.
15392 The dllimported symbol always needs to be resolved. */
15393 if (SYMBOL_REF_TLS_MODEL (op0)
15394 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15395 return false;
15396
15397 if (TARGET_PECOFF)
15398 {
15399 if (is_imported_p (op0))
15400 return true;
15401
15402 if (SYMBOL_REF_FAR_ADDR_P (op0)
15403 || !SYMBOL_REF_LOCAL_P (op0))
15404 break;
15405
15406 /* Function symbols need to be resolved only for
15407 the large model.
15408 For the small model we don't need to resolve anything
15409 here. */
15410 if ((ix86_cmodel != CM_LARGE_PIC
15411 && SYMBOL_REF_FUNCTION_P (op0))
15412 || ix86_cmodel == CM_SMALL_PIC)
15413 return true;
15414 /* Non-external symbols don't need to be resolved for
15415 the large and medium models. */
15416 if ((ix86_cmodel == CM_LARGE_PIC
15417 || ix86_cmodel == CM_MEDIUM_PIC)
15418 && !SYMBOL_REF_EXTERNAL_P (op0))
15419 return true;
15420 }
15421 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15422 && (SYMBOL_REF_LOCAL_P (op0)
15423 || (HAVE_LD_PIE_COPYRELOC
15424 && flag_pie
15425 && !SYMBOL_REF_WEAK (op0)
15426 && !SYMBOL_REF_FUNCTION_P (op0)))
15427 && ix86_cmodel != CM_LARGE_PIC)
15428 return true;
15429 break;
15430
15431 default:
15432 break;
15433 }
15434 }
15435 if (GET_CODE (disp) != CONST)
15436 return false;
15437 disp = XEXP (disp, 0);
15438
15439 if (TARGET_64BIT)
15440 {
15441 /* We cannot safely allow PLUS expressions. This limits the allowed
15442 distance of GOT tables. We should not need these anyway. */
15443 if (GET_CODE (disp) != UNSPEC
15444 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15445 && XINT (disp, 1) != UNSPEC_GOTOFF
15446 && XINT (disp, 1) != UNSPEC_PCREL
15447 && XINT (disp, 1) != UNSPEC_PLTOFF))
15448 return false;
15449
15450 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15451 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15452 return false;
15453 return true;
15454 }
15455
15456 saw_plus = false;
15457 if (GET_CODE (disp) == PLUS)
15458 {
15459 if (!CONST_INT_P (XEXP (disp, 1)))
15460 return false;
15461 disp = XEXP (disp, 0);
15462 saw_plus = true;
15463 }
15464
15465 if (TARGET_MACHO && darwin_local_data_pic (disp))
15466 return true;
15467
15468 if (GET_CODE (disp) != UNSPEC)
15469 return false;
15470
15471 switch (XINT (disp, 1))
15472 {
15473 case UNSPEC_GOT:
15474 if (saw_plus)
15475 return false;
15476 /* We need to check for both symbols and labels because VxWorks loads
15477 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15478 details. */
15479 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15480 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15481 case UNSPEC_GOTOFF:
15482 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15483 While the ABI also specifies a 32bit relocation, we don't produce
15484 it in the small PIC model at all. */
15485 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15486 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15487 && !TARGET_64BIT)
15488 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15489 return false;
15490 case UNSPEC_GOTTPOFF:
15491 case UNSPEC_GOTNTPOFF:
15492 case UNSPEC_INDNTPOFF:
15493 if (saw_plus)
15494 return false;
15495 disp = XVECEXP (disp, 0, 0);
15496 return (GET_CODE (disp) == SYMBOL_REF
15497 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15498 case UNSPEC_NTPOFF:
15499 disp = XVECEXP (disp, 0, 0);
15500 return (GET_CODE (disp) == SYMBOL_REF
15501 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15502 case UNSPEC_DTPOFF:
15503 disp = XVECEXP (disp, 0, 0);
15504 return (GET_CODE (disp) == SYMBOL_REF
15505 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15506 }
15507
15508 return false;
15509 }
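
/* For illustration, assuming a typical ELF configuration: a valid
   32-bit PIC displacement looks like

       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   optionally wrapped in (plus ... (const_int N)), while a valid
   64-bit displacement is typically

       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))

   with no extra offset, since PLUS is rejected above for 64-bit.  */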
15510
15511 /* Determine if OP is a suitable RTX for an address register.
15512 Return the naked register if a register or a register subreg is
15513 found, otherwise return NULL_RTX. */
15514
15515 static rtx
15516 ix86_validate_address_register (rtx op)
15517 {
15518 machine_mode mode = GET_MODE (op);
15519
15520 /* Only SImode or DImode registers can form the address. */
15521 if (mode != SImode && mode != DImode)
15522 return NULL_RTX;
15523
15524 if (REG_P (op))
15525 return op;
15526 else if (SUBREG_P (op))
15527 {
15528 rtx reg = SUBREG_REG (op);
15529
15530 if (!REG_P (reg))
15531 return NULL_RTX;
15532
15533 mode = GET_MODE (reg);
15534
15535 /* Don't allow SUBREGs that span more than a word. It can
15536 lead to spill failures when the register is one word out
15537 of a two word structure. */
15538 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15539 return NULL_RTX;
15540
15541 /* Allow only SUBREGs of non-eliminable hard registers. */
15542 if (register_no_elim_operand (reg, mode))
15543 return reg;
15544 }
15545
15546 /* Op is not a register. */
15547 return NULL_RTX;
15548 }
15549
15550 /* Recognizes RTL expressions that are valid memory addresses for an
15551 instruction. The MODE argument is the machine mode for the MEM
15552 expression that wants to use this address.
15553
15554 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15555 convert common non-canonical forms to canonical form so that they will
15556 be recognized. */
15557
15558 static bool
15559 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15560 {
15561 struct ix86_address parts;
15562 rtx base, index, disp;
15563 HOST_WIDE_INT scale;
15564 addr_space_t seg;
15565
15566 if (ix86_decompose_address (addr, &parts) <= 0)
15567 /* Decomposition failed. */
15568 return false;
15569
15570 base = parts.base;
15571 index = parts.index;
15572 disp = parts.disp;
15573 scale = parts.scale;
15574 seg = parts.seg;
15575
15576 /* Validate base register. */
15577 if (base)
15578 {
15579 rtx reg = ix86_validate_address_register (base);
15580
15581 if (reg == NULL_RTX)
15582 return false;
15583
15584 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15585 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15586 /* Base is not valid. */
15587 return false;
15588 }
15589
15590 /* Validate index register. */
15591 if (index)
15592 {
15593 rtx reg = ix86_validate_address_register (index);
15594
15595 if (reg == NULL_RTX)
15596 return false;
15597
15598 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15599 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15600 /* Index is not valid. */
15601 return false;
15602 }
15603
15604 /* Index and base should have the same mode. */
15605 if (base && index
15606 && GET_MODE (base) != GET_MODE (index))
15607 return false;
15608
15609 /* Address override works only on the (%reg) part of %fs:(%reg). */
15610 if (seg != ADDR_SPACE_GENERIC
15611 && ((base && GET_MODE (base) != word_mode)
15612 || (index && GET_MODE (index) != word_mode)))
15613 return false;
15614
15615 /* Validate scale factor. */
15616 if (scale != 1)
15617 {
15618 if (!index)
15619 /* Scale without index. */
15620 return false;
15621
15622 if (scale != 2 && scale != 4 && scale != 8)
15623 /* Scale is not a valid multiplier. */
15624 return false;
15625 }
15626
15627 /* Validate displacement. */
15628 if (disp)
15629 {
15630 if (GET_CODE (disp) == CONST
15631 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15632 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15633 switch (XINT (XEXP (disp, 0), 1))
15634 {
15635 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
15636 when used. While the ABI also specifies 32bit relocations, we
15637 don't produce them at all and use IP-relative addressing instead.
15638 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
15639 should be loaded via the GOT. */
15640 case UNSPEC_GOT:
15641 if (!TARGET_64BIT
15642 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15643 goto is_legitimate_pic;
15644 /* FALLTHRU */
15645 case UNSPEC_GOTOFF:
15646 gcc_assert (flag_pic);
15647 if (!TARGET_64BIT)
15648 goto is_legitimate_pic;
15649
15650 /* 64bit address unspec. */
15651 return false;
15652
15653 case UNSPEC_GOTPCREL:
15654 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15655 goto is_legitimate_pic;
15656 /* FALLTHRU */
15657 case UNSPEC_PCREL:
15658 gcc_assert (flag_pic);
15659 goto is_legitimate_pic;
15660
15661 case UNSPEC_GOTTPOFF:
15662 case UNSPEC_GOTNTPOFF:
15663 case UNSPEC_INDNTPOFF:
15664 case UNSPEC_NTPOFF:
15665 case UNSPEC_DTPOFF:
15666 break;
15667
15668 case UNSPEC_STACK_CHECK:
15669 gcc_assert (flag_split_stack);
15670 break;
15671
15672 default:
15673 /* Invalid address unspec. */
15674 return false;
15675 }
15676
15677 else if (SYMBOLIC_CONST (disp)
15678 && (flag_pic
15679 || (TARGET_MACHO
15680 #if TARGET_MACHO
15681 && MACHOPIC_INDIRECT
15682 && !machopic_operand_p (disp)
15683 #endif
15684 )))
15685 {
15686
15687 is_legitimate_pic:
15688 if (TARGET_64BIT && (index || base))
15689 {
15690 /* foo@dtpoff(%rX) is ok. */
15691 if (GET_CODE (disp) != CONST
15692 || GET_CODE (XEXP (disp, 0)) != PLUS
15693 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15694 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15695 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15696 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15697 /* Non-constant pic memory reference. */
15698 return false;
15699 }
15700 else if ((!TARGET_MACHO || flag_pic)
15701 && ! legitimate_pic_address_disp_p (disp))
15702 /* Displacement is an invalid pic construct. */
15703 return false;
15704 #if TARGET_MACHO
15705 else if (MACHO_DYNAMIC_NO_PIC_P
15706 && !ix86_legitimate_constant_p (Pmode, disp))
15707 /* Displacement must be referenced via a non_lazy_pointer. */
15708 return false;
15709 #endif
15710
15711 /* This code used to verify that a symbolic pic displacement
15712 includes the pic_offset_table_rtx register.
15713
15714 While this is a good idea, unfortunately these constructs may
15715 be created by the "adds using lea" optimization for incorrect
15716 code like:
15717 
15718 int a;
15719 int foo(int i)
15720 {
15721 return *(&a+i);
15722 }
15723 
15724 This code is nonsensical, but results in addressing the
15725 GOT table with a pic_offset_table_rtx base. We can't
15726 easily refuse it, since it gets matched by the
15727 "addsi3" pattern, which later gets split to lea when the
15728 output register differs from the input. While this
15729 could be handled by a separate addsi pattern for this case
15730 that never results in lea, disabling this test seems to be
15731 the easier and correct fix for the crash. */
15732 }
15733 else if (GET_CODE (disp) != LABEL_REF
15734 && !CONST_INT_P (disp)
15735 && (GET_CODE (disp) != CONST
15736 || !ix86_legitimate_constant_p (Pmode, disp))
15737 && (GET_CODE (disp) != SYMBOL_REF
15738 || !ix86_legitimate_constant_p (Pmode, disp)))
15739 /* Displacement is not constant. */
15740 return false;
15741 else if (TARGET_64BIT
15742 && !x86_64_immediate_operand (disp, VOIDmode))
15743 /* Displacement is out of range. */
15744 return false;
15745 /* In x32 mode, constant addresses are sign extended to 64bit, so
15746 we have to prevent addresses from 0x80000000 to 0xffffffff. */
15747 else if (TARGET_X32 && !(index || base)
15748 && CONST_INT_P (disp)
15749 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15750 return false;
15751 }
15752
15753 /* Everything looks valid. */
15754 return true;
15755 }
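
/* For illustration only, a few addresses in terms of the final
   assembly (AT&T syntax) that the checks above accept:

       (%eax)                  base only
       foo(%eax,%ebx,4)        disp + base + index*scale
       %fs:(%eax)              segment override, register in word_mode

   whereas, for example, a scale of 3, a scale factor without an index
   register, or a base and index in different modes are rejected.  */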
15756
15757 /* Determine if a given RTX is a valid constant address. */
15758
15759 bool
15760 constant_address_p (rtx x)
15761 {
15762 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15763 }
15764 \f
15765 /* Return a unique alias set for the GOT. */
15766
15767 static alias_set_type
15768 ix86_GOT_alias_set (void)
15769 {
15770 static alias_set_type set = -1;
15771 if (set == -1)
15772 set = new_alias_set ();
15773 return set;
15774 }
15775
15776 /* Return a legitimate reference for ORIG (an address) using the
15777 register REG. If REG is 0, a new pseudo is generated.
15778
15779 There are two types of references that must be handled:
15780
15781 1. Global data references must load the address from the GOT, via
15782 the PIC reg. An insn is emitted to do this load, and the reg is
15783 returned.
15784
15785 2. Static data references, constant pool addresses, and code labels
15786 compute the address as an offset from the GOT, whose base is in
15787 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15788 differentiate them from global data objects. The returned
15789 address is the PIC reg + an unspec constant.
15790
15791 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15792 reg also appears in the address. */
15793
15794 static rtx
15795 legitimize_pic_address (rtx orig, rtx reg)
15796 {
15797 rtx addr = orig;
15798 rtx new_rtx = orig;
15799
15800 #if TARGET_MACHO
15801 if (TARGET_MACHO && !TARGET_64BIT)
15802 {
15803 if (reg == 0)
15804 reg = gen_reg_rtx (Pmode);
15805 /* Use the generic Mach-O PIC machinery. */
15806 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15807 }
15808 #endif
15809
15810 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15811 {
15812 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15813 if (tmp)
15814 return tmp;
15815 }
15816
15817 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15818 new_rtx = addr;
15819 else if ((!TARGET_64BIT
15820 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15821 && !TARGET_PECOFF
15822 && gotoff_operand (addr, Pmode))
15823 {
15824 /* This symbol may be referenced via a displacement
15825 from the PIC base address (@GOTOFF). */
15826 if (GET_CODE (addr) == CONST)
15827 addr = XEXP (addr, 0);
15828
15829 if (GET_CODE (addr) == PLUS)
15830 {
15831 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15832 UNSPEC_GOTOFF);
15833 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15834 }
15835 else
15836 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15837
15838 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15839
15840 if (TARGET_64BIT)
15841 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15842
15843 if (reg != 0)
15844 {
15845 gcc_assert (REG_P (reg));
15846 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15847 new_rtx, reg, 1, OPTAB_DIRECT);
15848 }
15849 else
15850 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15851 }
15852 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15853 /* We can't use @GOTOFF for text labels
15854 on VxWorks, see gotoff_operand. */
15855 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15856 {
15857 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15858 if (tmp)
15859 return tmp;
15860
15861 /* For x64 PE-COFF there is no GOT table,
15862 so we use the address directly. */
15863 if (TARGET_64BIT && TARGET_PECOFF)
15864 {
15865 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15866 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15867 }
15868 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15869 {
15870 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15871 UNSPEC_GOTPCREL);
15872 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15873 new_rtx = gen_const_mem (Pmode, new_rtx);
15874 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15875 }
15876 else
15877 {
15878 /* This symbol must be referenced via a load
15879 from the Global Offset Table (@GOT). */
15880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15882 if (TARGET_64BIT)
15883 new_rtx = force_reg (Pmode, new_rtx);
15884 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15885 new_rtx = gen_const_mem (Pmode, new_rtx);
15886 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15887 }
15888
15889 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15890 }
15891 else
15892 {
15893 if (CONST_INT_P (addr)
15894 && !x86_64_immediate_operand (addr, VOIDmode))
15895 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15896 else if (GET_CODE (addr) == CONST)
15897 {
15898 addr = XEXP (addr, 0);
15899
15900 /* We must match stuff we generate before. Assume the only
15901 unspecs that can get here are ours. Not that we could do
15902 anything with them anyway.... */
15903 if (GET_CODE (addr) == UNSPEC
15904 || (GET_CODE (addr) == PLUS
15905 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15906 return orig;
15907 gcc_assert (GET_CODE (addr) == PLUS);
15908 }
15909
15910 if (GET_CODE (addr) == PLUS)
15911 {
15912 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15913
15914 /* Check first to see if this is a constant
15915 offset from a @GOTOFF symbol reference. */
15916 if (!TARGET_PECOFF
15917 && gotoff_operand (op0, Pmode)
15918 && CONST_INT_P (op1))
15919 {
15920 if (!TARGET_64BIT)
15921 {
15922 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15923 UNSPEC_GOTOFF);
15924 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15925 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15926
15927 if (reg != 0)
15928 {
15929 gcc_assert (REG_P (reg));
15930 new_rtx = expand_simple_binop (Pmode, PLUS,
15931 pic_offset_table_rtx,
15932 new_rtx, reg, 1,
15933 OPTAB_DIRECT);
15934 }
15935 else
15936 new_rtx
15937 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15938 }
15939 else
15940 {
15941 if (INTVAL (op1) < -16*1024*1024
15942 || INTVAL (op1) >= 16*1024*1024)
15943 {
15944 if (!x86_64_immediate_operand (op1, Pmode))
15945 op1 = force_reg (Pmode, op1);
15946
15947 new_rtx
15948 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15949 }
15950 }
15951 }
15952 else
15953 {
15954 rtx base = legitimize_pic_address (op0, reg);
15955 machine_mode mode = GET_MODE (base);
15956 new_rtx
15957 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15958
15959 if (CONST_INT_P (new_rtx))
15960 {
15961 if (INTVAL (new_rtx) < -16*1024*1024
15962 || INTVAL (new_rtx) >= 16*1024*1024)
15963 {
15964 if (!x86_64_immediate_operand (new_rtx, mode))
15965 new_rtx = force_reg (mode, new_rtx);
15966
15967 new_rtx
15968 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15969 }
15970 else
15971 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15972 }
15973 else
15974 {
15975 /* For %rip addressing, we have to use
15976 just disp32, neither base nor index. */
15977 if (TARGET_64BIT
15978 && (GET_CODE (base) == SYMBOL_REF
15979 || GET_CODE (base) == LABEL_REF))
15980 base = force_reg (mode, base);
15981 if (GET_CODE (new_rtx) == PLUS
15982 && CONSTANT_P (XEXP (new_rtx, 1)))
15983 {
15984 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15985 new_rtx = XEXP (new_rtx, 1);
15986 }
15987 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15988 }
15989 }
15990 }
15991 }
15992 return new_rtx;
15993 }
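
/* A rough sketch of what the transformations above typically become
   in the final output (AT&T syntax, ELF; registers are examples only):

       32-bit, local data:    leal  foo@GOTOFF(%ebx), %eax
       32-bit, global data:   movl  foo@GOT(%ebx), %eax
       64-bit, small model:   movq  foo@GOTPCREL(%rip), %rax

   The RTL built here carries UNSPEC_GOTOFF, UNSPEC_GOT and
   UNSPEC_GOTPCREL respectively; the @-suffixes are printed later by
   output_pic_addr_const.  */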
15994 \f
15995 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15996
15997 static rtx
15998 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15999 {
16000 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
16001
16002 if (GET_MODE (tp) != tp_mode)
16003 {
16004 gcc_assert (GET_MODE (tp) == SImode);
16005 gcc_assert (tp_mode == DImode);
16006
16007 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
16008 }
16009
16010 if (to_reg)
16011 tp = copy_to_mode_reg (tp_mode, tp);
16012
16013 return tp;
16014 }
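
/* For context (Linux-style TLS, as an example): the UNSPEC_TP above
   represents the thread-pointer segment base, i.e. %fs on 64-bit and
   %gs on 32-bit targets, so an access of the form (plus tp offset)
   ends up as %fs:offset or %gs:offset when TARGET_TLS_DIRECT_SEG_REFS
   allows it.  */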
16015
16016 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16017
16018 static GTY(()) rtx ix86_tls_symbol;
16019
16020 static rtx
16021 ix86_tls_get_addr (void)
16022 {
16023 if (!ix86_tls_symbol)
16024 {
16025 const char *sym
16026 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
16027 ? "___tls_get_addr" : "__tls_get_addr");
16028
16029 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
16030 }
16031
16032 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
16033 {
16034 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
16035 UNSPEC_PLTOFF);
16036 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
16037 gen_rtx_CONST (Pmode, unspec));
16038 }
16039
16040 return ix86_tls_symbol;
16041 }
16042
16043 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16044
16045 static GTY(()) rtx ix86_tls_module_base_symbol;
16046
16047 rtx
16048 ix86_tls_module_base (void)
16049 {
16050 if (!ix86_tls_module_base_symbol)
16051 {
16052 ix86_tls_module_base_symbol
16053 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
16054
16055 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16056 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16057 }
16058
16059 return ix86_tls_module_base_symbol;
16060 }
16061
16062 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
16063 false if we expect this to be used for a memory address and true if
16064 we expect to load the address into a register. */
16065
16066 static rtx
16067 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
16068 {
16069 rtx dest, base, off;
16070 rtx pic = NULL_RTX, tp = NULL_RTX;
16071 machine_mode tp_mode = Pmode;
16072 int type;
16073
16074 /* Fall back to the global dynamic model if the tool chain cannot
16075 support local dynamic. */
16076 if (TARGET_SUN_TLS && !TARGET_64BIT
16077 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
16078 && model == TLS_MODEL_LOCAL_DYNAMIC)
16079 model = TLS_MODEL_GLOBAL_DYNAMIC;
16080
16081 switch (model)
16082 {
16083 case TLS_MODEL_GLOBAL_DYNAMIC:
16084 dest = gen_reg_rtx (Pmode);
16085
16086 if (!TARGET_64BIT)
16087 {
16088 if (flag_pic && !TARGET_PECOFF)
16089 pic = pic_offset_table_rtx;
16090 else
16091 {
16092 pic = gen_reg_rtx (Pmode);
16093 emit_insn (gen_set_got (pic));
16094 }
16095 }
16096
16097 if (TARGET_GNU2_TLS)
16098 {
16099 if (TARGET_64BIT)
16100 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
16101 else
16102 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
16103
16104 tp = get_thread_pointer (Pmode, true);
16105 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
16106
16107 if (GET_MODE (x) != Pmode)
16108 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16109
16110 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16111 }
16112 else
16113 {
16114 rtx caddr = ix86_tls_get_addr ();
16115
16116 if (TARGET_64BIT)
16117 {
16118 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16119 rtx_insn *insns;
16120
16121 start_sequence ();
16122 emit_call_insn
16123 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
16124 insns = get_insns ();
16125 end_sequence ();
16126
16127 if (GET_MODE (x) != Pmode)
16128 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16129
16130 RTL_CONST_CALL_P (insns) = 1;
16131 emit_libcall_block (insns, dest, rax, x);
16132 }
16133 else
16134 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
16135 }
16136 break;
16137
16138 case TLS_MODEL_LOCAL_DYNAMIC:
16139 base = gen_reg_rtx (Pmode);
16140
16141 if (!TARGET_64BIT)
16142 {
16143 if (flag_pic)
16144 pic = pic_offset_table_rtx;
16145 else
16146 {
16147 pic = gen_reg_rtx (Pmode);
16148 emit_insn (gen_set_got (pic));
16149 }
16150 }
16151
16152 if (TARGET_GNU2_TLS)
16153 {
16154 rtx tmp = ix86_tls_module_base ();
16155
16156 if (TARGET_64BIT)
16157 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16158 else
16159 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16160
16161 tp = get_thread_pointer (Pmode, true);
16162 set_unique_reg_note (get_last_insn (), REG_EQUAL,
16163 gen_rtx_MINUS (Pmode, tmp, tp));
16164 }
16165 else
16166 {
16167 rtx caddr = ix86_tls_get_addr ();
16168
16169 if (TARGET_64BIT)
16170 {
16171 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16172 rtx_insn *insns;
16173 rtx eqv;
16174
16175 start_sequence ();
16176 emit_call_insn
16177 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16178 insns = get_insns ();
16179 end_sequence ();
16180
16181 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16182 share the LD_BASE result with other LD model accesses. */
16183 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16184 UNSPEC_TLS_LD_BASE);
16185
16186 RTL_CONST_CALL_P (insns) = 1;
16187 emit_libcall_block (insns, base, rax, eqv);
16188 }
16189 else
16190 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16191 }
16192
16193 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16194 off = gen_rtx_CONST (Pmode, off);
16195
16196 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16197
16198 if (TARGET_GNU2_TLS)
16199 {
16200 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16201
16202 if (GET_MODE (x) != Pmode)
16203 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16204
16205 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16206 }
16207 break;
16208
16209 case TLS_MODEL_INITIAL_EXEC:
16210 if (TARGET_64BIT)
16211 {
16212 if (TARGET_SUN_TLS && !TARGET_X32)
16213 {
16214 /* The Sun linker took the AMD64 TLS spec literally
16215 and can only handle %rax as destination of the
16216 initial executable code sequence. */
16217
16218 dest = gen_reg_rtx (DImode);
16219 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16220 return dest;
16221 }
16222
16223 /* Generate DImode references to avoid %fs:(%reg32)
16224 problems and the linker IE->LE relaxation bug. */
16225 tp_mode = DImode;
16226 pic = NULL;
16227 type = UNSPEC_GOTNTPOFF;
16228 }
16229 else if (flag_pic)
16230 {
16231 pic = pic_offset_table_rtx;
16232 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16233 }
16234 else if (!TARGET_ANY_GNU_TLS)
16235 {
16236 pic = gen_reg_rtx (Pmode);
16237 emit_insn (gen_set_got (pic));
16238 type = UNSPEC_GOTTPOFF;
16239 }
16240 else
16241 {
16242 pic = NULL;
16243 type = UNSPEC_INDNTPOFF;
16244 }
16245
16246 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16247 off = gen_rtx_CONST (tp_mode, off);
16248 if (pic)
16249 off = gen_rtx_PLUS (tp_mode, pic, off);
16250 off = gen_const_mem (tp_mode, off);
16251 set_mem_alias_set (off, ix86_GOT_alias_set ());
16252
16253 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16254 {
16255 base = get_thread_pointer (tp_mode,
16256 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16257 off = force_reg (tp_mode, off);
16258 return gen_rtx_PLUS (tp_mode, base, off);
16259 }
16260 else
16261 {
16262 base = get_thread_pointer (Pmode, true);
16263 dest = gen_reg_rtx (Pmode);
16264 emit_insn (ix86_gen_sub3 (dest, base, off));
16265 }
16266 break;
16267
16268 case TLS_MODEL_LOCAL_EXEC:
16269 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16270 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16271 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16272 off = gen_rtx_CONST (Pmode, off);
16273
16274 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16275 {
16276 base = get_thread_pointer (Pmode,
16277 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16278 return gen_rtx_PLUS (Pmode, base, off);
16279 }
16280 else
16281 {
16282 base = get_thread_pointer (Pmode, true);
16283 dest = gen_reg_rtx (Pmode);
16284 emit_insn (ix86_gen_sub3 (dest, base, off));
16285 }
16286 break;
16287
16288 default:
16289 gcc_unreachable ();
16290 }
16291
16292 return dest;
16293 }
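
/* A sketch of the usual ELF code sequences the TLS models above
   correspond to (AT&T syntax, 64-bit; exact registers and scheduling
   vary):

       global dynamic:  leaq  x@tlsgd(%rip), %rdi
                        call  __tls_get_addr@PLT
       local dynamic:   one __tls_get_addr call for the module base,
                        then  leaq  x@dtpoff(%rax), ...
       initial exec:    movq  x@gottpoff(%rip), %rax
                        movq  %fs:(%rax), ...
       local exec:      movq  %fs:x@tpoff, ...

   With -mtls-dialect=gnu2 (TARGET_GNU2_TLS) the dynamic models use
   TLS descriptors instead of calling __tls_get_addr directly.  */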
16294
16295 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16296 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16297 unique refptr-DECL symbol corresponding to symbol DECL. */
16298
16299 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16300 {
16301 static inline hashval_t hash (tree_map *m) { return m->hash; }
16302 static inline bool
16303 equal (tree_map *a, tree_map *b)
16304 {
16305 return a->base.from == b->base.from;
16306 }
16307
16308 static int
16309 keep_cache_entry (tree_map *&m)
16310 {
16311 return ggc_marked_p (m->base.from);
16312 }
16313 };
16314
16315 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16316
16317 static tree
16318 get_dllimport_decl (tree decl, bool beimport)
16319 {
16320 struct tree_map *h, in;
16321 const char *name;
16322 const char *prefix;
16323 size_t namelen, prefixlen;
16324 char *imp_name;
16325 tree to;
16326 rtx rtl;
16327
16328 if (!dllimport_map)
16329 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16330
16331 in.hash = htab_hash_pointer (decl);
16332 in.base.from = decl;
16333 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16334 h = *loc;
16335 if (h)
16336 return h->to;
16337
16338 *loc = h = ggc_alloc<tree_map> ();
16339 h->hash = in.hash;
16340 h->base.from = decl;
16341 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16342 VAR_DECL, NULL, ptr_type_node);
16343 DECL_ARTIFICIAL (to) = 1;
16344 DECL_IGNORED_P (to) = 1;
16345 DECL_EXTERNAL (to) = 1;
16346 TREE_READONLY (to) = 1;
16347
16348 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16349 name = targetm.strip_name_encoding (name);
16350 if (beimport)
16351 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16352 ? "*__imp_" : "*__imp__";
16353 else
16354 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16355 namelen = strlen (name);
16356 prefixlen = strlen (prefix);
16357 imp_name = (char *) alloca (namelen + prefixlen + 1);
16358 memcpy (imp_name, prefix, prefixlen);
16359 memcpy (imp_name + prefixlen, name, namelen + 1);
16360
16361 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16362 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16363 SET_SYMBOL_REF_DECL (rtl, to);
16364 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16365 if (!beimport)
16366 {
16367 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16368 #ifdef SUB_TARGET_RECORD_STUB
16369 SUB_TARGET_RECORD_STUB (name);
16370 #endif
16371 }
16372
16373 rtl = gen_const_mem (Pmode, rtl);
16374 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16375
16376 SET_DECL_RTL (to, rtl);
16377 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16378
16379 return to;
16380 }
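
/* For illustration: for a dllimport declaration of "foo" this builds
   an artificial VAR_DECL whose DECL_RTL is a memory reference through
   the import-table slot, roughly

       __imp_foo      (or __imp__foo when a user label prefix is used)

   and, for the BEIMPORT == false case, a ".refptr.foo" stub variable
   instead.  Callers then load the real address from that slot.  */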
16381
16382 /* Expand SYMBOL into its corresponding far-address symbol.
16383 WANT_REG is true if we require the result to be a register. */
16384
16385 static rtx
16386 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16387 {
16388 tree imp_decl;
16389 rtx x;
16390
16391 gcc_assert (SYMBOL_REF_DECL (symbol));
16392 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16393
16394 x = DECL_RTL (imp_decl);
16395 if (want_reg)
16396 x = force_reg (Pmode, x);
16397 return x;
16398 }
16399
16400 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16401 true if we require the result be a register. */
16402
16403 static rtx
16404 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16405 {
16406 tree imp_decl;
16407 rtx x;
16408
16409 gcc_assert (SYMBOL_REF_DECL (symbol));
16410 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16411
16412 x = DECL_RTL (imp_decl);
16413 if (want_reg)
16414 x = force_reg (Pmode, x);
16415 return x;
16416 }
16417
16418 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16419 is true if we require the result to be a register. */
16420
16421 static rtx
16422 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16423 {
16424 if (!TARGET_PECOFF)
16425 return NULL_RTX;
16426
16427 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16428 {
16429 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16430 return legitimize_dllimport_symbol (addr, inreg);
16431 if (GET_CODE (addr) == CONST
16432 && GET_CODE (XEXP (addr, 0)) == PLUS
16433 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16434 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16435 {
16436 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16437 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16438 }
16439 }
16440
16441 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16442 return NULL_RTX;
16443 if (GET_CODE (addr) == SYMBOL_REF
16444 && !is_imported_p (addr)
16445 && SYMBOL_REF_EXTERNAL_P (addr)
16446 && SYMBOL_REF_DECL (addr))
16447 return legitimize_pe_coff_extern_decl (addr, inreg);
16448
16449 if (GET_CODE (addr) == CONST
16450 && GET_CODE (XEXP (addr, 0)) == PLUS
16451 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16452 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16453 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16454 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16455 {
16456 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16457 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16458 }
16459 return NULL_RTX;
16460 }
16461
16462 /* Try machine-dependent ways of modifying an illegitimate address
16463 to be legitimate. If we find one, return the new, valid address.
16464 This macro is used in only one place: `memory_address' in explow.c.
16465
16466 OLDX is the address as it was before break_out_memory_refs was called.
16467 In some cases it is useful to look at this to decide what needs to be done.
16468
16469 It is always safe for this macro to do nothing. It exists to recognize
16470 opportunities to optimize the output.
16471
16472 For the 80386, we handle X+REG by loading X into a register R and
16473 using R+REG. R will go in a general reg and indexing will be used.
16474 However, if REG is a broken-out memory address or multiplication,
16475 nothing needs to be done because REG can certainly go in a general reg.
16476
16477 When -fpic is used, special handling is needed for symbolic references.
16478 See comments by legitimize_pic_address in i386.c for details. */
16479
16480 static rtx
16481 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16482 {
16483 bool changed = false;
16484 unsigned log;
16485
16486 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16487 if (log)
16488 return legitimize_tls_address (x, (enum tls_model) log, false);
16489 if (GET_CODE (x) == CONST
16490 && GET_CODE (XEXP (x, 0)) == PLUS
16491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16492 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16493 {
16494 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16495 (enum tls_model) log, false);
16496 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16497 }
16498
16499 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16500 {
16501 rtx tmp = legitimize_pe_coff_symbol (x, true);
16502 if (tmp)
16503 return tmp;
16504 }
16505
16506 if (flag_pic && SYMBOLIC_CONST (x))
16507 return legitimize_pic_address (x, 0);
16508
16509 #if TARGET_MACHO
16510 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16511 return machopic_indirect_data_reference (x, 0);
16512 #endif
16513
16514 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16515 if (GET_CODE (x) == ASHIFT
16516 && CONST_INT_P (XEXP (x, 1))
16517 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16518 {
16519 changed = true;
16520 log = INTVAL (XEXP (x, 1));
16521 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16522 GEN_INT (1 << log));
16523 }
16524
16525 if (GET_CODE (x) == PLUS)
16526 {
16527 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16528
16529 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16530 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16531 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16532 {
16533 changed = true;
16534 log = INTVAL (XEXP (XEXP (x, 0), 1));
16535 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16536 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16537 GEN_INT (1 << log));
16538 }
16539
16540 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16541 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16542 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16543 {
16544 changed = true;
16545 log = INTVAL (XEXP (XEXP (x, 1), 1));
16546 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16547 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16548 GEN_INT (1 << log));
16549 }
16550
16551 /* Put multiply first if it isn't already. */
16552 if (GET_CODE (XEXP (x, 1)) == MULT)
16553 {
16554 std::swap (XEXP (x, 0), XEXP (x, 1));
16555 changed = true;
16556 }
16557
16558 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16559 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16560 created by virtual register instantiation, register elimination, and
16561 similar optimizations. */
16562 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16563 {
16564 changed = true;
16565 x = gen_rtx_PLUS (Pmode,
16566 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16567 XEXP (XEXP (x, 1), 0)),
16568 XEXP (XEXP (x, 1), 1));
16569 }
16570
16571 /* Canonicalize
16572 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16573 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16574 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16575 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16576 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16577 && CONSTANT_P (XEXP (x, 1)))
16578 {
16579 rtx constant;
16580 rtx other = NULL_RTX;
16581
16582 if (CONST_INT_P (XEXP (x, 1)))
16583 {
16584 constant = XEXP (x, 1);
16585 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16586 }
16587 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16588 {
16589 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16590 other = XEXP (x, 1);
16591 }
16592 else
16593 constant = 0;
16594
16595 if (constant)
16596 {
16597 changed = true;
16598 x = gen_rtx_PLUS (Pmode,
16599 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16600 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16601 plus_constant (Pmode, other,
16602 INTVAL (constant)));
16603 }
16604 }
16605
16606 if (changed && ix86_legitimate_address_p (mode, x, false))
16607 return x;
16608
16609 if (GET_CODE (XEXP (x, 0)) == MULT)
16610 {
16611 changed = true;
16612 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16613 }
16614
16615 if (GET_CODE (XEXP (x, 1)) == MULT)
16616 {
16617 changed = true;
16618 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16619 }
16620
16621 if (changed
16622 && REG_P (XEXP (x, 1))
16623 && REG_P (XEXP (x, 0)))
16624 return x;
16625
16626 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16627 {
16628 changed = true;
16629 x = legitimize_pic_address (x, 0);
16630 }
16631
16632 if (changed && ix86_legitimate_address_p (mode, x, false))
16633 return x;
16634
16635 if (REG_P (XEXP (x, 0)))
16636 {
16637 rtx temp = gen_reg_rtx (Pmode);
16638 rtx val = force_operand (XEXP (x, 1), temp);
16639 if (val != temp)
16640 {
16641 val = convert_to_mode (Pmode, val, 1);
16642 emit_move_insn (temp, val);
16643 }
16644
16645 XEXP (x, 1) = temp;
16646 return x;
16647 }
16648
16649 else if (REG_P (XEXP (x, 1)))
16650 {
16651 rtx temp = gen_reg_rtx (Pmode);
16652 rtx val = force_operand (XEXP (x, 0), temp);
16653 if (val != temp)
16654 {
16655 val = convert_to_mode (Pmode, val, 1);
16656 emit_move_insn (temp, val);
16657 }
16658
16659 XEXP (x, 0) = temp;
16660 return x;
16661 }
16662 }
16663
16664 return x;
16665 }
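
/* For illustration only, one of the simpler rewrites performed above:
   an address computed as

       (plus (ashift (reg) (const_int 2)) (reg))

   is canonicalized into

       (plus (mult (reg) (const_int 4)) (reg))

   which matches the scaled-index addressing mode, e.g. (%eax,%ebx,4)
   in the final assembly.  */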
16666 \f
16667 /* Print an integer constant expression in assembler syntax. Addition
16668 and subtraction are the only arithmetic that may appear in these
16669 expressions. FILE is the stdio stream to write to, X is the rtx, and
16670 CODE is the operand print code from the output string. */
16671
16672 static void
16673 output_pic_addr_const (FILE *file, rtx x, int code)
16674 {
16675 char buf[256];
16676
16677 switch (GET_CODE (x))
16678 {
16679 case PC:
16680 gcc_assert (flag_pic);
16681 putc ('.', file);
16682 break;
16683
16684 case SYMBOL_REF:
16685 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16686 output_addr_const (file, x);
16687 else
16688 {
16689 const char *name = XSTR (x, 0);
16690
16691 /* Mark the decl as referenced so that cgraph will
16692 output the function. */
16693 if (SYMBOL_REF_DECL (x))
16694 mark_decl_referenced (SYMBOL_REF_DECL (x));
16695
16696 #if TARGET_MACHO
16697 if (MACHOPIC_INDIRECT
16698 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16699 name = machopic_indirection_name (x, /*stub_p=*/true);
16700 #endif
16701 assemble_name (file, name);
16702 }
16703 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16704 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16705 fputs ("@PLT", file);
16706 break;
16707
16708 case LABEL_REF:
16709 x = XEXP (x, 0);
16710 /* FALLTHRU */
16711 case CODE_LABEL:
16712 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16713 assemble_name (asm_out_file, buf);
16714 break;
16715
16716 case CONST_INT:
16717 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16718 break;
16719
16720 case CONST:
16721 /* This used to output parentheses around the expression,
16722 but that does not work on the 386 (either ATT or BSD assembler). */
16723 output_pic_addr_const (file, XEXP (x, 0), code);
16724 break;
16725
16726 case CONST_DOUBLE:
16727 /* We can't handle floating point constants;
16728 TARGET_PRINT_OPERAND must handle them. */
16729 output_operand_lossage ("floating constant misused");
16730 break;
16731
16732 case PLUS:
16733 /* Some assemblers need integer constants to appear first. */
16734 if (CONST_INT_P (XEXP (x, 0)))
16735 {
16736 output_pic_addr_const (file, XEXP (x, 0), code);
16737 putc ('+', file);
16738 output_pic_addr_const (file, XEXP (x, 1), code);
16739 }
16740 else
16741 {
16742 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16743 output_pic_addr_const (file, XEXP (x, 1), code);
16744 putc ('+', file);
16745 output_pic_addr_const (file, XEXP (x, 0), code);
16746 }
16747 break;
16748
16749 case MINUS:
16750 if (!TARGET_MACHO)
16751 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16752 output_pic_addr_const (file, XEXP (x, 0), code);
16753 putc ('-', file);
16754 output_pic_addr_const (file, XEXP (x, 1), code);
16755 if (!TARGET_MACHO)
16756 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16757 break;
16758
16759 case UNSPEC:
16760 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
16761 {
16762 bool f = i386_asm_output_addr_const_extra (file, x);
16763 gcc_assert (f);
16764 break;
16765 }
16766
16767 gcc_assert (XVECLEN (x, 0) == 1);
16768 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16769 switch (XINT (x, 1))
16770 {
16771 case UNSPEC_GOT:
16772 fputs ("@GOT", file);
16773 break;
16774 case UNSPEC_GOTOFF:
16775 fputs ("@GOTOFF", file);
16776 break;
16777 case UNSPEC_PLTOFF:
16778 fputs ("@PLTOFF", file);
16779 break;
16780 case UNSPEC_PCREL:
16781 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16782 "(%rip)" : "[rip]", file);
16783 break;
16784 case UNSPEC_GOTPCREL:
16785 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16786 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16787 break;
16788 case UNSPEC_GOTTPOFF:
16789 /* FIXME: This might be @TPOFF in Sun ld too. */
16790 fputs ("@gottpoff", file);
16791 break;
16792 case UNSPEC_TPOFF:
16793 fputs ("@tpoff", file);
16794 break;
16795 case UNSPEC_NTPOFF:
16796 if (TARGET_64BIT)
16797 fputs ("@tpoff", file);
16798 else
16799 fputs ("@ntpoff", file);
16800 break;
16801 case UNSPEC_DTPOFF:
16802 fputs ("@dtpoff", file);
16803 break;
16804 case UNSPEC_GOTNTPOFF:
16805 if (TARGET_64BIT)
16806 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16807 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16808 else
16809 fputs ("@gotntpoff", file);
16810 break;
16811 case UNSPEC_INDNTPOFF:
16812 fputs ("@indntpoff", file);
16813 break;
16814 #if TARGET_MACHO
16815 case UNSPEC_MACHOPIC_OFFSET:
16816 putc ('-', file);
16817 machopic_output_function_base_name (file);
16818 break;
16819 #endif
16820 default:
16821 output_operand_lossage ("invalid UNSPEC as operand");
16822 break;
16823 }
16824 break;
16825
16826 default:
16827 output_operand_lossage ("invalid expression as operand");
16828 }
16829 }
16830
16831 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16832 We need to emit DTP-relative relocations. */
16833
16834 static void ATTRIBUTE_UNUSED
16835 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16836 {
16837 fputs (ASM_LONG, file);
16838 output_addr_const (file, x);
16839 fputs ("@dtpoff", file);
16840 switch (size)
16841 {
16842 case 4:
16843 break;
16844 case 8:
16845 fputs (", 0", file);
16846 break;
16847 default:
16848 gcc_unreachable ();
16849 }
16850 }
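
/* For example, for a thread-local variable "x" this emits

       .long   x@dtpoff

   when SIZE is 4 and

       .long   x@dtpoff, 0

   when SIZE is 8, assuming ASM_LONG is "\t.long\t" as on ELF.  */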
16851
16852 /* Return true if X is a representation of the PIC register. This copes
16853 with calls from ix86_find_base_term, where the register might have
16854 been replaced by a cselib value. */
16855
16856 static bool
16857 ix86_pic_register_p (rtx x)
16858 {
16859 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16860 return (pic_offset_table_rtx
16861 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16862 else if (!REG_P (x))
16863 return false;
16864 else if (pic_offset_table_rtx)
16865 {
16866 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16867 return true;
16868 if (HARD_REGISTER_P (x)
16869 && !HARD_REGISTER_P (pic_offset_table_rtx)
16870 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16871 return true;
16872 return false;
16873 }
16874 else
16875 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16876 }
16877
16878 /* Helper function for ix86_delegitimize_address.
16879 Attempt to delegitimize TLS local-exec accesses. */
16880
16881 static rtx
16882 ix86_delegitimize_tls_address (rtx orig_x)
16883 {
16884 rtx x = orig_x, unspec;
16885 struct ix86_address addr;
16886
16887 if (!TARGET_TLS_DIRECT_SEG_REFS)
16888 return orig_x;
16889 if (MEM_P (x))
16890 x = XEXP (x, 0);
16891 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16892 return orig_x;
16893 if (ix86_decompose_address (x, &addr) == 0
16894 || addr.seg != DEFAULT_TLS_SEG_REG
16895 || addr.disp == NULL_RTX
16896 || GET_CODE (addr.disp) != CONST)
16897 return orig_x;
16898 unspec = XEXP (addr.disp, 0);
16899 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16900 unspec = XEXP (unspec, 0);
16901 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16902 return orig_x;
16903 x = XVECEXP (unspec, 0, 0);
16904 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16905 if (unspec != XEXP (addr.disp, 0))
16906 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16907 if (addr.index)
16908 {
16909 rtx idx = addr.index;
16910 if (addr.scale != 1)
16911 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16912 x = gen_rtx_PLUS (Pmode, idx, x);
16913 }
16914 if (addr.base)
16915 x = gen_rtx_PLUS (Pmode, addr.base, x);
16916 if (MEM_P (orig_x))
16917 x = replace_equiv_address_nv (orig_x, x);
16918 return x;
16919 }
16920
16921 /* In the name of slightly smaller debug output, and to cater to
16922 general assembler lossage, recognize PIC+GOTOFF and turn it back
16923 into a direct symbol reference.
16924
16925 On Darwin, this is necessary to avoid a crash, because Darwin
16926 has a different PIC label for each routine but the DWARF debugging
16927 information is not associated with any particular routine, so it's
16928 necessary to remove references to the PIC label from RTL stored by
16929 the DWARF output code. */
16930
16931 static rtx
16932 ix86_delegitimize_address (rtx x)
16933 {
16934 rtx orig_x = delegitimize_mem_from_attrs (x);
16935 /* addend is NULL or some rtx if x is something+GOTOFF where
16936 something doesn't include the PIC register. */
16937 rtx addend = NULL_RTX;
16938 /* reg_addend is NULL or a multiple of some register. */
16939 rtx reg_addend = NULL_RTX;
16940 /* const_addend is NULL or a const_int. */
16941 rtx const_addend = NULL_RTX;
16942 /* This is the result, or NULL. */
16943 rtx result = NULL_RTX;
16944
16945 x = orig_x;
16946
16947 if (MEM_P (x))
16948 x = XEXP (x, 0);
16949
16950 if (TARGET_64BIT)
16951 {
16952 if (GET_CODE (x) == CONST
16953 && GET_CODE (XEXP (x, 0)) == PLUS
16954 && GET_MODE (XEXP (x, 0)) == Pmode
16955 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16956 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16957 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16958 {
16959 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16960 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16961 if (MEM_P (orig_x))
16962 x = replace_equiv_address_nv (orig_x, x);
16963 return x;
16964 }
16965
16966 if (GET_CODE (x) == CONST
16967 && GET_CODE (XEXP (x, 0)) == UNSPEC
16968 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16969 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16970 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16971 {
16972 x = XVECEXP (XEXP (x, 0), 0, 0);
16973 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16974 {
16975 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
16976 if (x == NULL_RTX)
16977 return orig_x;
16978 }
16979 return x;
16980 }
16981
16982 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16983 return ix86_delegitimize_tls_address (orig_x);
16984
16985 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16986 and -mcmodel=medium -fpic. */
16987 }
16988
16989 if (GET_CODE (x) != PLUS
16990 || GET_CODE (XEXP (x, 1)) != CONST)
16991 return ix86_delegitimize_tls_address (orig_x);
16992
16993 if (ix86_pic_register_p (XEXP (x, 0)))
16994 /* %ebx + GOT/GOTOFF */
16995 ;
16996 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16997 {
16998 /* %ebx + %reg * scale + GOT/GOTOFF */
16999 reg_addend = XEXP (x, 0);
17000 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
17001 reg_addend = XEXP (reg_addend, 1);
17002 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
17003 reg_addend = XEXP (reg_addend, 0);
17004 else
17005 {
17006 reg_addend = NULL_RTX;
17007 addend = XEXP (x, 0);
17008 }
17009 }
17010 else
17011 addend = XEXP (x, 0);
17012
17013 x = XEXP (XEXP (x, 1), 0);
17014 if (GET_CODE (x) == PLUS
17015 && CONST_INT_P (XEXP (x, 1)))
17016 {
17017 const_addend = XEXP (x, 1);
17018 x = XEXP (x, 0);
17019 }
17020
17021 if (GET_CODE (x) == UNSPEC
17022 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
17023 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
17024 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
17025 && !MEM_P (orig_x) && !addend)))
17026 result = XVECEXP (x, 0, 0);
17027
17028 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
17029 && !MEM_P (orig_x))
17030 result = XVECEXP (x, 0, 0);
17031
17032 if (! result)
17033 return ix86_delegitimize_tls_address (orig_x);
17034
17035 if (const_addend)
17036 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
17037 if (reg_addend)
17038 result = gen_rtx_PLUS (Pmode, reg_addend, result);
17039 if (addend)
17040 {
17041 /* If the rest of original X doesn't involve the PIC register, add
17042 addend and subtract pic_offset_table_rtx. This can happen e.g.
17043 for code like:
17044 leal (%ebx, %ecx, 4), %ecx
17045 ...
17046 movl foo@GOTOFF(%ecx), %edx
17047 in which case we return (%ecx - %ebx) + foo
17048 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17049 and reload has completed. */
17050 if (pic_offset_table_rtx
17051 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17052 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17053 pic_offset_table_rtx),
17054 result);
17055 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
17056 {
17057 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17058 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17059 result = gen_rtx_PLUS (Pmode, tmp, result);
17060 }
17061 else
17062 return orig_x;
17063 }
17064 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17065 {
17066 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17067 if (result == NULL_RTX)
17068 return orig_x;
17069 }
17070 return result;
17071 }
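
/* For illustration: this undoes the PIC legitimization above, e.g.

       (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is turned back into (symbol_ref "foo") (plus any constant or
   register addend), which keeps debug output and ix86_find_base_term
   working in terms of the original symbol.  */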
17072
17073 /* If X is a machine specific address (i.e. a symbol or label being
17074 referenced as a displacement from the GOT implemented using an
17075 UNSPEC), then return the base term. Otherwise return X. */
17076
17077 rtx
17078 ix86_find_base_term (rtx x)
17079 {
17080 rtx term;
17081
17082 if (TARGET_64BIT)
17083 {
17084 if (GET_CODE (x) != CONST)
17085 return x;
17086 term = XEXP (x, 0);
17087 if (GET_CODE (term) == PLUS
17088 && CONST_INT_P (XEXP (term, 1)))
17089 term = XEXP (term, 0);
17090 if (GET_CODE (term) != UNSPEC
17091 || (XINT (term, 1) != UNSPEC_GOTPCREL
17092 && XINT (term, 1) != UNSPEC_PCREL))
17093 return x;
17094
17095 return XVECEXP (term, 0, 0);
17096 }
17097
17098 return ix86_delegitimize_address (x);
17099 }
17100 \f
17101 static void
17102 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17103 bool fp, FILE *file)
17104 {
17105 const char *suffix;
17106
17107 if (mode == CCFPmode || mode == CCFPUmode)
17108 {
17109 code = ix86_fp_compare_code_to_integer (code);
17110 mode = CCmode;
17111 }
17112 if (reverse)
17113 code = reverse_condition (code);
17114
17115 switch (code)
17116 {
17117 case EQ:
17118 switch (mode)
17119 {
17120 case CCAmode:
17121 suffix = "a";
17122 break;
17123 case CCCmode:
17124 suffix = "c";
17125 break;
17126 case CCOmode:
17127 suffix = "o";
17128 break;
17129 case CCPmode:
17130 suffix = "p";
17131 break;
17132 case CCSmode:
17133 suffix = "s";
17134 break;
17135 default:
17136 suffix = "e";
17137 break;
17138 }
17139 break;
17140 case NE:
17141 switch (mode)
17142 {
17143 case CCAmode:
17144 suffix = "na";
17145 break;
17146 case CCCmode:
17147 suffix = "nc";
17148 break;
17149 case CCOmode:
17150 suffix = "no";
17151 break;
17152 case CCPmode:
17153 suffix = "np";
17154 break;
17155 case CCSmode:
17156 suffix = "ns";
17157 break;
17158 default:
17159 suffix = "ne";
17160 break;
17161 }
17162 break;
17163 case GT:
17164 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17165 suffix = "g";
17166 break;
17167 case GTU:
17168 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17169 Those same assemblers have the same but opposite lossage on cmov. */
17170 if (mode == CCmode)
17171 suffix = fp ? "nbe" : "a";
17172 else
17173 gcc_unreachable ();
17174 break;
17175 case LT:
17176 switch (mode)
17177 {
17178 case CCNOmode:
17179 case CCGOCmode:
17180 suffix = "s";
17181 break;
17182
17183 case CCmode:
17184 case CCGCmode:
17185 suffix = "l";
17186 break;
17187
17188 default:
17189 gcc_unreachable ();
17190 }
17191 break;
17192 case LTU:
17193 if (mode == CCmode)
17194 suffix = "b";
17195 else if (mode == CCCmode)
17196 suffix = fp ? "b" : "c";
17197 else
17198 gcc_unreachable ();
17199 break;
17200 case GE:
17201 switch (mode)
17202 {
17203 case CCNOmode:
17204 case CCGOCmode:
17205 suffix = "ns";
17206 break;
17207
17208 case CCmode:
17209 case CCGCmode:
17210 suffix = "ge";
17211 break;
17212
17213 default:
17214 gcc_unreachable ();
17215 }
17216 break;
17217 case GEU:
17218 if (mode == CCmode)
17219 suffix = "nb";
17220 else if (mode == CCCmode)
17221 suffix = fp ? "nb" : "nc";
17222 else
17223 gcc_unreachable ();
17224 break;
17225 case LE:
17226 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17227 suffix = "le";
17228 break;
17229 case LEU:
17230 if (mode == CCmode)
17231 suffix = "be";
17232 else
17233 gcc_unreachable ();
17234 break;
17235 case UNORDERED:
17236 suffix = fp ? "u" : "p";
17237 break;
17238 case ORDERED:
17239 suffix = fp ? "nu" : "np";
17240 break;
17241 default:
17242 gcc_unreachable ();
17243 }
17244 fputs (suffix, file);
17245 }
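
/* A few representative mappings from the code above (suffixes as used
   in setCC/jCC/cmovCC mnemonics):

       EQ  -> "e"         NE  -> "ne"
       GT  -> "g"         GTU -> "a"  ("nbe" for fcmov)
       LT  -> "l" or "s"  LTU -> "b"  ("c" in CCCmode)

   The exact suffix depends on the CC mode, as handled above.  */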
17246
17247 /* Print the name of register X to FILE based on its machine mode and number.
17248 If CODE is 'w', pretend the mode is HImode.
17249 If CODE is 'b', pretend the mode is QImode.
17250 If CODE is 'k', pretend the mode is SImode.
17251 If CODE is 'q', pretend the mode is DImode.
17252 If CODE is 'x', pretend the mode is V4SFmode.
17253 If CODE is 't', pretend the mode is V8SFmode.
17254 If CODE is 'g', pretend the mode is V16SFmode.
17255 If CODE is 'h', pretend the reg is the 'high' byte register.
17256 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
17257 If CODE is 'd', duplicate the operand for AVX instruction.
17258 */
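/* Illustrative note, added for clarity and not part of the original
   comment: for the %rax register in AT&T syntax, code 'q' prints "%rax",
   'k' prints "%eax", 'w' prints "%ax", 'b' prints "%al" and 'h' prints
   "%ah"; with no code the name is chosen from the operand's own mode.  */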
17259
17260 void
17261 print_reg (rtx x, int code, FILE *file)
17262 {
17263 const char *reg;
17264 int msize;
17265 unsigned int regno;
17266 bool duplicated;
17267
17268 if (ASSEMBLER_DIALECT == ASM_ATT)
17269 putc ('%', file);
17270
17271 if (x == pc_rtx)
17272 {
17273 gcc_assert (TARGET_64BIT);
17274 fputs ("rip", file);
17275 return;
17276 }
17277
17278 if (code == 'y' && STACK_TOP_P (x))
17279 {
17280 fputs ("st(0)", file);
17281 return;
17282 }
17283
17284 if (code == 'w')
17285 msize = 2;
17286 else if (code == 'b')
17287 msize = 1;
17288 else if (code == 'k')
17289 msize = 4;
17290 else if (code == 'q')
17291 msize = 8;
17292 else if (code == 'h')
17293 msize = 0;
17294 else if (code == 'x')
17295 msize = 16;
17296 else if (code == 't')
17297 msize = 32;
17298 else if (code == 'g')
17299 msize = 64;
17300 else
17301 msize = GET_MODE_SIZE (GET_MODE (x));
17302
17303 regno = true_regnum (x);
17304
17305 gcc_assert (regno != ARG_POINTER_REGNUM
17306 && regno != FRAME_POINTER_REGNUM
17307 && regno != FPSR_REG
17308 && regno != FPCR_REG);
17309
17310 if (regno == FLAGS_REG)
17311 {
17312 output_operand_lossage ("invalid use of asm flag output");
17313 return;
17314 }
17315
17316 duplicated = code == 'd' && TARGET_AVX;
17317
17318 switch (msize)
17319 {
17320 case 8:
17321 case 4:
17322 if (LEGACY_INT_REGNO_P (regno))
17323 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
17324 /* FALLTHRU */
17325 case 16:
17326 case 12:
17327 case 2:
17328 normal:
17329 reg = hi_reg_name[regno];
17330 break;
17331 case 1:
17332 if (regno >= ARRAY_SIZE (qi_reg_name))
17333 goto normal;
17334 reg = qi_reg_name[regno];
17335 break;
17336 case 0:
17337 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17338 goto normal;
17339 reg = qi_high_reg_name[regno];
17340 break;
17341 case 32:
17342 case 64:
17343 if (SSE_REGNO_P (regno))
17344 {
17345 gcc_assert (!duplicated);
17346 putc (msize == 32 ? 'y' : 'z', file);
17347 reg = hi_reg_name[regno] + 1;
17348 break;
17349 }
17350 goto normal;
17351 default:
17352 gcc_unreachable ();
17353 }
17354
17355 fputs (reg, file);
17356
17357 /* Irritatingly, the AMD extended registers use a different
17358 naming convention: "r%d[bwd]". */
17359 if (REX_INT_REGNO_P (regno))
17360 {
17361 gcc_assert (TARGET_64BIT);
17362 switch (msize)
17363 {
17364 case 0:
17365 error ("extended registers have no high halves");
17366 break;
17367 case 1:
17368 putc ('b', file);
17369 break;
17370 case 2:
17371 putc ('w', file);
17372 break;
17373 case 4:
17374 putc ('d', file);
17375 break;
17376 case 8:
17377 /* no suffix */
17378 break;
17379 default:
17380 error ("unsupported operand size for extended register");
17381 break;
17382 }
17383 return;
17384 }
17385
17386 if (duplicated)
17387 {
17388 if (ASSEMBLER_DIALECT == ASM_ATT)
17389 fprintf (file, ", %%%s", reg);
17390 else
17391 fprintf (file, ", %s", reg);
17392 }
17393 }
17394
17395 /* Meaning of CODE:
17396 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17397 C -- print opcode suffix for set/cmov insn.
17398 c -- like C, but print reversed condition
17399 F,f -- likewise, but for floating-point.
17400 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17401 otherwise nothing
17402 R -- print embedded rounding and sae.
17403 r -- print only sae.
17404 z -- print the opcode suffix for the size of the current operand.
17405 Z -- likewise, with special suffixes for x87 instructions.
17406 * -- print a star (in certain assembler syntax)
17407 A -- print an absolute memory reference.
17408 E -- print address with DImode register names if TARGET_64BIT.
17409 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17410 s -- print a shift double count, followed by the assembler's argument
17411 delimiter.
17412 b -- print the QImode name of the register for the indicated operand.
17413 %b0 would print %al if operands[0] is reg 0.
17414 w -- likewise, print the HImode name of the register.
17415 k -- likewise, print the SImode name of the register.
17416 q -- likewise, print the DImode name of the register.
17417 x -- likewise, print the V4SFmode name of the register.
17418 t -- likewise, print the V8SFmode name of the register.
17419 g -- likewise, print the V16SFmode name of the register.
17420 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17421 y -- print "st(0)" instead of "st" as a register.
17422 d -- print duplicated register operand for AVX instruction.
17423 D -- print condition for SSE cmp instruction.
17424 P -- if PIC, print an @PLT suffix.
17425 p -- print raw symbol name.
17426 X -- don't print any sort of PIC '@' suffix for a symbol.
17427 & -- print some in-use local-dynamic symbol name.
17428 H -- print a memory address offset by 8; used for sse high-parts
17429 Y -- print condition for XOP pcom* instruction.
17430 + -- print a branch hint as 'cs' or 'ds' prefix
17431 ; -- print a semicolon (after prefixes due to bug in older gas).
17432 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17433 @ -- print a segment register of thread base pointer load
17434 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17435 ! -- print MPX prefix for jxx/call/ret instructions if required.
17436 */
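/* Illustrative note (the template below is a hypothetical example, not a
   pattern taken from the machine description): in a template such as
   "mov%z0\t{%1, %0|%0, %1}", the 'z' code picks the AT&T size suffix from
   operands[0], so an SImode move is printed as "movl ..." while the
   Intel-syntax alternative after the '|' omits the suffix entirely.  */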
17437
17438 void
17439 ix86_print_operand (FILE *file, rtx x, int code)
17440 {
17441 if (code)
17442 {
17443 switch (code)
17444 {
17445 case 'A':
17446 switch (ASSEMBLER_DIALECT)
17447 {
17448 case ASM_ATT:
17449 putc ('*', file);
17450 break;
17451
17452 case ASM_INTEL:
17453 /* Intel syntax. For absolute addresses, registers should not
17454 be surrounded by brackets. */
17455 if (!REG_P (x))
17456 {
17457 putc ('[', file);
17458 ix86_print_operand (file, x, 0);
17459 putc (']', file);
17460 return;
17461 }
17462 break;
17463
17464 default:
17465 gcc_unreachable ();
17466 }
17467
17468 ix86_print_operand (file, x, 0);
17469 return;
17470
17471 case 'E':
17472 /* Wrap address in an UNSPEC to declare special handling. */
17473 if (TARGET_64BIT)
17474 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17475
17476 output_address (VOIDmode, x);
17477 return;
17478
17479 case 'L':
17480 if (ASSEMBLER_DIALECT == ASM_ATT)
17481 putc ('l', file);
17482 return;
17483
17484 case 'W':
17485 if (ASSEMBLER_DIALECT == ASM_ATT)
17486 putc ('w', file);
17487 return;
17488
17489 case 'B':
17490 if (ASSEMBLER_DIALECT == ASM_ATT)
17491 putc ('b', file);
17492 return;
17493
17494 case 'Q':
17495 if (ASSEMBLER_DIALECT == ASM_ATT)
17496 putc ('l', file);
17497 return;
17498
17499 case 'S':
17500 if (ASSEMBLER_DIALECT == ASM_ATT)
17501 putc ('s', file);
17502 return;
17503
17504 case 'T':
17505 if (ASSEMBLER_DIALECT == ASM_ATT)
17506 putc ('t', file);
17507 return;
17508
17509 case 'O':
17510 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17511 if (ASSEMBLER_DIALECT != ASM_ATT)
17512 return;
17513
17514 switch (GET_MODE_SIZE (GET_MODE (x)))
17515 {
17516 case 2:
17517 putc ('w', file);
17518 break;
17519
17520 case 4:
17521 putc ('l', file);
17522 break;
17523
17524 case 8:
17525 putc ('q', file);
17526 break;
17527
17528 default:
17529 output_operand_lossage
17530 ("invalid operand size for operand code 'O'");
17531 return;
17532 }
17533
17534 putc ('.', file);
17535 #endif
17536 return;
17537
17538 case 'z':
17539 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17540 {
17541 /* Opcodes don't get size suffixes when using Intel syntax. */
17542 if (ASSEMBLER_DIALECT == ASM_INTEL)
17543 return;
17544
17545 switch (GET_MODE_SIZE (GET_MODE (x)))
17546 {
17547 case 1:
17548 putc ('b', file);
17549 return;
17550
17551 case 2:
17552 putc ('w', file);
17553 return;
17554
17555 case 4:
17556 putc ('l', file);
17557 return;
17558
17559 case 8:
17560 putc ('q', file);
17561 return;
17562
17563 default:
17564 output_operand_lossage
17565 ("invalid operand size for operand code 'z'");
17566 return;
17567 }
17568 }
17569
17570 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17571 warning
17572 (0, "non-integer operand used with operand code 'z'");
17573 /* FALLTHRU */
17574
17575 case 'Z':
17576 /* 387 opcodes don't get size suffixes when using Intel syntax. */
17577 if (ASSEMBLER_DIALECT == ASM_INTEL)
17578 return;
17579
17580 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17581 {
17582 switch (GET_MODE_SIZE (GET_MODE (x)))
17583 {
17584 case 2:
17585 #ifdef HAVE_AS_IX86_FILDS
17586 putc ('s', file);
17587 #endif
17588 return;
17589
17590 case 4:
17591 putc ('l', file);
17592 return;
17593
17594 case 8:
17595 #ifdef HAVE_AS_IX86_FILDQ
17596 putc ('q', file);
17597 #else
17598 fputs ("ll", file);
17599 #endif
17600 return;
17601
17602 default:
17603 break;
17604 }
17605 }
17606 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17607 {
17608 /* 387 opcodes don't get size suffixes
17609 if the operands are registers. */
17610 if (STACK_REG_P (x))
17611 return;
17612
17613 switch (GET_MODE_SIZE (GET_MODE (x)))
17614 {
17615 case 4:
17616 putc ('s', file);
17617 return;
17618
17619 case 8:
17620 putc ('l', file);
17621 return;
17622
17623 case 12:
17624 case 16:
17625 putc ('t', file);
17626 return;
17627
17628 default:
17629 break;
17630 }
17631 }
17632 else
17633 {
17634 output_operand_lossage
17635 ("invalid operand type used with operand code 'Z'");
17636 return;
17637 }
17638
17639 output_operand_lossage
17640 ("invalid operand size for operand code 'Z'");
17641 return;
17642
17643 case 'd':
17644 case 'b':
17645 case 'w':
17646 case 'k':
17647 case 'q':
17648 case 'h':
17649 case 't':
17650 case 'g':
17651 case 'y':
17652 case 'x':
17653 case 'X':
17654 case 'P':
17655 case 'p':
17656 break;
17657
17658 case 's':
17659 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17660 {
17661 ix86_print_operand (file, x, 0);
17662 fputs (", ", file);
17663 }
17664 return;
17665
17666 case 'Y':
17667 switch (GET_CODE (x))
17668 {
17669 case NE:
17670 fputs ("neq", file);
17671 break;
17672 case EQ:
17673 fputs ("eq", file);
17674 break;
17675 case GE:
17676 case GEU:
17677 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17678 break;
17679 case GT:
17680 case GTU:
17681 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17682 break;
17683 case LE:
17684 case LEU:
17685 fputs ("le", file);
17686 break;
17687 case LT:
17688 case LTU:
17689 fputs ("lt", file);
17690 break;
17691 case UNORDERED:
17692 fputs ("unord", file);
17693 break;
17694 case ORDERED:
17695 fputs ("ord", file);
17696 break;
17697 case UNEQ:
17698 fputs ("ueq", file);
17699 break;
17700 case UNGE:
17701 fputs ("nlt", file);
17702 break;
17703 case UNGT:
17704 fputs ("nle", file);
17705 break;
17706 case UNLE:
17707 fputs ("ule", file);
17708 break;
17709 case UNLT:
17710 fputs ("ult", file);
17711 break;
17712 case LTGT:
17713 fputs ("une", file);
17714 break;
17715 default:
17716 output_operand_lossage ("operand is not a condition code, "
17717 "invalid operand code 'Y'");
17718 return;
17719 }
17720 return;
17721
17722 case 'D':
17723 /* A little bit of braindamage here. The SSE compare instructions
17724 use completely different names for the comparisons than the
17725 fp conditional moves do. */
17726 switch (GET_CODE (x))
17727 {
17728 case UNEQ:
17729 if (TARGET_AVX)
17730 {
17731 fputs ("eq_us", file);
17732 break;
17733 }
17734 /* FALLTHRU */
17735 case EQ:
17736 fputs ("eq", file);
17737 break;
17738 case UNLT:
17739 if (TARGET_AVX)
17740 {
17741 fputs ("nge", file);
17742 break;
17743 }
17744 /* FALLTHRU */
17745 case LT:
17746 fputs ("lt", file);
17747 break;
17748 case UNLE:
17749 if (TARGET_AVX)
17750 {
17751 fputs ("ngt", file);
17752 break;
17753 }
17754 /* FALLTHRU */
17755 case LE:
17756 fputs ("le", file);
17757 break;
17758 case UNORDERED:
17759 fputs ("unord", file);
17760 break;
17761 case LTGT:
17762 if (TARGET_AVX)
17763 {
17764 fputs ("neq_oq", file);
17765 break;
17766 }
17767 /* FALLTHRU */
17768 case NE:
17769 fputs ("neq", file);
17770 break;
17771 case GE:
17772 if (TARGET_AVX)
17773 {
17774 fputs ("ge", file);
17775 break;
17776 }
17777 /* FALLTHRU */
17778 case UNGE:
17779 fputs ("nlt", file);
17780 break;
17781 case GT:
17782 if (TARGET_AVX)
17783 {
17784 fputs ("gt", file);
17785 break;
17786 }
17787 /* FALLTHRU */
17788 case UNGT:
17789 fputs ("nle", file);
17790 break;
17791 case ORDERED:
17792 fputs ("ord", file);
17793 break;
17794 default:
17795 output_operand_lossage ("operand is not a condition code, "
17796 "invalid operand code 'D'");
17797 return;
17798 }
17799 return;
17800
17801 case 'F':
17802 case 'f':
17803 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17804 if (ASSEMBLER_DIALECT == ASM_ATT)
17805 putc ('.', file);
17806 #endif
17807
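	  /* FALLTHRU - the 'F'/'f' codes only add the Sun-syntax '.' and
	     then share the condition-code handling of 'C'/'c' below.  */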
17808 case 'C':
17809 case 'c':
17810 if (!COMPARISON_P (x))
17811 {
17812 output_operand_lossage ("operand is not a condition code, "
17813 "invalid operand code '%c'", code);
17814 return;
17815 }
17816 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17817 code == 'c' || code == 'f',
17818 code == 'F' || code == 'f',
17819 file);
17820 return;
17821
17822 case 'H':
17823 if (!offsettable_memref_p (x))
17824 {
17825 output_operand_lossage ("operand is not an offsettable memory "
17826 "reference, invalid operand code 'H'");
17827 return;
17828 }
17829 /* It doesn't actually matter what mode we use here, as we're
17830 only going to use this for printing. */
17831 x = adjust_address_nv (x, DImode, 8);
17832 /* Output 'qword ptr' for intel assembler dialect. */
17833 if (ASSEMBLER_DIALECT == ASM_INTEL)
17834 code = 'q';
17835 break;
17836
17837 case 'K':
17838 gcc_assert (CONST_INT_P (x));
17839
17840 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17841 #ifdef HAVE_AS_IX86_HLE
17842 fputs ("xacquire ", file);
17843 #else
17844 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17845 #endif
17846 else if (INTVAL (x) & IX86_HLE_RELEASE)
17847 #ifdef HAVE_AS_IX86_HLE
17848 fputs ("xrelease ", file);
17849 #else
17850 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17851 #endif
17852 /* We do not want to print the value of the operand. */
17853 return;
17854
17855 case 'N':
17856 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17857 fputs ("{z}", file);
17858 return;
17859
17860 case 'r':
17861 gcc_assert (CONST_INT_P (x));
17862 gcc_assert (INTVAL (x) == ROUND_SAE);
17863
17864 if (ASSEMBLER_DIALECT == ASM_INTEL)
17865 fputs (", ", file);
17866
17867 fputs ("{sae}", file);
17868
17869 if (ASSEMBLER_DIALECT == ASM_ATT)
17870 fputs (", ", file);
17871
17872 return;
17873
17874 case 'R':
17875 gcc_assert (CONST_INT_P (x));
17876
17877 if (ASSEMBLER_DIALECT == ASM_INTEL)
17878 fputs (", ", file);
17879
17880 switch (INTVAL (x))
17881 {
17882 case ROUND_NEAREST_INT | ROUND_SAE:
17883 fputs ("{rn-sae}", file);
17884 break;
17885 case ROUND_NEG_INF | ROUND_SAE:
17886 fputs ("{rd-sae}", file);
17887 break;
17888 case ROUND_POS_INF | ROUND_SAE:
17889 fputs ("{ru-sae}", file);
17890 break;
17891 case ROUND_ZERO | ROUND_SAE:
17892 fputs ("{rz-sae}", file);
17893 break;
17894 default:
17895 gcc_unreachable ();
17896 }
17897
17898 if (ASSEMBLER_DIALECT == ASM_ATT)
17899 fputs (", ", file);
17900
17901 return;
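	  /* Hedged example (the assembly form is an assumption, not taken
	     from this file): INTVAL (x) == (ROUND_NEAREST_INT | ROUND_SAE)
	     prints "{rn-sae}", so an AVX-512 add with embedded rounding
	     could come out as "vaddps {rn-sae}, %zmm1, %zmm2, %zmm3" in
	     AT&T syntax, with the ", " emitted above separating the
	     modifier from the operand list.  */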
17902
17903 case '*':
17904 if (ASSEMBLER_DIALECT == ASM_ATT)
17905 putc ('*', file);
17906 return;
17907
17908 case '&':
17909 {
17910 const char *name = get_some_local_dynamic_name ();
17911 if (name == NULL)
17912 output_operand_lossage ("'%%&' used without any "
17913 "local dynamic TLS references");
17914 else
17915 assemble_name (file, name);
17916 return;
17917 }
17918
17919 case '+':
17920 {
17921 rtx x;
17922
17923 if (!optimize
17924 || optimize_function_for_size_p (cfun)
17925 || !TARGET_BRANCH_PREDICTION_HINTS)
17926 return;
17927
17928 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17929 if (x)
17930 {
17931 int pred_val = XINT (x, 0);
17932
17933 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17934 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17935 {
17936 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17937 bool cputaken
17938 = final_forward_branch_p (current_output_insn) == 0;
17939
17940 /* Emit hints only in the cases where the default branch
17941 prediction heuristics would fail. */
17942 if (taken != cputaken)
17943 {
17944 /* We use 3e (DS) prefix for taken branches and
17945 2e (CS) prefix for not taken branches. */
17946 if (taken)
17947 fputs ("ds ; ", file);
17948 else
17949 fputs ("cs ; ", file);
17950 }
17951 }
17952 }
17953 return;
17954 }
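	  /* Worked example (illustrative, ".L3" is a hypothetical label):
	     a forward branch with a 90% taken REG_BR_PROB is statically
	     predicted not taken by the CPU, so taken != cputaken and the
	     code above prefixes the jump, e.g. "ds ; jne .L3"; a branch
	     with a probability between 45% and 55% gets no hint.  */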
17955
17956 case ';':
17957 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17958 putc (';', file);
17959 #endif
17960 return;
17961
17962 case '@':
17963 if (ASSEMBLER_DIALECT == ASM_ATT)
17964 putc ('%', file);
17965
17966 /* The kernel uses a different segment register for performance
17967 reasons; a system call would not have to trash the userspace
17968 segment register, which would be expensive. */
17969 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17970 fputs ("fs", file);
17971 else
17972 fputs ("gs", file);
17973 return;
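	  /* Illustrative note (general Linux/x86 convention, not taken from
	     this file): 64-bit userspace keeps the thread base pointer
	     behind %fs (e.g. "movq %fs:0, %rax"), while 32-bit code and the
	     kernel use %gs, which is exactly the split made above.  */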
17974
17975 case '~':
17976 putc (TARGET_AVX2 ? 'i' : 'f', file);
17977 return;
17978
17979 case '^':
17980 if (TARGET_64BIT && Pmode != word_mode)
17981 fputs ("addr32 ", file);
17982 return;
17983
17984 case '!':
17985 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17986 fputs ("bnd ", file);
17987 return;
17988
17989 default:
17990 output_operand_lossage ("invalid operand code '%c'", code);
17991 }
17992 }
17993
17994 if (REG_P (x))
17995 print_reg (x, code, file);
17996
17997 else if (MEM_P (x))
17998 {
17999 rtx addr = XEXP (x, 0);
18000
18001 /* No `byte ptr' prefix for call instructions ... */
18002 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18003 {
18004 machine_mode mode = GET_MODE (x);
18005 const char *size;
18006
18007 /* Check for explicit size override codes. */
18008 if (code == 'b')
18009 size = "BYTE";
18010 else if (code == 'w')
18011 size = "WORD";
18012 else if (code == 'k')
18013 size = "DWORD";
18014 else if (code == 'q')
18015 size = "QWORD";
18016 else if (code == 'x')
18017 size = "XMMWORD";
18018 else if (code == 't')
18019 size = "YMMWORD";
18020 else if (code == 'g')
18021 size = "ZMMWORD";
18022 else if (mode == BLKmode)
18023 /* ... or BLKmode operands, when not overridden. */
18024 size = NULL;
18025 else
18026 switch (GET_MODE_SIZE (mode))
18027 {
18028 case 1: size = "BYTE"; break;
18029 case 2: size = "WORD"; break;
18030 case 4: size = "DWORD"; break;
18031 case 8: size = "QWORD"; break;
18032 case 12: size = "TBYTE"; break;
18033 case 16:
18034 if (mode == XFmode)
18035 size = "TBYTE";
18036 else
18037 size = "XMMWORD";
18038 break;
18039 case 32: size = "YMMWORD"; break;
18040 case 64: size = "ZMMWORD"; break;
18041 default:
18042 gcc_unreachable ();
18043 }
18044 if (size)
18045 {
18046 fputs (size, file);
18047 fputs (" PTR ", file);
18048 }
18049 }
18050
18051 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18052 output_operand_lossage ("invalid constraints for operand");
18053 else
18054 ix86_print_operand_address_as
18055 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18056 }
18057
18058 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18059 {
18060 long l;
18061
18062 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18063
18064 if (ASSEMBLER_DIALECT == ASM_ATT)
18065 putc ('$', file);
18066 /* Sign extend 32bit SFmode immediate to 8 bytes. */
18067 if (code == 'q')
18068 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18069 (unsigned long long) (int) l);
18070 else
18071 fprintf (file, "0x%08x", (unsigned int) l);
18072 }
18073
18074 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18075 {
18076 long l[2];
18077
18078 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18079
18080 if (ASSEMBLER_DIALECT == ASM_ATT)
18081 putc ('$', file);
18082 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18083 }
18084
18085 /* These float cases don't actually occur as immediate operands. */
18086 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18087 {
18088 char dstr[30];
18089
18090 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18091 fputs (dstr, file);
18092 }
18093
18094 else
18095 {
18096 /* We have patterns that allow zero sets of memory, for instance.
18097 In 64-bit mode, we should probably support all 8-byte vectors,
18098 since we can in fact encode that into an immediate. */
18099 if (GET_CODE (x) == CONST_VECTOR)
18100 {
18101 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
18102 x = const0_rtx;
18103 }
18104
18105 if (code != 'P' && code != 'p')
18106 {
18107 if (CONST_INT_P (x))
18108 {
18109 if (ASSEMBLER_DIALECT == ASM_ATT)
18110 putc ('$', file);
18111 }
18112 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18113 || GET_CODE (x) == LABEL_REF)
18114 {
18115 if (ASSEMBLER_DIALECT == ASM_ATT)
18116 putc ('$', file);
18117 else
18118 fputs ("OFFSET FLAT:", file);
18119 }
18120 }
18121 if (CONST_INT_P (x))
18122 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18123 else if (flag_pic || MACHOPIC_INDIRECT)
18124 output_pic_addr_const (file, x, code);
18125 else
18126 output_addr_const (file, x);
18127 }
18128 }
18129
18130 static bool
18131 ix86_print_operand_punct_valid_p (unsigned char code)
18132 {
18133 return (code == '@' || code == '*' || code == '+' || code == '&'
18134 || code == ';' || code == '~' || code == '^' || code == '!');
18135 }
18136 \f
18137 /* Print a memory operand whose address is ADDR. */
18138
18139 static void
18140 ix86_print_operand_address_as (FILE *file, rtx addr,
18141 addr_space_t as, bool no_rip)
18142 {
18143 struct ix86_address parts;
18144 rtx base, index, disp;
18145 int scale;
18146 int ok;
18147 bool vsib = false;
18148 int code = 0;
18149
18150 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18151 {
18152 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18153 gcc_assert (parts.index == NULL_RTX);
18154 parts.index = XVECEXP (addr, 0, 1);
18155 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18156 addr = XVECEXP (addr, 0, 0);
18157 vsib = true;
18158 }
18159 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18160 {
18161 gcc_assert (TARGET_64BIT);
18162 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18163 code = 'q';
18164 }
18165 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18166 {
18167 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18168 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18169 if (parts.base != NULL_RTX)
18170 {
18171 parts.index = parts.base;
18172 parts.scale = 1;
18173 }
18174 parts.base = XVECEXP (addr, 0, 0);
18175 addr = XVECEXP (addr, 0, 0);
18176 }
18177 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18178 {
18179 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18180 gcc_assert (parts.index == NULL_RTX);
18181 parts.index = XVECEXP (addr, 0, 1);
18182 addr = XVECEXP (addr, 0, 0);
18183 }
18184 else
18185 ok = ix86_decompose_address (addr, &parts);
18186
18187 gcc_assert (ok);
18188
18189 base = parts.base;
18190 index = parts.index;
18191 disp = parts.disp;
18192 scale = parts.scale;
18193
18194 if (ADDR_SPACE_GENERIC_P (as))
18195 as = parts.seg;
18196 else
18197 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18198
18199 if (!ADDR_SPACE_GENERIC_P (as))
18200 {
18201 const char *string;
18202
18203 if (as == ADDR_SPACE_SEG_FS)
18204 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18205 else if (as == ADDR_SPACE_SEG_GS)
18206 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18207 else
18208 gcc_unreachable ();
18209 fputs (string, file);
18210 }
18211
18212 /* Use one byte shorter RIP relative addressing for 64bit mode. */
18213 if (TARGET_64BIT && !base && !index && !no_rip)
18214 {
18215 rtx symbol = disp;
18216
18217 if (GET_CODE (disp) == CONST
18218 && GET_CODE (XEXP (disp, 0)) == PLUS
18219 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18220 symbol = XEXP (XEXP (disp, 0), 0);
18221
18222 if (GET_CODE (symbol) == LABEL_REF
18223 || (GET_CODE (symbol) == SYMBOL_REF
18224 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18225 base = pc_rtx;
18226 }
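      /* Example (illustrative): with no base and no index and a non-TLS
	 symbol "foo" as the displacement, substituting pc_rtx here makes
	 the operand print as "foo(%rip)" in AT&T syntax instead of the
	 longer absolute form.  */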
18227
18228 if (!base && !index)
18229 {
18230 /* Displacement only requires special attention. */
18231 if (CONST_INT_P (disp))
18232 {
18233 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
18234 fputs ("ds:", file);
18235 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18236 }
18237 /* Load the external function address via the GOT slot to avoid PLT. */
18238 else if (GET_CODE (disp) == CONST
18239 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18240 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18241 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18242 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18243 output_pic_addr_const (file, disp, 0);
18244 else if (flag_pic)
18245 output_pic_addr_const (file, disp, 0);
18246 else
18247 output_addr_const (file, disp);
18248 }
18249 else
18250 {
18251 /* Print SImode register names to force addr32 prefix. */
18252 if (SImode_address_operand (addr, VOIDmode))
18253 {
18254 if (flag_checking)
18255 {
18256 gcc_assert (TARGET_64BIT);
18257 switch (GET_CODE (addr))
18258 {
18259 case SUBREG:
18260 gcc_assert (GET_MODE (addr) == SImode);
18261 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18262 break;
18263 case ZERO_EXTEND:
18264 case AND:
18265 gcc_assert (GET_MODE (addr) == DImode);
18266 break;
18267 default:
18268 gcc_unreachable ();
18269 }
18270 }
18271 gcc_assert (!code);
18272 code = 'k';
18273 }
18274 else if (code == 0
18275 && TARGET_X32
18276 && disp
18277 && CONST_INT_P (disp)
18278 && INTVAL (disp) < -16*1024*1024)
18279 {
18280 /* X32 runs in 64-bit mode, where displacement, DISP, in
18281 address DISP(%r64), is encoded as 32-bit immediate sign-
18282 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18283 address is %r64 + 0xffffffffbffffd00. When %r64 <
18284 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18285 which is invalid for x32. The correct address is %r64
18286 - 0x40000300 == 0xf7ffdd64. To properly encode
18287 -0x40000300(%r64) for x32, we zero-extend negative
18288 displacement by forcing addr32 prefix which truncates
18289 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18290 zero-extend all negative displacements, including -1(%rsp).
18291 However, for small negative displacements, sign-extension
18292 won't cause overflow. We only zero-extend negative
18293 displacements if they are less than -16*1024*1024, a bound also used
18294 to check legitimate address displacements for PIC. */
18295 code = 'k';
18296 }
18297
18298 if (ASSEMBLER_DIALECT == ASM_ATT)
18299 {
18300 if (disp)
18301 {
18302 if (flag_pic)
18303 output_pic_addr_const (file, disp, 0);
18304 else if (GET_CODE (disp) == LABEL_REF)
18305 output_asm_label (disp);
18306 else
18307 output_addr_const (file, disp);
18308 }
18309
18310 putc ('(', file);
18311 if (base)
18312 print_reg (base, code, file);
18313 if (index)
18314 {
18315 putc (',', file);
18316 print_reg (index, vsib ? 0 : code, file);
18317 if (scale != 1 || vsib)
18318 fprintf (file, ",%d", scale);
18319 }
18320 putc (')', file);
18321 }
18322 else
18323 {
18324 rtx offset = NULL_RTX;
18325
18326 if (disp)
18327 {
18328 /* Pull out the offset of a symbol; print any symbol itself. */
18329 if (GET_CODE (disp) == CONST
18330 && GET_CODE (XEXP (disp, 0)) == PLUS
18331 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18332 {
18333 offset = XEXP (XEXP (disp, 0), 1);
18334 disp = gen_rtx_CONST (VOIDmode,
18335 XEXP (XEXP (disp, 0), 0));
18336 }
18337
18338 if (flag_pic)
18339 output_pic_addr_const (file, disp, 0);
18340 else if (GET_CODE (disp) == LABEL_REF)
18341 output_asm_label (disp);
18342 else if (CONST_INT_P (disp))
18343 offset = disp;
18344 else
18345 output_addr_const (file, disp);
18346 }
18347
18348 putc ('[', file);
18349 if (base)
18350 {
18351 print_reg (base, code, file);
18352 if (offset)
18353 {
18354 if (INTVAL (offset) >= 0)
18355 putc ('+', file);
18356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18357 }
18358 }
18359 else if (offset)
18360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18361 else
18362 putc ('0', file);
18363
18364 if (index)
18365 {
18366 putc ('+', file);
18367 print_reg (index, vsib ? 0 : code, file);
18368 if (scale != 1 || vsib)
18369 fprintf (file, "*%d", scale);
18370 }
18371 putc (']', file);
18372 }
18373 }
18374 }
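/* A hedged illustration of the two dialects handled above: the address
   base = %rbx, index = %rcx, scale = 4, disp = 8 is printed as
   "8(%rbx,%rcx,4)" for ASM_ATT and as "[rbx+rcx*4+8]" for ASM_INTEL.  */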
18375
18376 static void
18377 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18378 {
18379 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18380 }
18381
18382 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
18383
18384 static bool
18385 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18386 {
18387 rtx op;
18388
18389 if (GET_CODE (x) != UNSPEC)
18390 return false;
18391
18392 op = XVECEXP (x, 0, 0);
18393 switch (XINT (x, 1))
18394 {
18395 case UNSPEC_GOTTPOFF:
18396 output_addr_const (file, op);
18397 /* FIXME: This might be @TPOFF in Sun ld. */
18398 fputs ("@gottpoff", file);
18399 break;
18400 case UNSPEC_TPOFF:
18401 output_addr_const (file, op);
18402 fputs ("@tpoff", file);
18403 break;
18404 case UNSPEC_NTPOFF:
18405 output_addr_const (file, op);
18406 if (TARGET_64BIT)
18407 fputs ("@tpoff", file);
18408 else
18409 fputs ("@ntpoff", file);
18410 break;
18411 case UNSPEC_DTPOFF:
18412 output_addr_const (file, op);
18413 fputs ("@dtpoff", file);
18414 break;
18415 case UNSPEC_GOTNTPOFF:
18416 output_addr_const (file, op);
18417 if (TARGET_64BIT)
18418 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18419 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18420 else
18421 fputs ("@gotntpoff", file);
18422 break;
18423 case UNSPEC_INDNTPOFF:
18424 output_addr_const (file, op);
18425 fputs ("@indntpoff", file);
18426 break;
18427 #if TARGET_MACHO
18428 case UNSPEC_MACHOPIC_OFFSET:
18429 output_addr_const (file, op);
18430 putc ('-', file);
18431 machopic_output_function_base_name (file);
18432 break;
18433 #endif
18434
18435 case UNSPEC_STACK_CHECK:
18436 {
18437 int offset;
18438
18439 gcc_assert (flag_split_stack);
18440
18441 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
18442 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
18443 #else
18444 gcc_unreachable ();
18445 #endif
18446
18447 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
18448 }
18449 break;
18450
18451 default:
18452 return false;
18453 }
18454
18455 return true;
18456 }
18457 \f
18458 /* Split one or more double-mode RTL references into pairs of half-mode
18459 references. The RTL can be REG, offsettable MEM, integer constant, or
18460 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18461 split and "num" is its length. lo_half and hi_half are output arrays
18462 that parallel "operands". */
18463
18464 void
18465 split_double_mode (machine_mode mode, rtx operands[],
18466 int num, rtx lo_half[], rtx hi_half[])
18467 {
18468 machine_mode half_mode;
18469 unsigned int byte;
18470
18471 switch (mode)
18472 {
18473 case TImode:
18474 half_mode = DImode;
18475 break;
18476 case DImode:
18477 half_mode = SImode;
18478 break;
18479 default:
18480 gcc_unreachable ();
18481 }
18482
18483 byte = GET_MODE_SIZE (half_mode);
18484
18485 while (num--)
18486 {
18487 rtx op = operands[num];
18488
18489 /* simplify_subreg refuses to split volatile memory references,
18490 but we still have to handle them. */
18491 if (MEM_P (op))
18492 {
18493 lo_half[num] = adjust_address (op, half_mode, 0);
18494 hi_half[num] = adjust_address (op, half_mode, byte);
18495 }
18496 else
18497 {
18498 lo_half[num] = simplify_gen_subreg (half_mode, op,
18499 GET_MODE (op) == VOIDmode
18500 ? mode : GET_MODE (op), 0);
18501 hi_half[num] = simplify_gen_subreg (half_mode, op,
18502 GET_MODE (op) == VOIDmode
18503 ? mode : GET_MODE (op), byte);
18504 }
18505 }
18506 }
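/* Worked example (illustrative, little-endian as on all x86 targets):
   splitting the DImode constant 0x0000000100000002 gives
   lo_half = (const_int 0x2) and hi_half = (const_int 0x1), while a DImode
   MEM is split into two SImode MEMs at offsets 0 and 4.  */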
18507 \f
18508 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18509 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18510 is the expression of the binary operation. The output may either be
18511 emitted here, or returned to the caller, like all output_* functions.
18512
18513 There is no guarantee that the operands are the same mode, as they
18514 might be within FLOAT or FLOAT_EXTEND expressions. */
18515
18516 #ifndef SYSV386_COMPAT
18517 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18518 wants to fix the assemblers because that causes incompatibility
18519 with gcc. No-one wants to fix gcc because that causes
18520 incompatibility with assemblers... You can use the option of
18521 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18522 #define SYSV386_COMPAT 1
18523 #endif
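/* A hedged summary of the quirk this macro guards against (the details
   are in the fsub/fdiv comments of output_387_binary_op below): AT&T
   derived assemblers swap the meaning of fsub/fsubr and fdiv/fdivr when
   the destination is not st(0), so the operand templates chosen below
   differ between the SYSV386_COMPAT and !SYSV386_COMPAT encodings.  */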
18524
18525 const char *
18526 output_387_binary_op (rtx insn, rtx *operands)
18527 {
18528 static char buf[40];
18529 const char *p;
18530 const char *ssep;
18531 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
18532
18533 /* Even if we do not want to check the inputs, this documents the input
18534 constraints, which helps in understanding the following code. */
18535 if (flag_checking)
18536 {
18537 if (STACK_REG_P (operands[0])
18538 && ((REG_P (operands[1])
18539 && REGNO (operands[0]) == REGNO (operands[1])
18540 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18541 || (REG_P (operands[2])
18542 && REGNO (operands[0]) == REGNO (operands[2])
18543 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18544 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18545 ; /* ok */
18546 else
18547 gcc_assert (is_sse);
18548 }
18549
18550 switch (GET_CODE (operands[3]))
18551 {
18552 case PLUS:
18553 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18554 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18555 p = "fiadd";
18556 else
18557 p = "fadd";
18558 ssep = "vadd";
18559 break;
18560
18561 case MINUS:
18562 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18563 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18564 p = "fisub";
18565 else
18566 p = "fsub";
18567 ssep = "vsub";
18568 break;
18569
18570 case MULT:
18571 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18572 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18573 p = "fimul";
18574 else
18575 p = "fmul";
18576 ssep = "vmul";
18577 break;
18578
18579 case DIV:
18580 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18582 p = "fidiv";
18583 else
18584 p = "fdiv";
18585 ssep = "vdiv";
18586 break;
18587
18588 default:
18589 gcc_unreachable ();
18590 }
18591
18592 if (is_sse)
18593 {
18594 if (TARGET_AVX)
18595 {
18596 strcpy (buf, ssep);
18597 if (GET_MODE (operands[0]) == SFmode)
18598 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
18599 else
18600 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
18601 }
18602 else
18603 {
18604 strcpy (buf, ssep + 1);
18605 if (GET_MODE (operands[0]) == SFmode)
18606 strcat (buf, "ss\t{%2, %0|%0, %2}");
18607 else
18608 strcat (buf, "sd\t{%2, %0|%0, %2}");
18609 }
18610 return buf;
18611 }
18612 strcpy (buf, p);
18613
18614 switch (GET_CODE (operands[3]))
18615 {
18616 case MULT:
18617 case PLUS:
18618 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18619 std::swap (operands[1], operands[2]);
18620
18621 /* We know that operands[0] == operands[1]. */
18622
18623 if (MEM_P (operands[2]))
18624 {
18625 p = "%Z2\t%2";
18626 break;
18627 }
18628
18629 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18630 {
18631 if (STACK_TOP_P (operands[0]))
18632 /* How is it that we are storing to a dead operand[2]?
18633 Well, presumably operands[1] is dead too. We can't
18634 store the result to st(0) as st(0) gets popped on this
18635 instruction. Instead store to operands[2] (which I
18636 think has to be st(1)). st(1) will be popped later.
18637 gcc <= 2.8.1 didn't have this check and generated
18638 assembly code that the Unixware assembler rejected. */
18639 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18640 else
18641 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18642 break;
18643 }
18644
18645 if (STACK_TOP_P (operands[0]))
18646 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18647 else
18648 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18649 break;
18650
18651 case MINUS:
18652 case DIV:
18653 if (MEM_P (operands[1]))
18654 {
18655 p = "r%Z1\t%1";
18656 break;
18657 }
18658
18659 if (MEM_P (operands[2]))
18660 {
18661 p = "%Z2\t%2";
18662 break;
18663 }
18664
18665 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18666 {
18667 #if SYSV386_COMPAT
18668 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18669 derived assemblers, confusingly reverse the direction of
18670 the operation for fsub{r} and fdiv{r} when the
18671 destination register is not st(0). The Intel assembler
18672 doesn't have this brain damage. Read !SYSV386_COMPAT to
18673 figure out what the hardware really does. */
18674 if (STACK_TOP_P (operands[0]))
18675 p = "{p\t%0, %2|rp\t%2, %0}";
18676 else
18677 p = "{rp\t%2, %0|p\t%0, %2}";
18678 #else
18679 if (STACK_TOP_P (operands[0]))
18680 /* As above for fmul/fadd, we can't store to st(0). */
18681 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18682 else
18683 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18684 #endif
18685 break;
18686 }
18687
18688 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18689 {
18690 #if SYSV386_COMPAT
18691 if (STACK_TOP_P (operands[0]))
18692 p = "{rp\t%0, %1|p\t%1, %0}";
18693 else
18694 p = "{p\t%1, %0|rp\t%0, %1}";
18695 #else
18696 if (STACK_TOP_P (operands[0]))
18697 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18698 else
18699 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18700 #endif
18701 break;
18702 }
18703
18704 if (STACK_TOP_P (operands[0]))
18705 {
18706 if (STACK_TOP_P (operands[1]))
18707 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18708 else
18709 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18710 break;
18711 }
18712 else if (STACK_TOP_P (operands[1]))
18713 {
18714 #if SYSV386_COMPAT
18715 p = "{\t%1, %0|r\t%0, %1}";
18716 #else
18717 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18718 #endif
18719 }
18720 else
18721 {
18722 #if SYSV386_COMPAT
18723 p = "{r\t%2, %0|\t%0, %2}";
18724 #else
18725 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18726 #endif
18727 }
18728 break;
18729
18730 default:
18731 gcc_unreachable ();
18732 }
18733
18734 strcat (buf, p);
18735 return buf;
18736 }
18737
18738 /* Return needed mode for entity in optimize_mode_switching pass. */
18739
18740 static int
18741 ix86_dirflag_mode_needed (rtx_insn *insn)
18742 {
18743 if (CALL_P (insn))
18744 {
18745 if (cfun->machine->func_type == TYPE_NORMAL)
18746 return X86_DIRFLAG_ANY;
18747 else
18748 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18749 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18750 }
18751
18752 if (recog_memoized (insn) < 0)
18753 return X86_DIRFLAG_ANY;
18754
18755 if (get_attr_type (insn) == TYPE_STR)
18756 {
18757 /* Emit cld instruction if stringops are used in the function. */
18758 if (cfun->machine->func_type == TYPE_NORMAL)
18759 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18760 else
18761 return X86_DIRFLAG_RESET;
18762 }
18763
18764 return X86_DIRFLAG_ANY;
18765 }
18766
18767 /* Check if a 256bit AVX register is referenced inside of EXP. */
18768
18769 static bool
18770 ix86_check_avx256_register (const_rtx exp)
18771 {
18772 if (SUBREG_P (exp))
18773 exp = SUBREG_REG (exp);
18774
18775 return (REG_P (exp)
18776 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
18777 }
18778
18779 /* Return needed mode for entity in optimize_mode_switching pass. */
18780
18781 static int
18782 ix86_avx_u128_mode_needed (rtx_insn *insn)
18783 {
18784 if (CALL_P (insn))
18785 {
18786 rtx link;
18787
18788 /* The needed mode is set to AVX_U128_CLEAN if no 256bit modes
18789 are used in the function arguments. */
18790 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18791 link;
18792 link = XEXP (link, 1))
18793 {
18794 if (GET_CODE (XEXP (link, 0)) == USE)
18795 {
18796 rtx arg = XEXP (XEXP (link, 0), 0);
18797
18798 if (ix86_check_avx256_register (arg))
18799 return AVX_U128_DIRTY;
18800 }
18801 }
18802
18803 return AVX_U128_CLEAN;
18804 }
18805
18806 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
18807 changes state only when a 256bit register is written to, but we need
18808 to prevent the compiler from moving the optimal insertion point above
18809 an eventual read from a 256bit register. */
18810 subrtx_iterator::array_type array;
18811 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18812 if (ix86_check_avx256_register (*iter))
18813 return AVX_U128_DIRTY;
18814
18815 return AVX_U128_ANY;
18816 }
18817
18818 /* Return mode that i387 must be switched into
18819 prior to the execution of insn. */
18820
18821 static int
18822 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18823 {
18824 enum attr_i387_cw mode;
18825
18826 /* The mode UNINITIALIZED is used to store the control word after a
18827 function call or ASM pattern. The mode ANY specifies that the function
18828 has no requirements on the control word and makes no changes in the
18829 bits we are interested in. */
18830
18831 if (CALL_P (insn)
18832 || (NONJUMP_INSN_P (insn)
18833 && (asm_noperands (PATTERN (insn)) >= 0
18834 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18835 return I387_CW_UNINITIALIZED;
18836
18837 if (recog_memoized (insn) < 0)
18838 return I387_CW_ANY;
18839
18840 mode = get_attr_i387_cw (insn);
18841
18842 switch (entity)
18843 {
18844 case I387_TRUNC:
18845 if (mode == I387_CW_TRUNC)
18846 return mode;
18847 break;
18848
18849 case I387_FLOOR:
18850 if (mode == I387_CW_FLOOR)
18851 return mode;
18852 break;
18853
18854 case I387_CEIL:
18855 if (mode == I387_CW_CEIL)
18856 return mode;
18857 break;
18858
18859 case I387_MASK_PM:
18860 if (mode == I387_CW_MASK_PM)
18861 return mode;
18862 break;
18863
18864 default:
18865 gcc_unreachable ();
18866 }
18867
18868 return I387_CW_ANY;
18869 }
18870
18871 /* Return mode that entity must be switched into
18872 prior to the execution of insn. */
18873
18874 static int
18875 ix86_mode_needed (int entity, rtx_insn *insn)
18876 {
18877 switch (entity)
18878 {
18879 case X86_DIRFLAG:
18880 return ix86_dirflag_mode_needed (insn);
18881 case AVX_U128:
18882 return ix86_avx_u128_mode_needed (insn);
18883 case I387_TRUNC:
18884 case I387_FLOOR:
18885 case I387_CEIL:
18886 case I387_MASK_PM:
18887 return ix86_i387_mode_needed (entity, insn);
18888 default:
18889 gcc_unreachable ();
18890 }
18891 return 0;
18892 }
18893
18894 /* Check if a 256bit AVX register is referenced in stores. */
18895
18896 static void
18897 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18898 {
18899 if (ix86_check_avx256_register (dest))
18900 {
18901 bool *used = (bool *) data;
18902 *used = true;
18903 }
18904 }
18905
18906 /* Calculate mode of upper 128bit AVX registers after the insn. */
18907
18908 static int
18909 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18910 {
18911 rtx pat = PATTERN (insn);
18912
18913 if (vzeroupper_operation (pat, VOIDmode)
18914 || vzeroall_operation (pat, VOIDmode))
18915 return AVX_U128_CLEAN;
18916
18917 /* We know that the state is clean after a CALL insn if no 256bit
18918 register is used in the function return value. */
18919 if (CALL_P (insn))
18920 {
18921 bool avx_reg256_found = false;
18922 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18923
18924 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18925 }
18926
18927 /* Otherwise, return the current mode. Remember that if the insn
18928 references AVX 256bit registers, the mode was already changed
18929 to DIRTY by MODE_NEEDED. */
18930 return mode;
18931 }
18932
18933 /* Return the mode that an insn results in. */
18934
18935 static int
18936 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18937 {
18938 switch (entity)
18939 {
18940 case X86_DIRFLAG:
18941 return mode;
18942 case AVX_U128:
18943 return ix86_avx_u128_mode_after (mode, insn);
18944 case I387_TRUNC:
18945 case I387_FLOOR:
18946 case I387_CEIL:
18947 case I387_MASK_PM:
18948 return mode;
18949 default:
18950 gcc_unreachable ();
18951 }
18952 }
18953
18954 static int
18955 ix86_dirflag_mode_entry (void)
18956 {
18957 /* For TARGET_CLD or in the interrupt handler we can't assume
18958 direction flag state at function entry. */
18959 if (TARGET_CLD
18960 || cfun->machine->func_type != TYPE_NORMAL)
18961 return X86_DIRFLAG_ANY;
18962
18963 return X86_DIRFLAG_RESET;
18964 }
18965
18966 static int
18967 ix86_avx_u128_mode_entry (void)
18968 {
18969 tree arg;
18970
18971 /* The entry mode is set to AVX_U128_DIRTY if 256bit modes are
18972 used in the function arguments. */
18973 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18974 arg = TREE_CHAIN (arg))
18975 {
18976 rtx incoming = DECL_INCOMING_RTL (arg);
18977
18978 if (incoming && ix86_check_avx256_register (incoming))
18979 return AVX_U128_DIRTY;
18980 }
18981
18982 return AVX_U128_CLEAN;
18983 }
18984
18985 /* Return a mode that ENTITY is assumed to be
18986 switched to at function entry. */
18987
18988 static int
18989 ix86_mode_entry (int entity)
18990 {
18991 switch (entity)
18992 {
18993 case X86_DIRFLAG:
18994 return ix86_dirflag_mode_entry ();
18995 case AVX_U128:
18996 return ix86_avx_u128_mode_entry ();
18997 case I387_TRUNC:
18998 case I387_FLOOR:
18999 case I387_CEIL:
19000 case I387_MASK_PM:
19001 return I387_CW_ANY;
19002 default:
19003 gcc_unreachable ();
19004 }
19005 }
19006
19007 static int
19008 ix86_avx_u128_mode_exit (void)
19009 {
19010 rtx reg = crtl->return_rtx;
19011
19012 /* The exit mode is set to AVX_U128_DIRTY if a 256bit mode is
19013 used in the function return register. */
19014 if (reg && ix86_check_avx256_register (reg))
19015 return AVX_U128_DIRTY;
19016
19017 return AVX_U128_CLEAN;
19018 }
19019
19020 /* Return a mode that ENTITY is assumed to be
19021 switched to at function exit. */
19022
19023 static int
19024 ix86_mode_exit (int entity)
19025 {
19026 switch (entity)
19027 {
19028 case X86_DIRFLAG:
19029 return X86_DIRFLAG_ANY;
19030 case AVX_U128:
19031 return ix86_avx_u128_mode_exit ();
19032 case I387_TRUNC:
19033 case I387_FLOOR:
19034 case I387_CEIL:
19035 case I387_MASK_PM:
19036 return I387_CW_ANY;
19037 default:
19038 gcc_unreachable ();
19039 }
19040 }
19041
19042 static int
19043 ix86_mode_priority (int, int n)
19044 {
19045 return n;
19046 }
19047
19048 /* Output code to initialize the control word copies used by the trunc?f?i
19049 and rounding patterns. CURRENT_MODE is set to the current control word,
19050 while NEW_MODE is set to the new control word. */
19051
19052 static void
19053 emit_i387_cw_initialization (int mode)
19054 {
19055 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19056 rtx new_mode;
19057
19058 enum ix86_stack_slot slot;
19059
19060 rtx reg = gen_reg_rtx (HImode);
19061
19062 emit_insn (gen_x86_fnstcw_1 (stored_mode));
19063 emit_move_insn (reg, copy_rtx (stored_mode));
19064
19065 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
19066 || optimize_insn_for_size_p ())
19067 {
19068 switch (mode)
19069 {
19070 case I387_CW_TRUNC:
19071 /* round toward zero (truncate) */
19072 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19073 slot = SLOT_CW_TRUNC;
19074 break;
19075
19076 case I387_CW_FLOOR:
19077 /* round down toward -oo */
19078 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19080 slot = SLOT_CW_FLOOR;
19081 break;
19082
19083 case I387_CW_CEIL:
19084 /* round up toward +oo */
19085 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19087 slot = SLOT_CW_CEIL;
19088 break;
19089
19090 case I387_CW_MASK_PM:
19091 /* mask precision exception for nearbyint() */
19092 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19093 slot = SLOT_CW_MASK_PM;
19094 break;
19095
19096 default:
19097 gcc_unreachable ();
19098 }
19099 }
19100 else
19101 {
19102 switch (mode)
19103 {
19104 case I387_CW_TRUNC:
19105 /* round toward zero (truncate) */
19106 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
19107 slot = SLOT_CW_TRUNC;
19108 break;
19109
19110 case I387_CW_FLOOR:
19111 /* round down toward -oo */
19112 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
19113 slot = SLOT_CW_FLOOR;
19114 break;
19115
19116 case I387_CW_CEIL:
19117 /* round up toward +oo */
19118 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
19119 slot = SLOT_CW_CEIL;
19120 break;
19121
19122 case I387_CW_MASK_PM:
19123 /* mask precision exception for nearbyint() */
19124 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19125 slot = SLOT_CW_MASK_PM;
19126 break;
19127
19128 default:
19129 gcc_unreachable ();
19130 }
19131 }
19132
19133 gcc_assert (slot < MAX_386_STACK_LOCALS);
19134
19135 new_mode = assign_386_stack_local (HImode, slot);
19136 emit_move_insn (new_mode, reg);
19137 }
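/* Background note (general x87 facts, not taken from this file): bits 10
   and 11 of the control word select the rounding mode - 00 nearest,
   01 down, 10 up, 11 truncate - which is why the masks above are 0x0400,
   0x0800 and 0x0c00, and bit 5 (0x0020) masks the precision exception
   for nearbyint.  */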
19138
19139 /* Emit vzeroupper. */
19140
19141 void
19142 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19143 {
19144 int i;
19145
19146 /* Cancel automatic vzeroupper insertion if there are
19147 live call-saved SSE registers at the insertion point. */
19148
19149 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19150 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19151 return;
19152
19153 if (TARGET_64BIT)
19154 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19155 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19156 return;
19157
19158 emit_insn (gen_avx_vzeroupper ());
19159 }
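/* Motivating note (general hardware behaviour, not taken from this file):
   running legacy SSE code while the upper halves of the ymm registers are
   dirty incurs a state-transition penalty on several microarchitectures,
   which is why mode switching inserts vzeroupper before such code and
   before calls that take no 256bit arguments.  */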
19160
19163 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
19164 is the set of hard registers live at the point where the insn(s)
19165 are to be inserted. */
19166
19167 static void
19168 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19169 HARD_REG_SET regs_live)
19170 {
19171 switch (entity)
19172 {
19173 case X86_DIRFLAG:
19174 if (mode == X86_DIRFLAG_RESET)
19175 emit_insn (gen_cld ());
19176 break;
19177 case AVX_U128:
19178 if (mode == AVX_U128_CLEAN)
19179 ix86_avx_emit_vzeroupper (regs_live);
19180 break;
19181 case I387_TRUNC:
19182 case I387_FLOOR:
19183 case I387_CEIL:
19184 case I387_MASK_PM:
19185 if (mode != I387_CW_ANY
19186 && mode != I387_CW_UNINITIALIZED)
19187 emit_i387_cw_initialization (mode);
19188 break;
19189 default:
19190 gcc_unreachable ();
19191 }
19192 }
19193
19194 /* Output code for INSN to convert a float to a signed int. OPERANDS
19195 are the insn operands. The output may be [HSD]Imode and the input
19196 operand may be [SDX]Fmode. */
19197
19198 const char *
19199 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19200 {
19201 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
19202 int dimode_p = GET_MODE (operands[0]) == DImode;
19203 int round_mode = get_attr_i387_cw (insn);
19204
19205 /* Jump through a hoop or two for DImode, since the hardware has no
19206 non-popping instruction. We used to do this a different way, but
19207 that was somewhat fragile and broke with post-reload splitters. */
19208 if ((dimode_p || fisttp) && !stack_top_dies)
19209 output_asm_insn ("fld\t%y1", operands);
19210
19211 gcc_assert (STACK_TOP_P (operands[1]));
19212 gcc_assert (MEM_P (operands[0]));
19213 gcc_assert (GET_MODE (operands[1]) != TFmode);
19214
19215 if (fisttp)
19216 output_asm_insn ("fisttp%Z0\t%0", operands);
19217 else
19218 {
19219 if (round_mode != I387_CW_ANY)
19220 output_asm_insn ("fldcw\t%3", operands);
19221 if (stack_top_dies || dimode_p)
19222 output_asm_insn ("fistp%Z0\t%0", operands);
19223 else
19224 output_asm_insn ("fist%Z0\t%0", operands);
19225 if (round_mode != I387_CW_ANY)
19226 output_asm_insn ("fldcw\t%2", operands);
19227 }
19228
19229 return "";
19230 }
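/* Sketch of a typical emitted sequence (illustrative; the exact suffix
   depends on the operand mode and assembler support): without fisttp and
   with a rounding-mode change the output is essentially
	fldcw	%3	(switch to the truncating control word)
	fistp%Z0	%0	(store the integer and pop)
	fldcw	%2	(restore the original control word)
   preceded by "fld %y1" for DImode when the stack top does not die.  */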
19231
19232 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19233 have the values zero or one, indicates the ffreep insn's operand
19234 from the OPERANDS array. */
19235
19236 static const char *
19237 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19238 {
19239 if (TARGET_USE_FFREEP)
19240 #ifdef HAVE_AS_IX86_FFREEP
19241 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19242 #else
19243 {
19244 static char retval[32];
19245 int regno = REGNO (operands[opno]);
19246
19247 gcc_assert (STACK_REGNO_P (regno));
19248
19249 regno -= FIRST_STACK_REG;
19250
19251 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19252 return retval;
19253 }
19254 #endif
19255
19256 return opno ? "fstp\t%y1" : "fstp\t%y0";
19257 }
19258
19259
19260 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
19261 should be used. UNORDERED_P is true when fucom should be used. */
19262
19263 const char *
19264 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
19265 {
19266 int stack_top_dies;
19267 rtx cmp_op0, cmp_op1;
19268 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
19269
19270 if (eflags_p)
19271 {
19272 cmp_op0 = operands[0];
19273 cmp_op1 = operands[1];
19274 }
19275 else
19276 {
19277 cmp_op0 = operands[1];
19278 cmp_op1 = operands[2];
19279 }
19280
19281 if (is_sse)
19282 {
19283 if (GET_MODE (operands[0]) == SFmode)
19284 if (unordered_p)
19285 return "%vucomiss\t{%1, %0|%0, %1}";
19286 else
19287 return "%vcomiss\t{%1, %0|%0, %1}";
19288 else
19289 if (unordered_p)
19290 return "%vucomisd\t{%1, %0|%0, %1}";
19291 else
19292 return "%vcomisd\t{%1, %0|%0, %1}";
19293 }
19294
19295 gcc_assert (STACK_TOP_P (cmp_op0));
19296
19297 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
19298
19299 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
19300 {
19301 if (stack_top_dies)
19302 {
19303 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
19304 return output_387_ffreep (operands, 1);
19305 }
19306 else
19307 return "ftst\n\tfnstsw\t%0";
19308 }
19309
19310 if (STACK_REG_P (cmp_op1)
19311 && stack_top_dies
19312 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
19313 && REGNO (cmp_op1) != FIRST_STACK_REG)
19314 {
19315 /* If the top of the 387 stack dies, and the other operand is
19316 also a stack register that dies, then this must be a
19317 `fcompp' float compare. */
19318
19319 if (eflags_p)
19320 {
19321 /* There is no double popping fcomi variant. Fortunately,
19322 eflags is immune from the fstp's cc clobbering. */
19323 if (unordered_p)
19324 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
19325 else
19326 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
19327 return output_387_ffreep (operands, 0);
19328 }
19329 else
19330 {
19331 if (unordered_p)
19332 return "fucompp\n\tfnstsw\t%0";
19333 else
19334 return "fcompp\n\tfnstsw\t%0";
19335 }
19336 }
19337 else
19338 {
19339 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
19340
19341 static const char * const alt[16] =
19342 {
19343 "fcom%Z2\t%y2\n\tfnstsw\t%0",
19344 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
19345 "fucom%Z2\t%y2\n\tfnstsw\t%0",
19346 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
19347
19348 "ficom%Z2\t%y2\n\tfnstsw\t%0",
19349 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
19350 NULL,
19351 NULL,
19352
19353 "fcomi\t{%y1, %0|%0, %y1}",
19354 "fcomip\t{%y1, %0|%0, %y1}",
19355 "fucomi\t{%y1, %0|%0, %y1}",
19356 "fucomip\t{%y1, %0|%0, %y1}",
19357
19358 NULL,
19359 NULL,
19360 NULL,
19361 NULL
19362 };
19363
19364 int mask;
19365 const char *ret;
19366
19367 mask = eflags_p << 3;
19368 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
19369 mask |= unordered_p << 1;
19370 mask |= stack_top_dies;
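      /* Worked example of the encoding (illustrative): eflags_p = 1, a
	 floating-point cmp_op1 (MODE_INT bit 0), unordered_p = 1 and a
	 dying stack top give mask = 8 + 2 + 1 = 11, which selects
	 "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */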
19371
19372 gcc_assert (mask < 16);
19373 ret = alt[mask];
19374 gcc_assert (ret);
19375
19376 return ret;
19377 }
19378 }
19379
19380 void
19381 ix86_output_addr_vec_elt (FILE *file, int value)
19382 {
19383 const char *directive = ASM_LONG;
19384
19385 #ifdef ASM_QUAD
19386 if (TARGET_LP64)
19387 directive = ASM_QUAD;
19388 #else
19389 gcc_assert (!TARGET_64BIT);
19390 #endif
19391
19392 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19393 }
19394
19395 void
19396 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19397 {
19398 const char *directive = ASM_LONG;
19399
19400 #ifdef ASM_QUAD
19401 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19402 directive = ASM_QUAD;
19403 #else
19404 gcc_assert (!TARGET_64BIT);
19405 #endif
19406 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19407 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19408 fprintf (file, "%s%s%d-%s%d\n",
19409 directive, LPREFIX, value, LPREFIX, rel);
19410 else if (HAVE_AS_GOTOFF_IN_DATA)
19411 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19412 #if TARGET_MACHO
19413 else if (TARGET_MACHO)
19414 {
19415 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19416 machopic_output_function_base_name (file);
19417 putc ('\n', file);
19418 }
19419 #endif
19420 else
19421 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19422 GOT_SYMBOL_NAME, LPREFIX, value);
19423 }
19424 \f
19425 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19426 for the target. */
19427
19428 void
19429 ix86_expand_clear (rtx dest)
19430 {
19431 rtx tmp;
19432
19433 /* We play register width games, which are only valid after reload. */
19434 gcc_assert (reload_completed);
19435
19436 /* Avoid HImode and its attendant prefix byte. */
19437 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19438 dest = gen_rtx_REG (SImode, REGNO (dest));
19439 tmp = gen_rtx_SET (dest, const0_rtx);
19440
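/* The xor idiom clobbers the flags, so when it may be chosen (mov $0 is
   not preferred by the tuning, or we optimize for size) the set is wrapped
   in a PARALLEL with a clobber of FLAGS_REG.  */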
19441 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19442 {
19443 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19444 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
19445 }
19446
19447 emit_insn (tmp);
19448 }
19449
19450 /* X is an unchanging MEM. If it is a constant pool reference, return
19451 the constant pool rtx, else NULL. */
19452
19453 rtx
19454 maybe_get_pool_constant (rtx x)
19455 {
19456 x = ix86_delegitimize_address (XEXP (x, 0));
19457
19458 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
19459 return get_pool_constant (x);
19460
19461 return NULL_RTX;
19462 }
19463
19464 void
19465 ix86_expand_move (machine_mode mode, rtx operands[])
19466 {
19467 rtx op0, op1;
19468 rtx tmp, addend = NULL_RTX;
19469 enum tls_model model;
19470
19471 op0 = operands[0];
19472 op1 = operands[1];
19473
19474 switch (GET_CODE (op1))
19475 {
19476 case CONST:
19477 tmp = XEXP (op1, 0);
19478
19479 if (GET_CODE (tmp) != PLUS
19480 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19481 break;
19482
19483 op1 = XEXP (tmp, 0);
19484 addend = XEXP (tmp, 1);
19485 /* FALLTHRU */
19486
19487 case SYMBOL_REF:
19488 model = SYMBOL_REF_TLS_MODEL (op1);
19489
19490 if (model)
19491 op1 = legitimize_tls_address (op1, model, true);
19492 else if (ix86_force_load_from_GOT_p (op1))
19493 {
19494 /* Load the external function address via GOT slot to avoid PLT. */
19495 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19496 (TARGET_64BIT
19497 ? UNSPEC_GOTPCREL
19498 : UNSPEC_GOT));
19499 op1 = gen_rtx_CONST (Pmode, op1);
19500 op1 = gen_const_mem (Pmode, op1);
19501 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19502 }
19503 else
19504 {
19505 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19506 if (tmp)
19507 {
19508 op1 = tmp;
19509 if (!addend)
19510 break;
19511 }
19512 else
19513 {
19514 op1 = operands[1];
19515 break;
19516 }
19517 }
19518
19519 if (addend)
19520 {
19521 op1 = force_operand (op1, NULL_RTX);
19522 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19523 op0, 1, OPTAB_DIRECT);
19524 }
19525 else
19526 op1 = force_operand (op1, op0);
19527
19528 if (op1 == op0)
19529 return;
19530
19531 op1 = convert_to_mode (mode, op1, 1);
19532
19533 default:
19534 break;
19535 }
19536
19537 if ((flag_pic || MACHOPIC_INDIRECT)
19538 && symbolic_operand (op1, mode))
19539 {
19540 if (TARGET_MACHO && !TARGET_64BIT)
19541 {
19542 #if TARGET_MACHO
19543 /* dynamic-no-pic */
19544 if (MACHOPIC_INDIRECT)
19545 {
19546 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19547 ? op0 : gen_reg_rtx (Pmode);
19548 op1 = machopic_indirect_data_reference (op1, temp);
19549 if (MACHOPIC_PURE)
19550 op1 = machopic_legitimize_pic_address (op1, mode,
19551 temp == op1 ? 0 : temp);
19552 }
19553 if (op0 != op1 && GET_CODE (op0) != MEM)
19554 {
19555 rtx insn = gen_rtx_SET (op0, op1);
19556 emit_insn (insn);
19557 return;
19558 }
19559 if (GET_CODE (op0) == MEM)
19560 op1 = force_reg (Pmode, op1);
19561 else
19562 {
19563 rtx temp = op0;
19564 if (GET_CODE (temp) != REG)
19565 temp = gen_reg_rtx (Pmode);
19566 temp = legitimize_pic_address (op1, temp);
19567 if (temp == op0)
19568 return;
19569 op1 = temp;
19570 }
19571 /* dynamic-no-pic */
19572 #endif
19573 }
19574 else
19575 {
19576 if (MEM_P (op0))
19577 op1 = force_reg (mode, op1);
19578 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19579 {
19580 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19581 op1 = legitimize_pic_address (op1, reg);
19582 if (op0 == op1)
19583 return;
19584 op1 = convert_to_mode (mode, op1, 1);
19585 }
19586 }
19587 }
19588 else
19589 {
19590 if (MEM_P (op0)
19591 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19592 || !push_operand (op0, mode))
19593 && MEM_P (op1))
19594 op1 = force_reg (mode, op1);
19595
19596 if (push_operand (op0, mode)
19597 && ! general_no_elim_operand (op1, mode))
19598 op1 = copy_to_mode_reg (mode, op1);
19599
19600 /* Force large constants in 64-bit compilation into a register
19601 so that they get CSEed. */
19602 if (can_create_pseudo_p ()
19603 && (mode == DImode) && TARGET_64BIT
19604 && immediate_operand (op1, mode)
19605 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19606 && !register_operand (op0, mode)
19607 && optimize)
19608 op1 = copy_to_mode_reg (mode, op1);
19609
19610 if (can_create_pseudo_p ()
19611 && CONST_DOUBLE_P (op1))
19612 {
19613 /* If we are loading a floating point constant to a register,
19614 force the value to memory now, since we'll get better code
19615 out the back end. */
19616
19617 op1 = validize_mem (force_const_mem (mode, op1));
19618 if (!register_operand (op0, mode))
19619 {
19620 rtx temp = gen_reg_rtx (mode);
19621 emit_insn (gen_rtx_SET (temp, op1));
19622 emit_move_insn (op0, temp);
19623 return;
19624 }
19625 }
19626 }
19627
19628 emit_insn (gen_rtx_SET (op0, op1));
19629 }
19630
19631 void
19632 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19633 {
19634 rtx op0 = operands[0], op1 = operands[1];
19635 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
19636 psABI, since the biggest alignment is 4 bytes for the IA MCU psABI. */
19637 unsigned int align = (TARGET_IAMCU
19638 ? GET_MODE_BITSIZE (mode)
19639 : GET_MODE_ALIGNMENT (mode));
19640
19641 if (push_operand (op0, VOIDmode))
19642 op0 = emit_move_resolve_push (mode, op0);
19643
19644 /* Force constants other than zero into memory. We do not know how
19645 the instructions used to build constants modify the upper 64 bits
19646 of the register; once we have that information we may be able
19647 to handle some of them more efficiently. */
19648 if (can_create_pseudo_p ()
19649 && (CONSTANT_P (op1)
19650 || (SUBREG_P (op1)
19651 && CONSTANT_P (SUBREG_REG (op1))))
19652 && ((register_operand (op0, mode)
19653 && !standard_sse_constant_p (op1, mode))
19654 /* ix86_expand_vector_move_misalign() does not like constants. */
19655 || (SSE_REG_MODE_P (mode)
19656 && MEM_P (op0)
19657 && MEM_ALIGN (op0) < align)))
19658 {
19659 if (SUBREG_P (op1))
19660 {
19661 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19662 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19663 if (r)
19664 r = validize_mem (r);
19665 else
19666 r = force_reg (imode, SUBREG_REG (op1));
19667 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19668 }
19669 else
19670 op1 = validize_mem (force_const_mem (mode, op1));
19671 }
19672
19673 /* We need to check memory alignment for SSE mode since an attribute
19674 can make operands unaligned. */
19675 if (can_create_pseudo_p ()
19676 && SSE_REG_MODE_P (mode)
19677 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19678 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19679 {
19680 rtx tmp[2];
19681
19682 /* ix86_expand_vector_move_misalign() does not like both
19683 arguments in memory. */
19684 if (!register_operand (op0, mode)
19685 && !register_operand (op1, mode))
19686 op1 = force_reg (mode, op1);
19687
19688 tmp[0] = op0; tmp[1] = op1;
19689 ix86_expand_vector_move_misalign (mode, tmp);
19690 return;
19691 }
19692
19693 /* Make operand1 a register if it isn't already. */
19694 if (can_create_pseudo_p ()
19695 && !register_operand (op0, mode)
19696 && !register_operand (op1, mode))
19697 {
19698 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19699 return;
19700 }
19701
19702 emit_insn (gen_rtx_SET (op0, op1));
19703 }
19704
19705 /* Split 32-byte AVX unaligned load and store if needed. */
19706
19707 static void
19708 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19709 {
19710 rtx m;
19711 rtx (*extract) (rtx, rtx, rtx);
19712 machine_mode mode;
19713
19714 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19715 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19716 {
19717 emit_insn (gen_rtx_SET (op0, op1));
19718 return;
19719 }
19720
19721 rtx orig_op0 = NULL_RTX;
19722 mode = GET_MODE (op0);
19723 switch (GET_MODE_CLASS (mode))
19724 {
19725 case MODE_VECTOR_INT:
19726 case MODE_INT:
19727 if (mode != V32QImode)
19728 {
19729 if (!MEM_P (op0))
19730 {
19731 orig_op0 = op0;
19732 op0 = gen_reg_rtx (V32QImode);
19733 }
19734 else
19735 op0 = gen_lowpart (V32QImode, op0);
19736 op1 = gen_lowpart (V32QImode, op1);
19737 mode = V32QImode;
19738 }
19739 break;
19740 case MODE_VECTOR_FLOAT:
19741 break;
19742 default:
19743 gcc_unreachable ();
19744 }
19745
19746 switch (mode)
19747 {
19748 default:
19749 gcc_unreachable ();
19750 case V32QImode:
19751 extract = gen_avx_vextractf128v32qi;
19752 mode = V16QImode;
19753 break;
19754 case V8SFmode:
19755 extract = gen_avx_vextractf128v8sf;
19756 mode = V4SFmode;
19757 break;
19758 case V4DFmode:
19759 extract = gen_avx_vextractf128v4df;
19760 mode = V2DFmode;
19761 break;
19762 }
19763
19764 if (MEM_P (op1))
19765 {
19766 rtx r = gen_reg_rtx (mode);
19767 m = adjust_address (op1, mode, 0);
19768 emit_move_insn (r, m);
19769 m = adjust_address (op1, mode, 16);
19770 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19771 emit_move_insn (op0, r);
19772 }
19773 else if (MEM_P (op0))
19774 {
19775 m = adjust_address (op0, mode, 0);
19776 emit_insn (extract (m, op1, const0_rtx));
19777 m = adjust_address (op0, mode, 16);
19778 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19779 }
19780 else
19781 gcc_unreachable ();
19782
19783 if (orig_op0)
19784 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19785 }
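/* As a rough illustration (the exact output depends on operands and
   tuning), a 32-byte unaligned V8SF load split by the function above is
   emitted roughly as two 16-byte halves:

       vmovups     (mem), %xmm0
       vinsertf128 $1, 16(mem), %ymm0, %ymm0

   and a store becomes the corresponding pair of 128-bit extract/stores
   of the low and high halves.  */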
19786
19787 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19788 straight to ix86_expand_vector_move. */
19789 /* Code generation for scalar reg-reg moves of single and double precision data:
19790 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
19791 movaps reg, reg
19792 else
19793 movss reg, reg
19794 if (x86_sse_partial_reg_dependency == true)
19795 movapd reg, reg
19796 else
19797 movsd reg, reg
19798
19799 Code generation for scalar loads of double precision data:
19800 if (x86_sse_split_regs == true)
19801 movlpd mem, reg (gas syntax)
19802 else
19803 movsd mem, reg
19804
19805 Code generation for unaligned packed loads of single precision data
19806 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19807 if (x86_sse_unaligned_move_optimal)
19808 movups mem, reg
19809
19810 if (x86_sse_partial_reg_dependency == true)
19811 {
19812 xorps reg, reg
19813 movlps mem, reg
19814 movhps mem+8, reg
19815 }
19816 else
19817 {
19818 movlps mem, reg
19819 movhps mem+8, reg
19820 }
19821
19822 Code generation for unaligned packed loads of double precision data
19823 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19824 if (x86_sse_unaligned_move_optimal)
19825 movupd mem, reg
19826
19827 if (x86_sse_split_regs == true)
19828 {
19829 movlpd mem, reg
19830 movhpd mem+8, reg
19831 }
19832 else
19833 {
19834 movsd mem, reg
19835 movhpd mem+8, reg
19836 }
19837 */
19838
19839 void
19840 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19841 {
19842 rtx op0, op1, m;
19843
19844 op0 = operands[0];
19845 op1 = operands[1];
19846
19847 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19848 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19849 {
19850 emit_insn (gen_rtx_SET (op0, op1));
19851 return;
19852 }
19853
19854 if (TARGET_AVX)
19855 {
19856 if (GET_MODE_SIZE (mode) == 32)
19857 ix86_avx256_split_vector_move_misalign (op0, op1);
19858 else
19859 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19860 emit_insn (gen_rtx_SET (op0, op1));
19861 return;
19862 }
19863
19864 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19865 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19866 {
19867 emit_insn (gen_rtx_SET (op0, op1));
19868 return;
19869 }
19870
19871 /* ??? If we have typed data, then it would appear that using
19872 movdqu is the only way to get unaligned data loaded with
19873 integer type. */
19874 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19875 {
19876 emit_insn (gen_rtx_SET (op0, op1));
19877 return;
19878 }
19879
19880 if (MEM_P (op1))
19881 {
19882 if (TARGET_SSE2 && mode == V2DFmode)
19883 {
19884 rtx zero;
19885
19886 /* When SSE registers are split into halves, we can avoid
19887 writing to the top half twice. */
19888 if (TARGET_SSE_SPLIT_REGS)
19889 {
19890 emit_clobber (op0);
19891 zero = op0;
19892 }
19893 else
19894 {
19895 /* ??? Not sure about the best option for the Intel chips.
19896 The following would seem to satisfy; the register is
19897 entirely cleared, breaking the dependency chain. We
19898 then store to the upper half, with a dependency depth
19899 of one. A rumor has it that Intel recommends two movsd
19900 followed by an unpacklpd, but this is unconfirmed. And
19901 given that the dependency depth of the unpacklpd would
19902 still be one, I'm not sure why this would be better. */
19903 zero = CONST0_RTX (V2DFmode);
19904 }
19905
19906 m = adjust_address (op1, DFmode, 0);
19907 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19908 m = adjust_address (op1, DFmode, 8);
19909 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19910 }
19911 else
19912 {
19913 rtx t;
19914
19915 if (mode != V4SFmode)
19916 t = gen_reg_rtx (V4SFmode);
19917 else
19918 t = op0;
19919
19920 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19921 emit_move_insn (t, CONST0_RTX (V4SFmode));
19922 else
19923 emit_clobber (t);
19924
19925 m = adjust_address (op1, V2SFmode, 0);
19926 emit_insn (gen_sse_loadlps (t, t, m));
19927 m = adjust_address (op1, V2SFmode, 8);
19928 emit_insn (gen_sse_loadhps (t, t, m));
19929 if (mode != V4SFmode)
19930 emit_move_insn (op0, gen_lowpart (mode, t));
19931 }
19932 }
19933 else if (MEM_P (op0))
19934 {
19935 if (TARGET_SSE2 && mode == V2DFmode)
19936 {
19937 m = adjust_address (op0, DFmode, 0);
19938 emit_insn (gen_sse2_storelpd (m, op1));
19939 m = adjust_address (op0, DFmode, 8);
19940 emit_insn (gen_sse2_storehpd (m, op1));
19941 }
19942 else
19943 {
19944 if (mode != V4SFmode)
19945 op1 = gen_lowpart (V4SFmode, op1);
19946
19947 m = adjust_address (op0, V2SFmode, 0);
19948 emit_insn (gen_sse_storelps (m, op1));
19949 m = adjust_address (op0, V2SFmode, 8);
19950 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19951 }
19952 }
19953 else
19954 gcc_unreachable ();
19955 }
19956
19957 /* Helper function of ix86_fixup_binary_operands to canonicalize
19958 operand order. Returns true if the operands should be swapped. */
19959
19960 static bool
19961 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19962 rtx operands[])
19963 {
19964 rtx dst = operands[0];
19965 rtx src1 = operands[1];
19966 rtx src2 = operands[2];
19967
19968 /* If the operation is not commutative, we can't do anything. */
19969 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19970 return false;
19971
19972 /* Highest priority is that src1 should match dst. */
19973 if (rtx_equal_p (dst, src1))
19974 return false;
19975 if (rtx_equal_p (dst, src2))
19976 return true;
19977
19978 /* Next highest priority is that immediate constants come second. */
19979 if (immediate_operand (src2, mode))
19980 return false;
19981 if (immediate_operand (src1, mode))
19982 return true;
19983
19984 /* Lowest priority is that memory references should come second. */
19985 if (MEM_P (src2))
19986 return false;
19987 if (MEM_P (src1))
19988 return true;
19989
19990 return false;
19991 }
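/* For example: for a commutative PLUS with dst = %eax, src1 = $5 and
   src2 = %eax, src1 does not match dst but src2 does, so the function
   above returns true and the caller swaps the sources, giving the
   matching form %eax = %eax + $5.  */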
19992
19993
19994 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19995 destination to use for the operation. If different from the true
19996 destination in operands[0], a copy operation will be required. */
19997
19998 rtx
19999 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
20000 rtx operands[])
20001 {
20002 rtx dst = operands[0];
20003 rtx src1 = operands[1];
20004 rtx src2 = operands[2];
20005
20006 /* Canonicalize operand order. */
20007 if (ix86_swap_binary_operands_p (code, mode, operands))
20008 {
20009 /* It is invalid to swap operands of different modes. */
20010 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
20011
20012 std::swap (src1, src2);
20013 }
20014
20015 /* Both source operands cannot be in memory. */
20016 if (MEM_P (src1) && MEM_P (src2))
20017 {
20018 /* Optimization: Only read from memory once. */
20019 if (rtx_equal_p (src1, src2))
20020 {
20021 src2 = force_reg (mode, src2);
20022 src1 = src2;
20023 }
20024 else if (rtx_equal_p (dst, src1))
20025 src2 = force_reg (mode, src2);
20026 else
20027 src1 = force_reg (mode, src1);
20028 }
20029
20030 /* If the destination is memory, and we do not have matching source
20031 operands, do things in registers. */
20032 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20033 dst = gen_reg_rtx (mode);
20034
20035 /* Source 1 cannot be a constant. */
20036 if (CONSTANT_P (src1))
20037 src1 = force_reg (mode, src1);
20038
20039 /* Source 1 cannot be a non-matching memory. */
20040 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20041 src1 = force_reg (mode, src1);
20042
20043 /* Improve address combine. */
20044 if (code == PLUS
20045 && GET_MODE_CLASS (mode) == MODE_INT
20046 && MEM_P (src2))
20047 src2 = force_reg (mode, src2);
20048
20049 operands[1] = src1;
20050 operands[2] = src2;
20051 return dst;
20052 }
20053
20054 /* Similarly, but assume that the destination has already been
20055 set up properly. */
20056
20057 void
20058 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
20059 machine_mode mode, rtx operands[])
20060 {
20061 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
20062 gcc_assert (dst == operands[0]);
20063 }
20064
20065 /* Attempt to expand a binary operator. Make the expansion closer to the
20066 actual machine than just using general_operand, which would allow 3 separate
20067 memory references (one output, two inputs) in a single insn. */
20068
20069 void
20070 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
20071 rtx operands[])
20072 {
20073 rtx src1, src2, dst, op, clob;
20074
20075 dst = ix86_fixup_binary_operands (code, mode, operands);
20076 src1 = operands[1];
20077 src2 = operands[2];
20078
20079 /* Emit the instruction. */
20080
20081 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
20082
20083 if (reload_completed
20084 && code == PLUS
20085 && !rtx_equal_p (dst, src1))
20086 {
20087 /* This is going to be an LEA; avoid splitting it later. */
20088 emit_insn (op);
20089 }
20090 else
20091 {
20092 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20093 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20094 }
20095
20096 /* Fix up the destination if needed. */
20097 if (dst != operands[0])
20098 emit_move_insn (operands[0], dst);
20099 }
20100
20101 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20102 the given OPERANDS. */
20103
20104 void
20105 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20106 rtx operands[])
20107 {
20108 rtx op1 = NULL_RTX, op2 = NULL_RTX;
20109 if (SUBREG_P (operands[1]))
20110 {
20111 op1 = operands[1];
20112 op2 = operands[2];
20113 }
20114 else if (SUBREG_P (operands[2]))
20115 {
20116 op1 = operands[2];
20117 op2 = operands[1];
20118 }
20119 /* Optimize (__m128i) d | (__m128i) e and similar code
20120 when d and e are float vectors into float vector logical
20121 insn. In C/C++ without using intrinsics there is no other way
20122 to express vector logical operation on float vectors than
20123 to cast them temporarily to integer vectors. */
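/* For example, a sketch of the source-level pattern this targets, written
   with GCC vector extensions rather than intrinsics:

       typedef float v4sf __attribute__ ((vector_size (16)));
       typedef int   v4si __attribute__ ((vector_size (16)));
       v4sf a, b;
       v4sf c = (v4sf) ((v4si) a | (v4si) b);

   The SUBREGs around the float vectors let us emit orps on the float
   vector directly instead of going through the integer form.  */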
20124 if (op1
20125 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20126 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20127 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20128 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20129 && SUBREG_BYTE (op1) == 0
20130 && (GET_CODE (op2) == CONST_VECTOR
20131 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20132 && SUBREG_BYTE (op2) == 0))
20133 && can_create_pseudo_p ())
20134 {
20135 rtx dst;
20136 switch (GET_MODE (SUBREG_REG (op1)))
20137 {
20138 case V4SFmode:
20139 case V8SFmode:
20140 case V16SFmode:
20141 case V2DFmode:
20142 case V4DFmode:
20143 case V8DFmode:
20144 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20145 if (GET_CODE (op2) == CONST_VECTOR)
20146 {
20147 op2 = gen_lowpart (GET_MODE (dst), op2);
20148 op2 = force_reg (GET_MODE (dst), op2);
20149 }
20150 else
20151 {
20152 op1 = operands[1];
20153 op2 = SUBREG_REG (operands[2]);
20154 if (!vector_operand (op2, GET_MODE (dst)))
20155 op2 = force_reg (GET_MODE (dst), op2);
20156 }
20157 op1 = SUBREG_REG (op1);
20158 if (!vector_operand (op1, GET_MODE (dst)))
20159 op1 = force_reg (GET_MODE (dst), op1);
20160 emit_insn (gen_rtx_SET (dst,
20161 gen_rtx_fmt_ee (code, GET_MODE (dst),
20162 op1, op2)));
20163 emit_move_insn (operands[0], gen_lowpart (mode, dst));
20164 return;
20165 default:
20166 break;
20167 }
20168 }
20169 if (!vector_operand (operands[1], mode))
20170 operands[1] = force_reg (mode, operands[1]);
20171 if (!vector_operand (operands[2], mode))
20172 operands[2] = force_reg (mode, operands[2]);
20173 ix86_fixup_binary_operands_no_copy (code, mode, operands);
20174 emit_insn (gen_rtx_SET (operands[0],
20175 gen_rtx_fmt_ee (code, mode, operands[1],
20176 operands[2])));
20177 }
20178
20179 /* Return TRUE or FALSE depending on whether the binary operator meets the
20180 appropriate constraints. */
20181
20182 bool
20183 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20184 rtx operands[3])
20185 {
20186 rtx dst = operands[0];
20187 rtx src1 = operands[1];
20188 rtx src2 = operands[2];
20189
20190 /* Both source operands cannot be in memory. */
20191 if (MEM_P (src1) && MEM_P (src2))
20192 return false;
20193
20194 /* Canonicalize operand order for commutative operators. */
20195 if (ix86_swap_binary_operands_p (code, mode, operands))
20196 std::swap (src1, src2);
20197
20198 /* If the destination is memory, we must have a matching source operand. */
20199 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20200 return false;
20201
20202 /* Source 1 cannot be a constant. */
20203 if (CONSTANT_P (src1))
20204 return false;
20205
20206 /* Source 1 cannot be a non-matching memory. */
20207 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20208 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20209 return (code == AND
20210 && (mode == HImode
20211 || mode == SImode
20212 || (TARGET_64BIT && mode == DImode))
20213 && satisfies_constraint_L (src2));
20214
20215 return true;
20216 }
20217
20218 /* Attempt to expand a unary operator. Make the expansion closer to the
20219 actual machine than just using general_operand, which would allow 2 separate
20220 memory references (one output, one input) in a single insn. */
20221
20222 void
20223 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20224 rtx operands[])
20225 {
20226 bool matching_memory = false;
20227 rtx src, dst, op, clob;
20228
20229 dst = operands[0];
20230 src = operands[1];
20231
20232 /* If the destination is memory, and we do not have matching source
20233 operands, do things in registers. */
20234 if (MEM_P (dst))
20235 {
20236 if (rtx_equal_p (dst, src))
20237 matching_memory = true;
20238 else
20239 dst = gen_reg_rtx (mode);
20240 }
20241
20242 /* When source operand is memory, destination must match. */
20243 if (MEM_P (src) && !matching_memory)
20244 src = force_reg (mode, src);
20245
20246 /* Emit the instruction. */
20247
20248 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20249
20250 if (code == NOT)
20251 emit_insn (op);
20252 else
20253 {
20254 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20255 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20256 }
20257
20258 /* Fix up the destination if needed. */
20259 if (dst != operands[0])
20260 emit_move_insn (operands[0], dst);
20261 }
20262
20263 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20264 divisor are within the range [0-255]. */
20265
20266 void
20267 ix86_split_idivmod (machine_mode mode, rtx operands[],
20268 bool signed_p)
20269 {
20270 rtx_code_label *end_label, *qimode_label;
20271 rtx insn, div, mod;
20272 rtx scratch, tmp0, tmp1, tmp2;
20273 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20274 rtx (*gen_zero_extend) (rtx, rtx);
20275 rtx (*gen_test_ccno_1) (rtx, rtx);
20276
20277 switch (mode)
20278 {
20279 case SImode:
20280 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20281 gen_test_ccno_1 = gen_testsi_ccno_1;
20282 gen_zero_extend = gen_zero_extendqisi2;
20283 break;
20284 case DImode:
20285 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20286 gen_test_ccno_1 = gen_testdi_ccno_1;
20287 gen_zero_extend = gen_zero_extendqidi2;
20288 break;
20289 default:
20290 gcc_unreachable ();
20291 }
20292
20293 end_label = gen_label_rtx ();
20294 qimode_label = gen_label_rtx ();
20295
20296 scratch = gen_reg_rtx (mode);
20297
20298 /* Use 8bit unsigned divmod if the dividend and divisor are within
20299 the range [0-255]. */
20300 emit_move_insn (scratch, operands[2]);
20301 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20302 scratch, 1, OPTAB_DIRECT);
20303 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20304 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20305 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20306 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20307 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20308 pc_rtx);
20309 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20310 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20311 JUMP_LABEL (insn) = qimode_label;
20312
20313 /* Generate the original signed/unsigned divmod. */
20314 div = gen_divmod4_1 (operands[0], operands[1],
20315 operands[2], operands[3]);
20316 emit_insn (div);
20317
20318 /* Branch to the end. */
20319 emit_jump_insn (gen_jump (end_label));
20320 emit_barrier ();
20321
20322 /* Generate 8bit unsigned divide. */
20323 emit_label (qimode_label);
20324 /* Don't use operands[0] for the result of the 8bit divide since not all
20325 registers support QImode ZERO_EXTRACT. */
20326 tmp0 = lowpart_subreg (HImode, scratch, mode);
20327 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20328 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20329 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20330
20331 if (signed_p)
20332 {
20333 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
20334 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
20335 }
20336 else
20337 {
20338 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
20339 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
20340 }
20341
20342 /* Extract remainder from AH. */
20343 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
20344 if (REG_P (operands[1]))
20345 insn = emit_move_insn (operands[1], tmp1);
20346 else
20347 {
20348 /* Need a new scratch register since the old one has result
20349 of 8bit divide. */
20350 scratch = gen_reg_rtx (mode);
20351 emit_move_insn (scratch, tmp1);
20352 insn = emit_move_insn (operands[1], scratch);
20353 }
20354 set_unique_reg_note (insn, REG_EQUAL, mod);
20355
20356 /* Zero extend quotient from AL. */
20357 tmp1 = gen_lowpart (QImode, tmp0);
20358 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20359 set_unique_reg_note (insn, REG_EQUAL, div);
20360
20361 emit_label (end_label);
20362 }
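/* A rough sketch of the sequence emitted above for a 32-bit unsigned
   divide (illustrative only; the exact output depends on register
   allocation):

       mov    dividend, scratch
       or     divisor, scratch
       test   $-0x100, scratch        # do both operands fit in 8 bits?
       je     .Lqimode
       ...full 32-bit div...          # quotient/remainder as usual
       jmp    .Lend
   .Lqimode:
       ...8-bit divb...               # quotient in %al, remainder in %ah
   .Lend:                                                                */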
20363
20364 #define LEA_MAX_STALL (3)
20365 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20366
20367 /* Increase given DISTANCE in half-cycles according to
20368 dependencies between PREV and NEXT instructions.
20369 Add 1 half-cycle if there is no dependency and
20370 go to the next cycle if there is some dependency. */
20371
20372 static unsigned int
20373 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20374 {
20375 df_ref def, use;
20376
20377 if (!prev || !next)
20378 return distance + (distance & 1) + 2;
20379
20380 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20381 return distance + 1;
20382
20383 FOR_EACH_INSN_USE (use, next)
20384 FOR_EACH_INSN_DEF (def, prev)
20385 if (!DF_REF_IS_ARTIFICIAL (def)
20386 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20387 return distance + (distance & 1) + 2;
20388
20389 return distance + 1;
20390 }
20391
20392 /* Function checks if instruction INSN defines register number
20393 REGNO1 or REGNO2. */
20394
20395 static bool
20396 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20397 rtx_insn *insn)
20398 {
20399 df_ref def;
20400
20401 FOR_EACH_INSN_DEF (def, insn)
20402 if (DF_REF_REG_DEF_P (def)
20403 && !DF_REF_IS_ARTIFICIAL (def)
20404 && (regno1 == DF_REF_REGNO (def)
20405 || regno2 == DF_REF_REGNO (def)))
20406 return true;
20407
20408 return false;
20409 }
20410
20411 /* Function checks if instruction INSN uses register number
20412 REGNO as a part of address expression. */
20413
20414 static bool
20415 insn_uses_reg_mem (unsigned int regno, rtx insn)
20416 {
20417 df_ref use;
20418
20419 FOR_EACH_INSN_USE (use, insn)
20420 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20421 return true;
20422
20423 return false;
20424 }
20425
20426 /* Search backward for non-agu definition of register number REGNO1
20427 or register number REGNO2 in basic block starting from instruction
20428 START up to head of basic block or instruction INSN.
20429
20430 Function puts true value into *FOUND var if definition was found
20431 and false otherwise.
20432
20433 Distance in half-cycles between START and found instruction or head
20434 of BB is added to DISTANCE and returned. */
20435
20436 static int
20437 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20438 rtx_insn *insn, int distance,
20439 rtx_insn *start, bool *found)
20440 {
20441 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20442 rtx_insn *prev = start;
20443 rtx_insn *next = NULL;
20444
20445 *found = false;
20446
20447 while (prev
20448 && prev != insn
20449 && distance < LEA_SEARCH_THRESHOLD)
20450 {
20451 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20452 {
20453 distance = increase_distance (prev, next, distance);
20454 if (insn_defines_reg (regno1, regno2, prev))
20455 {
20456 if (recog_memoized (prev) < 0
20457 || get_attr_type (prev) != TYPE_LEA)
20458 {
20459 *found = true;
20460 return distance;
20461 }
20462 }
20463
20464 next = prev;
20465 }
20466 if (prev == BB_HEAD (bb))
20467 break;
20468
20469 prev = PREV_INSN (prev);
20470 }
20471
20472 return distance;
20473 }
20474
20475 /* Search backward for non-agu definition of register number REGNO1
20476 or register number REGNO2 in INSN's basic block until
20477 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20478 2. Reach neighbor BBs boundary, or
20479 3. Reach agu definition.
20480 Returns the distance between the non-agu definition point and INSN.
20481 If no definition point, returns -1. */
20482
20483 static int
20484 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20485 rtx_insn *insn)
20486 {
20487 basic_block bb = BLOCK_FOR_INSN (insn);
20488 int distance = 0;
20489 bool found = false;
20490
20491 if (insn != BB_HEAD (bb))
20492 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20493 distance, PREV_INSN (insn),
20494 &found);
20495
20496 if (!found && distance < LEA_SEARCH_THRESHOLD)
20497 {
20498 edge e;
20499 edge_iterator ei;
20500 bool simple_loop = false;
20501
20502 FOR_EACH_EDGE (e, ei, bb->preds)
20503 if (e->src == bb)
20504 {
20505 simple_loop = true;
20506 break;
20507 }
20508
20509 if (simple_loop)
20510 distance = distance_non_agu_define_in_bb (regno1, regno2,
20511 insn, distance,
20512 BB_END (bb), &found);
20513 else
20514 {
20515 int shortest_dist = -1;
20516 bool found_in_bb = false;
20517
20518 FOR_EACH_EDGE (e, ei, bb->preds)
20519 {
20520 int bb_dist
20521 = distance_non_agu_define_in_bb (regno1, regno2,
20522 insn, distance,
20523 BB_END (e->src),
20524 &found_in_bb);
20525 if (found_in_bb)
20526 {
20527 if (shortest_dist < 0)
20528 shortest_dist = bb_dist;
20529 else if (bb_dist > 0)
20530 shortest_dist = MIN (bb_dist, shortest_dist);
20531
20532 found = true;
20533 }
20534 }
20535
20536 distance = shortest_dist;
20537 }
20538 }
20539
20540 /* get_attr_type may modify recog data. We want to make sure
20541 that recog data is valid for instruction INSN, on which
20542 distance_non_agu_define is called. INSN is unchanged here. */
20543 extract_insn_cached (insn);
20544
20545 if (!found)
20546 return -1;
20547
20548 return distance >> 1;
20549 }
20550
20551 /* Return the distance in half-cycles, added to DISTANCE, between INSN
20552 and the next insn that uses register number REGNO in a memory
20553 address. Return -1 if REGNO is set.
20554
20555 Put true value into *FOUND if register usage was found and
20556 false otherwise.
20557 Put true value into *REDEFINED if register redefinition was
20558 found and false otherwise. */
20559
20560 static int
20561 distance_agu_use_in_bb (unsigned int regno,
20562 rtx_insn *insn, int distance, rtx_insn *start,
20563 bool *found, bool *redefined)
20564 {
20565 basic_block bb = NULL;
20566 rtx_insn *next = start;
20567 rtx_insn *prev = NULL;
20568
20569 *found = false;
20570 *redefined = false;
20571
20572 if (start != NULL_RTX)
20573 {
20574 bb = BLOCK_FOR_INSN (start);
20575 if (start != BB_HEAD (bb))
20576 /* If insn and start belong to the same bb, set prev to insn,
20577 so the call to increase_distance will increase the distance
20578 between insns by 1. */
20579 prev = insn;
20580 }
20581
20582 while (next
20583 && next != insn
20584 && distance < LEA_SEARCH_THRESHOLD)
20585 {
20586 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20587 {
20588 distance = increase_distance(prev, next, distance);
20589 if (insn_uses_reg_mem (regno, next))
20590 {
20591 /* Return DISTANCE if OP0 is used in memory
20592 address in NEXT. */
20593 *found = true;
20594 return distance;
20595 }
20596
20597 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20598 {
20599 /* Return -1 if OP0 is set in NEXT. */
20600 *redefined = true;
20601 return -1;
20602 }
20603
20604 prev = next;
20605 }
20606
20607 if (next == BB_END (bb))
20608 break;
20609
20610 next = NEXT_INSN (next);
20611 }
20612
20613 return distance;
20614 }
20615
20616 /* Return the distance between INSN and the next insn that uses
20617 register number REGNO0 in a memory address. Return -1 if no such
20618 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
20619
20620 static int
20621 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20622 {
20623 basic_block bb = BLOCK_FOR_INSN (insn);
20624 int distance = 0;
20625 bool found = false;
20626 bool redefined = false;
20627
20628 if (insn != BB_END (bb))
20629 distance = distance_agu_use_in_bb (regno0, insn, distance,
20630 NEXT_INSN (insn),
20631 &found, &redefined);
20632
20633 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20634 {
20635 edge e;
20636 edge_iterator ei;
20637 bool simple_loop = false;
20638
20639 FOR_EACH_EDGE (e, ei, bb->succs)
20640 if (e->dest == bb)
20641 {
20642 simple_loop = true;
20643 break;
20644 }
20645
20646 if (simple_loop)
20647 distance = distance_agu_use_in_bb (regno0, insn,
20648 distance, BB_HEAD (bb),
20649 &found, &redefined);
20650 else
20651 {
20652 int shortest_dist = -1;
20653 bool found_in_bb = false;
20654 bool redefined_in_bb = false;
20655
20656 FOR_EACH_EDGE (e, ei, bb->succs)
20657 {
20658 int bb_dist
20659 = distance_agu_use_in_bb (regno0, insn,
20660 distance, BB_HEAD (e->dest),
20661 &found_in_bb, &redefined_in_bb);
20662 if (found_in_bb)
20663 {
20664 if (shortest_dist < 0)
20665 shortest_dist = bb_dist;
20666 else if (bb_dist > 0)
20667 shortest_dist = MIN (bb_dist, shortest_dist);
20668
20669 found = true;
20670 }
20671 }
20672
20673 distance = shortest_dist;
20674 }
20675 }
20676
20677 if (!found || redefined)
20678 return -1;
20679
20680 return distance >> 1;
20681 }
20682
20683 /* Define this macro to tune LEA priority vs ADD; it takes effect when
20684 there is a dilemma of choosing LEA or ADD.
20685 Negative value: ADD is preferred over LEA
20686 Zero: Neutral
20687 Positive value: LEA is preferred over ADD. */
20688 #define IX86_LEA_PRIORITY 0
20689
20690 /* Return true if usage of lea INSN has performance advantage
20691 over a sequence of instructions. Instructions sequence has
20692 SPLIT_COST cycles higher latency than lea latency. */
20693
20694 static bool
20695 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20696 unsigned int regno2, int split_cost, bool has_scale)
20697 {
20698 int dist_define, dist_use;
20699
20700 /* For Silvermont, if using a 2-source or 3-source LEA for
20701 non-destructive destination purposes, or due to wanting the
20702 ability to use SCALE, the use of LEA is justified. */
20703 if (TARGET_SILVERMONT || TARGET_INTEL)
20704 {
20705 if (has_scale)
20706 return true;
20707 if (split_cost < 1)
20708 return false;
20709 if (regno0 == regno1 || regno0 == regno2)
20710 return false;
20711 return true;
20712 }
20713
20714 dist_define = distance_non_agu_define (regno1, regno2, insn);
20715 dist_use = distance_agu_use (regno0, insn);
20716
20717 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
20718 {
20719 /* If there is no non-AGU operand definition, no AGU
20720 operand usage and the split cost is 0, then both the lea
20721 and non-lea variants have the same priority. Currently
20722 we prefer lea for 64-bit code and non-lea for 32-bit
20723 code. */
20724 if (dist_use < 0 && split_cost == 0)
20725 return TARGET_64BIT || IX86_LEA_PRIORITY;
20726 else
20727 return true;
20728 }
20729
20730 /* With a longer definition distance, lea is preferable.
20731 Here we adjust it to take into account the splitting cost and
20732 lea priority. */
20733 dist_define += split_cost + IX86_LEA_PRIORITY;
20734
20735 /* If there is no use in a memory address then we just check
20736 that the split cost exceeds the AGU stall. */
20737 if (dist_use < 0)
20738 return dist_define > LEA_MAX_STALL;
20739
20740 /* If this insn has both a backward non-agu dependence and a forward
20741 agu dependence, the one with the shorter distance takes effect. */
20742 return dist_define >= dist_use;
20743 }
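/* A worked example of the heuristic above: with dist_define = 2,
   split_cost = 1 and IX86_LEA_PRIORITY = 0, the adjusted definition
   distance becomes 3; if the result is used as an address 2 half-cycles
   later (dist_use = 2), 3 >= 2 holds and the lea form is kept.  */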
20744
20745 /* Return true if it is legal to clobber flags by INSN and
20746 false otherwise. */
20747
20748 static bool
20749 ix86_ok_to_clobber_flags (rtx_insn *insn)
20750 {
20751 basic_block bb = BLOCK_FOR_INSN (insn);
20752 df_ref use;
20753 bitmap live;
20754
20755 while (insn)
20756 {
20757 if (NONDEBUG_INSN_P (insn))
20758 {
20759 FOR_EACH_INSN_USE (use, insn)
20760 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20761 return false;
20762
20763 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20764 return true;
20765 }
20766
20767 if (insn == BB_END (bb))
20768 break;
20769
20770 insn = NEXT_INSN (insn);
20771 }
20772
20773 live = df_get_live_out(bb);
20774 return !REGNO_REG_SET_P (live, FLAGS_REG);
20775 }
20776
20777 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20778 move and add to avoid AGU stalls. */
20779
20780 bool
20781 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20782 {
20783 unsigned int regno0, regno1, regno2;
20784
20785 /* Check if we need to optimize. */
20786 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20787 return false;
20788
20789 /* Check that it is correct to split here. */
20790 if (!ix86_ok_to_clobber_flags(insn))
20791 return false;
20792
20793 regno0 = true_regnum (operands[0]);
20794 regno1 = true_regnum (operands[1]);
20795 regno2 = true_regnum (operands[2]);
20796
20797 /* We only need to split adds with a non-destructive
20798 destination operand. */
20799 if (regno0 == regno1 || regno0 == regno2)
20800 return false;
20801 else
20802 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20803 }
20804
20805 /* Return true if we should emit lea instruction instead of mov
20806 instruction. */
20807
20808 bool
20809 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20810 {
20811 unsigned int regno0, regno1;
20812
20813 /* Check if we need to optimize. */
20814 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20815 return false;
20816
20817 /* Use lea for reg to reg moves only. */
20818 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20819 return false;
20820
20821 regno0 = true_regnum (operands[0]);
20822 regno1 = true_regnum (operands[1]);
20823
20824 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20825 }
20826
20827 /* Return true if we need to split lea into a sequence of
20828 instructions to avoid AGU stalls. */
20829
20830 bool
20831 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20832 {
20833 unsigned int regno0, regno1, regno2;
20834 int split_cost;
20835 struct ix86_address parts;
20836 int ok;
20837
20838 /* Check whether we need to optimize. */
20839 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20840 return false;
20841
20842 /* The "at least two components" test below might not catch simple
20843 move or zero extension insns if parts.base is non-NULL and parts.disp
20844 is const0_rtx as the only components in the address, e.g. if the
20845 register is %rbp or %r13. As this test is much cheaper and moves or
20846 zero extensions are the common case, do this check first. */
20847 if (REG_P (operands[1])
20848 || (SImode_address_operand (operands[1], VOIDmode)
20849 && REG_P (XEXP (operands[1], 0))))
20850 return false;
20851
20852 /* Check if it is OK to split here. */
20853 if (!ix86_ok_to_clobber_flags (insn))
20854 return false;
20855
20856 ok = ix86_decompose_address (operands[1], &parts);
20857 gcc_assert (ok);
20858
20859 /* There should be at least two components in the address. */
20860 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20861 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20862 return false;
20863
20864 /* We should not split into an add if a non-legitimate PIC
20865 operand is used as the displacement. */
20866 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20867 return false;
20868
20869 regno0 = true_regnum (operands[0]);
20870 regno1 = INVALID_REGNUM;
20871 regno2 = INVALID_REGNUM;
20872
20873 if (parts.base)
20874 regno1 = true_regnum (parts.base);
20875 if (parts.index)
20876 regno2 = true_regnum (parts.index);
20877
20878 split_cost = 0;
20879
20880 /* Compute how many cycles we will add to execution time
20881 if we split the lea into a sequence of instructions. */
20882 if (parts.base || parts.index)
20883 {
20884 /* Have to use a mov instruction if the non-destructive
20885 destination form is used. */
20886 if (regno1 != regno0 && regno2 != regno0)
20887 split_cost += 1;
20888
20889 /* Have to add index to base if both exist. */
20890 if (parts.base && parts.index)
20891 split_cost += 1;
20892
20893 /* Have to use shift and adds if scale is 2 or greater. */
20894 if (parts.scale > 1)
20895 {
20896 if (regno0 != regno1)
20897 split_cost += 1;
20898 else if (regno2 == regno0)
20899 split_cost += 4;
20900 else
20901 split_cost += parts.scale;
20902 }
20903
20904 /* Have to use an add instruction with an immediate if
20905 disp is nonzero. */
20906 if (parts.disp && parts.disp != const0_rtx)
20907 split_cost += 1;
20908
20909 /* Subtract the price of lea. */
20910 split_cost -= 1;
20911 }
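/* For example (illustrative): splitting lea 4(%rbx,%rcx), %rax needs a
   mov (the destination matches neither source), an add of the index to
   the base and an add of the displacement, minus the lea itself, giving
   split_cost = 2.  */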
20912
20913 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20914 parts.scale > 1);
20915 }
20916
20917 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20918 matches destination. RTX includes clobber of FLAGS_REG. */
20919
20920 static void
20921 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20922 rtx dst, rtx src)
20923 {
20924 rtx op, clob;
20925
20926 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20927 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20928
20929 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20930 }
20931
20932 /* Return true if regno1 def is nearest to the insn. */
20933
20934 static bool
20935 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20936 {
20937 rtx_insn *prev = insn;
20938 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20939
20940 if (insn == start)
20941 return false;
20942 while (prev && prev != start)
20943 {
20944 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20945 {
20946 prev = PREV_INSN (prev);
20947 continue;
20948 }
20949 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20950 return true;
20951 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20952 return false;
20953 prev = PREV_INSN (prev);
20954 }
20955
20956 /* None of the regs is defined in the bb. */
20957 return false;
20958 }
20959
20960 /* Split lea instructions into a sequence of instructions
20961 which are executed on ALU to avoid AGU stalls.
20962 It is assumed that it is allowed to clobber flags register
20963 at lea position. */
20964
20965 void
20966 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20967 {
20968 unsigned int regno0, regno1, regno2;
20969 struct ix86_address parts;
20970 rtx target, tmp;
20971 int ok, adds;
20972
20973 ok = ix86_decompose_address (operands[1], &parts);
20974 gcc_assert (ok);
20975
20976 target = gen_lowpart (mode, operands[0]);
20977
20978 regno0 = true_regnum (target);
20979 regno1 = INVALID_REGNUM;
20980 regno2 = INVALID_REGNUM;
20981
20982 if (parts.base)
20983 {
20984 parts.base = gen_lowpart (mode, parts.base);
20985 regno1 = true_regnum (parts.base);
20986 }
20987
20988 if (parts.index)
20989 {
20990 parts.index = gen_lowpart (mode, parts.index);
20991 regno2 = true_regnum (parts.index);
20992 }
20993
20994 if (parts.disp)
20995 parts.disp = gen_lowpart (mode, parts.disp);
20996
20997 if (parts.scale > 1)
20998 {
20999 /* Case r1 = r1 + ... */
21000 if (regno1 == regno0)
21001 {
21002 /* If we had a case r1 = r1 + C * r2 then we
21003 would have to use multiplication, which is very
21004 expensive. Assume the cost model is wrong if we
21005 get such a case here. */
21006 gcc_assert (regno2 != regno0);
21007
21008 for (adds = parts.scale; adds > 0; adds--)
21009 ix86_emit_binop (PLUS, mode, target, parts.index);
21010 }
21011 else
21012 {
21013 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
21014 if (regno0 != regno2)
21015 emit_insn (gen_rtx_SET (target, parts.index));
21016
21017 /* Use shift for scaling. */
21018 ix86_emit_binop (ASHIFT, mode, target,
21019 GEN_INT (exact_log2 (parts.scale)));
21020
21021 if (parts.base)
21022 ix86_emit_binop (PLUS, mode, target, parts.base);
21023
21024 if (parts.disp && parts.disp != const0_rtx)
21025 ix86_emit_binop (PLUS, mode, target, parts.disp);
21026 }
21027 }
21028 else if (!parts.base && !parts.index)
21029 {
21030 gcc_assert (parts.disp);
21031 emit_insn (gen_rtx_SET (target, parts.disp));
21032 }
21033 else
21034 {
21035 if (!parts.base)
21036 {
21037 if (regno0 != regno2)
21038 emit_insn (gen_rtx_SET (target, parts.index));
21039 }
21040 else if (!parts.index)
21041 {
21042 if (regno0 != regno1)
21043 emit_insn (gen_rtx_SET (target, parts.base));
21044 }
21045 else
21046 {
21047 if (regno0 == regno1)
21048 tmp = parts.index;
21049 else if (regno0 == regno2)
21050 tmp = parts.base;
21051 else
21052 {
21053 rtx tmp1;
21054
21055 /* Find better operand for SET instruction, depending
21056 on which definition is farther from the insn. */
21057 if (find_nearest_reg_def (insn, regno1, regno2))
21058 tmp = parts.index, tmp1 = parts.base;
21059 else
21060 tmp = parts.base, tmp1 = parts.index;
21061
21062 emit_insn (gen_rtx_SET (target, tmp));
21063
21064 if (parts.disp && parts.disp != const0_rtx)
21065 ix86_emit_binop (PLUS, mode, target, parts.disp);
21066
21067 ix86_emit_binop (PLUS, mode, target, tmp1);
21068 return;
21069 }
21070
21071 ix86_emit_binop (PLUS, mode, target, tmp);
21072 }
21073
21074 if (parts.disp && parts.disp != const0_rtx)
21075 ix86_emit_binop (PLUS, mode, target, parts.disp);
21076 }
21077 }
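/* As a rough example of the splitting above, lea 8(%rsi,%rdi,4), %rax
   (base %rsi, index %rdi, scale 4, disp 8, destination distinct from
   both sources) becomes:

       mov  %rdi, %rax
       sal  $2, %rax
       add  %rsi, %rax
       add  $8, %rax                                                     */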
21078
21079 /* Return true if it is ok to optimize an ADD operation to a LEA
21080 operation to avoid flag register consumption. For most processors,
21081 ADD is faster than LEA. For processors like BONNELL, if the
21082 destination register of the LEA holds an actual address which will be
21083 used soon, LEA is better; otherwise ADD is better. */
21084
21085 bool
21086 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21087 {
21088 unsigned int regno0 = true_regnum (operands[0]);
21089 unsigned int regno1 = true_regnum (operands[1]);
21090 unsigned int regno2 = true_regnum (operands[2]);
21091
21092 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
21093 if (regno0 != regno1 && regno0 != regno2)
21094 return true;
21095
21096 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21097 return false;
21098
21099 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21100 }
21101
21102 /* Return true if destination reg of SET_BODY is shift count of
21103 USE_BODY. */
21104
21105 static bool
21106 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21107 {
21108 rtx set_dest;
21109 rtx shift_rtx;
21110 int i;
21111
21112 /* Retrieve destination of SET_BODY. */
21113 switch (GET_CODE (set_body))
21114 {
21115 case SET:
21116 set_dest = SET_DEST (set_body);
21117 if (!set_dest || !REG_P (set_dest))
21118 return false;
21119 break;
21120 case PARALLEL:
21121 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21122 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21123 use_body))
21124 return true;
21125 default:
21126 return false;
21127 break;
21128 }
21129
21130 /* Retrieve shift count of USE_BODY. */
21131 switch (GET_CODE (use_body))
21132 {
21133 case SET:
21134 shift_rtx = XEXP (use_body, 1);
21135 break;
21136 case PARALLEL:
21137 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21138 if (ix86_dep_by_shift_count_body (set_body,
21139 XVECEXP (use_body, 0, i)))
21140 return true;
21141 default:
21142 return false;
21143 break;
21144 }
21145
21146 if (shift_rtx
21147 && (GET_CODE (shift_rtx) == ASHIFT
21148 || GET_CODE (shift_rtx) == LSHIFTRT
21149 || GET_CODE (shift_rtx) == ASHIFTRT
21150 || GET_CODE (shift_rtx) == ROTATE
21151 || GET_CODE (shift_rtx) == ROTATERT))
21152 {
21153 rtx shift_count = XEXP (shift_rtx, 1);
21154
21155 /* Return true if shift count is dest of SET_BODY. */
21156 if (REG_P (shift_count))
21157 {
21158 /* Add this check since we can be invoked before register
21159 allocation by the pre-reload scheduler. */
21160 if (reload_completed
21161 && true_regnum (set_dest) == true_regnum (shift_count))
21162 return true;
21163 else if (REGNO (set_dest) == REGNO (shift_count))
21164 return true;
21165 }
21166 }
21167
21168 return false;
21169 }
21170
21171 /* Return true if destination reg of SET_INSN is shift count of
21172 USE_INSN. */
21173
21174 bool
21175 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21176 {
21177 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21178 PATTERN (use_insn));
21179 }
21180
21181 /* Return TRUE or FALSE depending on whether the unary operator meets the
21182 appropriate constraints. */
21183
21184 bool
21185 ix86_unary_operator_ok (enum rtx_code,
21186 machine_mode,
21187 rtx operands[2])
21188 {
21189 /* If one of operands is memory, source and destination must match. */
21190 if ((MEM_P (operands[0])
21191 || MEM_P (operands[1]))
21192 && ! rtx_equal_p (operands[0], operands[1]))
21193 return false;
21194 return true;
21195 }
21196
21197 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21198 are ok, keeping in mind the possible movddup alternative. */
21199
21200 bool
21201 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21202 {
21203 if (MEM_P (operands[0]))
21204 return rtx_equal_p (operands[0], operands[1 + high]);
21205 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21206 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21207 return true;
21208 }
21209
21210 /* Post-reload splitter for converting an SF or DFmode value in an
21211 SSE register into an unsigned SImode. */
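/* Sketch of the idea: LARGE becomes a lane mask of VALUE >= 2**31,
   2**31 is subtracted from exactly those lanes before the signed
   truncating conversion, and the mask, shifted up to bit 31, is XORed
   into the integer result to restore the subtracted 2**31.  */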
21212
21213 void
21214 ix86_split_convert_uns_si_sse (rtx operands[])
21215 {
21216 machine_mode vecmode;
21217 rtx value, large, zero_or_two31, input, two31, x;
21218
21219 large = operands[1];
21220 zero_or_two31 = operands[2];
21221 input = operands[3];
21222 two31 = operands[4];
21223 vecmode = GET_MODE (large);
21224 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21225
21226 /* Load up the value into the low element. We must ensure that the other
21227 elements are valid floats -- zero is the easiest such value. */
21228 if (MEM_P (input))
21229 {
21230 if (vecmode == V4SFmode)
21231 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21232 else
21233 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21234 }
21235 else
21236 {
21237 input = gen_rtx_REG (vecmode, REGNO (input));
21238 emit_move_insn (value, CONST0_RTX (vecmode));
21239 if (vecmode == V4SFmode)
21240 emit_insn (gen_sse_movss (value, value, input));
21241 else
21242 emit_insn (gen_sse2_movsd (value, value, input));
21243 }
21244
21245 emit_move_insn (large, two31);
21246 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21247
21248 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21249 emit_insn (gen_rtx_SET (large, x));
21250
21251 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21252 emit_insn (gen_rtx_SET (zero_or_two31, x));
21253
21254 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21255 emit_insn (gen_rtx_SET (value, x));
21256
21257 large = gen_rtx_REG (V4SImode, REGNO (large));
21258 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21259
21260 x = gen_rtx_REG (V4SImode, REGNO (value));
21261 if (vecmode == V4SFmode)
21262 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21263 else
21264 emit_insn (gen_sse2_cvttpd2dq (x, value));
21265 value = x;
21266
21267 emit_insn (gen_xorv4si3 (value, value, large));
21268 }
21269
21270 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21271 Expects the 64-bit DImode to be supplied in a pair of integral
21272 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21273 -mfpmath=sse, !optimize_size only. */
21274
21275 void
21276 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21277 {
21278 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21279 rtx int_xmm, fp_xmm;
21280 rtx biases, exponents;
21281 rtx x;
21282
21283 int_xmm = gen_reg_rtx (V4SImode);
21284 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21285 emit_insn (gen_movdi_to_sse (int_xmm, input));
21286 else if (TARGET_SSE_SPLIT_REGS)
21287 {
21288 emit_clobber (int_xmm);
21289 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21290 }
21291 else
21292 {
21293 x = gen_reg_rtx (V2DImode);
21294 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21295 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21296 }
21297
21298 x = gen_rtx_CONST_VECTOR (V4SImode,
21299 gen_rtvec (4, GEN_INT (0x43300000UL),
21300 GEN_INT (0x45300000UL),
21301 const0_rtx, const0_rtx));
21302 exponents = validize_mem (force_const_mem (V4SImode, x));
21303
21304 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21305 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21306
21307 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21308 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21309 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21310 (0x1.0p84 + double(fp_value_hi_xmm)).
21311 Note these exponents differ by 32. */
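/* A worked example (sketch): for the input 0x0000000500000003 the low
   double becomes 0x1.0p52 + 3 and the high double 0x1.0p84 + 5*2**32;
   after the bias subtraction below the two halves are 3.0 and
   5*2**32 == 0x500000000.0, and their sum is the wanted value.  */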
21312
21313 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21314
21315 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21316 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21317 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21318 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21319 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21320 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21321 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21322 biases = validize_mem (force_const_mem (V2DFmode, biases));
21323 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21324
21325 /* Add the upper and lower DFmode values together. */
21326 if (TARGET_SSE3)
21327 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21328 else
21329 {
21330 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21331 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21332 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21333 }
21334
21335 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21336 }
21337
21338 /* Not used, but eases macroization of patterns. */
21339 void
21340 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21341 {
21342 gcc_unreachable ();
21343 }
21344
21345 /* Convert an unsigned SImode value into a DFmode. Only currently used
21346 for SSE, but applicable anywhere. */
21347
21348 void
21349 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21350 {
21351 REAL_VALUE_TYPE TWO31r;
21352 rtx x, fp;
21353
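  /* The unsigned SImode input is first rebiased into the signed range:
     adding -2^31 (with wraparound) maps [0, 2^32) onto [-2^31, 2^31), so
     the ordinary signed SImode -> DFmode conversion below is exact, and
     adding 0x1.0p31 back afterwards recovers the original value exactly,
     DFmode having well over 32 bits of mantissa.  */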
21354 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21355 NULL, 1, OPTAB_DIRECT);
21356
21357 fp = gen_reg_rtx (DFmode);
21358 emit_insn (gen_floatsidf2 (fp, x));
21359
21360 real_ldexp (&TWO31r, &dconst1, 31);
21361 x = const_double_from_real_value (TWO31r, DFmode);
21362
21363 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21364 if (x != target)
21365 emit_move_insn (target, x);
21366 }
21367
21368 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21369 32-bit mode; otherwise we have a direct convert instruction. */
21370
21371 void
21372 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21373 {
21374 REAL_VALUE_TYPE TWO32r;
21375 rtx fp_lo, fp_hi, x;
21376
21377 fp_lo = gen_reg_rtx (DFmode);
21378 fp_hi = gen_reg_rtx (DFmode);
21379
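  /* The value is computed as (double) hi * 2^32 + (double) (unsigned) lo,
     where hi is the signed high SImode word and lo the unsigned low word;
     both partial conversions are exact and only the final addition
     rounds.  */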
21380 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21381
21382 real_ldexp (&TWO32r, &dconst1, 32);
21383 x = const_double_from_real_value (TWO32r, DFmode);
21384 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21385
21386 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21387
21388 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21389 0, OPTAB_DIRECT);
21390 if (x != target)
21391 emit_move_insn (target, x);
21392 }
21393
21394 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21395 For x86_32, -mfpmath=sse, !optimize_size only. */
21396 void
21397 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21398 {
21399 REAL_VALUE_TYPE ONE16r;
21400 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21401
21402 real_ldexp (&ONE16r, &dconst1, 16);
21403 x = const_double_from_real_value (ONE16r, SFmode);
21404 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21405 NULL, 0, OPTAB_DIRECT);
21406 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21407 NULL, 0, OPTAB_DIRECT);
21408 fp_hi = gen_reg_rtx (SFmode);
21409 fp_lo = gen_reg_rtx (SFmode);
21410 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21411 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21412 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21413 0, OPTAB_DIRECT);
21414 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21415 0, OPTAB_DIRECT);
21416 if (!rtx_equal_p (target, fp_hi))
21417 emit_move_insn (target, fp_hi);
21418 }
21419
21420 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21421 a vector of unsigned ints VAL to vector of floats TARGET. */
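/* The expansion mirrors ix86_expand_convert_uns_sisf_sse above: each element
   is split into 16-bit halves, both halves go through the signed vector
   conversion, and the result is recombined as hi * 0x1.0p16 + lo.  */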
21422
21423 void
21424 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21425 {
21426 rtx tmp[8];
21427 REAL_VALUE_TYPE TWO16r;
21428 machine_mode intmode = GET_MODE (val);
21429 machine_mode fltmode = GET_MODE (target);
21430 rtx (*cvt) (rtx, rtx);
21431
21432 if (intmode == V4SImode)
21433 cvt = gen_floatv4siv4sf2;
21434 else
21435 cvt = gen_floatv8siv8sf2;
21436 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21437 tmp[0] = force_reg (intmode, tmp[0]);
21438 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21439 OPTAB_DIRECT);
21440 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21441 NULL_RTX, 1, OPTAB_DIRECT);
21442 tmp[3] = gen_reg_rtx (fltmode);
21443 emit_insn (cvt (tmp[3], tmp[1]));
21444 tmp[4] = gen_reg_rtx (fltmode);
21445 emit_insn (cvt (tmp[4], tmp[2]));
21446 real_ldexp (&TWO16r, &dconst1, 16);
21447 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21448 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21449 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21450 OPTAB_DIRECT);
21451 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21452 OPTAB_DIRECT);
21453 if (tmp[7] != target)
21454 emit_move_insn (target, tmp[7]);
21455 }
21456
21457 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21458 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21459 This is done by using just the signed conversion if the value is < 0x1p31,
21460 and otherwise by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
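/* As an illustration, for a scalar element with the value 3.0e9 (which is
   >= 0x1p31) this computes 3.0e9 - 0x1p31 = 852516352.0; the signed
   truncation then gives 852516352, and xoring in 0x80000000 afterwards
   restores the intended unsigned result 3000000000.  */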
21461
21462 rtx
21463 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21464 {
21465 REAL_VALUE_TYPE TWO31r;
21466 rtx two31r, tmp[4];
21467 machine_mode mode = GET_MODE (val);
21468 machine_mode scalarmode = GET_MODE_INNER (mode);
21469 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21470 rtx (*cmp) (rtx, rtx, rtx, rtx);
21471 int i;
21472
21473 for (i = 0; i < 3; i++)
21474 tmp[i] = gen_reg_rtx (mode);
21475 real_ldexp (&TWO31r, &dconst1, 31);
21476 two31r = const_double_from_real_value (TWO31r, scalarmode);
21477 two31r = ix86_build_const_vector (mode, 1, two31r);
21478 two31r = force_reg (mode, two31r);
21479 switch (mode)
21480 {
21481 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21482 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21483 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21484 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21485 default: gcc_unreachable ();
21486 }
21487 tmp[3] = gen_rtx_LE (mode, two31r, val);
21488 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21489 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21490 0, OPTAB_DIRECT);
21491 if (intmode == V4SImode || TARGET_AVX2)
21492 *xorp = expand_simple_binop (intmode, ASHIFT,
21493 gen_lowpart (intmode, tmp[0]),
21494 GEN_INT (31), NULL_RTX, 0,
21495 OPTAB_DIRECT);
21496 else
21497 {
21498 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21499 two31 = ix86_build_const_vector (intmode, 1, two31);
21500 *xorp = expand_simple_binop (intmode, AND,
21501 gen_lowpart (intmode, tmp[0]),
21502 two31, NULL_RTX, 0,
21503 OPTAB_DIRECT);
21504 }
21505 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21506 0, OPTAB_DIRECT);
21507 }
21508
21509 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21510 then replicate the value for all elements of the vector
21511 register. */
21512
21513 rtx
21514 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21515 {
21516 int i, n_elt;
21517 rtvec v;
21518 machine_mode scalar_mode;
21519
21520 switch (mode)
21521 {
21522 case V64QImode:
21523 case V32QImode:
21524 case V16QImode:
21525 case V32HImode:
21526 case V16HImode:
21527 case V8HImode:
21528 case V16SImode:
21529 case V8SImode:
21530 case V4SImode:
21531 case V8DImode:
21532 case V4DImode:
21533 case V2DImode:
21534 gcc_assert (vect);
21535 /* FALLTHRU */
21536 case V16SFmode:
21537 case V8SFmode:
21538 case V4SFmode:
21539 case V8DFmode:
21540 case V4DFmode:
21541 case V2DFmode:
21542 n_elt = GET_MODE_NUNITS (mode);
21543 v = rtvec_alloc (n_elt);
21544 scalar_mode = GET_MODE_INNER (mode);
21545
21546 RTVEC_ELT (v, 0) = value;
21547
21548 for (i = 1; i < n_elt; ++i)
21549 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21550
21551 return gen_rtx_CONST_VECTOR (mode, v);
21552
21553 default:
21554 gcc_unreachable ();
21555 }
21556 }
21557
21558 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21559 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21560 for an SSE register. If VECT is true, then replicate the mask for
21561 all elements of the vector register. If INVERT is true, then create
21562 a mask excluding the sign bit. */
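/* For SFmode elements the mask word is 0x80000000 (or 0x7fffffff when
   INVERT), and for DFmode elements it is 0x8000000000000000
   (resp. 0x7fffffffffffffff), i.e. only the IEEE sign bit is set or
   cleared.  */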
21563
21564 rtx
21565 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21566 {
21567 machine_mode vec_mode, imode;
21568 wide_int w;
21569 rtx mask, v;
21570
21571 switch (mode)
21572 {
21573 case V16SImode:
21574 case V16SFmode:
21575 case V8SImode:
21576 case V4SImode:
21577 case V8SFmode:
21578 case V4SFmode:
21579 vec_mode = mode;
21580 imode = SImode;
21581 break;
21582
21583 case V8DImode:
21584 case V4DImode:
21585 case V2DImode:
21586 case V8DFmode:
21587 case V4DFmode:
21588 case V2DFmode:
21589 vec_mode = mode;
21590 imode = DImode;
21591 break;
21592
21593 case TImode:
21594 case TFmode:
21595 vec_mode = VOIDmode;
21596 imode = TImode;
21597 break;
21598
21599 default:
21600 gcc_unreachable ();
21601 }
21602
21603 machine_mode inner_mode = GET_MODE_INNER (mode);
21604 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21605 GET_MODE_BITSIZE (inner_mode));
21606 if (invert)
21607 w = wi::bit_not (w);
21608
21609 /* Force this value into the low part of a fp vector constant. */
21610 mask = immed_wide_int_const (w, imode);
21611 mask = gen_lowpart (inner_mode, mask);
21612
21613 if (vec_mode == VOIDmode)
21614 return force_reg (inner_mode, mask);
21615
21616 v = ix86_build_const_vector (vec_mode, vect, mask);
21617 return force_reg (vec_mode, v);
21618 }
21619
21620 /* Generate code for floating point ABS or NEG. */
21621
21622 void
21623 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21624 rtx operands[])
21625 {
21626 rtx mask, set, dst, src;
21627 bool use_sse = false;
21628 bool vector_mode = VECTOR_MODE_P (mode);
21629 machine_mode vmode = mode;
21630
21631 if (vector_mode)
21632 use_sse = true;
21633 else if (mode == TFmode)
21634 use_sse = true;
21635 else if (TARGET_SSE_MATH)
21636 {
21637 use_sse = SSE_FLOAT_MODE_P (mode);
21638 if (mode == SFmode)
21639 vmode = V4SFmode;
21640 else if (mode == DFmode)
21641 vmode = V2DFmode;
21642 }
21643
21644 /* NEG and ABS performed with SSE use bitwise mask operations.
21645 Create the appropriate mask now. */
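  /* With SSE, NEG is performed as an XOR with the sign-bit mask and ABS as
     an AND with the complemented mask, which is why the ABS case requests
     the inverted (sign bit cleared) mask here.  */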
21646 if (use_sse)
21647 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21648 else
21649 mask = NULL_RTX;
21650
21651 dst = operands[0];
21652 src = operands[1];
21653
21654 set = gen_rtx_fmt_e (code, mode, src);
21655 set = gen_rtx_SET (dst, set);
21656
21657 if (mask)
21658 {
21659 rtx use, clob;
21660 rtvec par;
21661
21662 use = gen_rtx_USE (VOIDmode, mask);
21663 if (vector_mode)
21664 par = gen_rtvec (2, set, use);
21665 else
21666 {
21667 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21668 par = gen_rtvec (3, set, use, clob);
21669 }
21670 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
21671 }
21672 else
21673 emit_insn (set);
21674 }
21675
21676 /* Expand a copysign operation. Special case operand 0 being a constant. */
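/* In essence, copysign (op0, op1) is computed as
   (op0 & ~sign-bit-mask) | (op1 & sign-bit-mask); the split routines below
   materialize the AND/IOR steps, and the constant case pre-folds the
   magnitude of op0.  */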
21677
21678 void
21679 ix86_expand_copysign (rtx operands[])
21680 {
21681 machine_mode mode, vmode;
21682 rtx dest, op0, op1, mask, nmask;
21683
21684 dest = operands[0];
21685 op0 = operands[1];
21686 op1 = operands[2];
21687
21688 mode = GET_MODE (dest);
21689
21690 if (mode == SFmode)
21691 vmode = V4SFmode;
21692 else if (mode == DFmode)
21693 vmode = V2DFmode;
21694 else
21695 vmode = mode;
21696
21697 if (CONST_DOUBLE_P (op0))
21698 {
21699 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21700
21701 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21702 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21703
21704 if (mode == SFmode || mode == DFmode)
21705 {
21706 if (op0 == CONST0_RTX (mode))
21707 op0 = CONST0_RTX (vmode);
21708 else
21709 {
21710 rtx v = ix86_build_const_vector (vmode, false, op0);
21711
21712 op0 = force_reg (vmode, v);
21713 }
21714 }
21715 else if (op0 != CONST0_RTX (mode))
21716 op0 = force_reg (mode, op0);
21717
21718 mask = ix86_build_signbit_mask (vmode, 0, 0);
21719
21720 if (mode == SFmode)
21721 copysign_insn = gen_copysignsf3_const;
21722 else if (mode == DFmode)
21723 copysign_insn = gen_copysigndf3_const;
21724 else
21725 copysign_insn = gen_copysigntf3_const;
21726
21727 emit_insn (copysign_insn (dest, op0, op1, mask));
21728 }
21729 else
21730 {
21731 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21732
21733 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21734 mask = ix86_build_signbit_mask (vmode, 0, 0);
21735
21736 if (mode == SFmode)
21737 copysign_insn = gen_copysignsf3_var;
21738 else if (mode == DFmode)
21739 copysign_insn = gen_copysigndf3_var;
21740 else
21741 copysign_insn = gen_copysigntf3_var;
21742
21743 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21744 }
21745 }
21746
21747 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21748 be a constant, and so has already been expanded into a vector constant. */
21749
21750 void
21751 ix86_split_copysign_const (rtx operands[])
21752 {
21753 machine_mode mode, vmode;
21754 rtx dest, op0, mask, x;
21755
21756 dest = operands[0];
21757 op0 = operands[1];
21758 mask = operands[3];
21759
21760 mode = GET_MODE (dest);
21761 vmode = GET_MODE (mask);
21762
21763 dest = lowpart_subreg (vmode, dest, mode);
21764 x = gen_rtx_AND (vmode, dest, mask);
21765 emit_insn (gen_rtx_SET (dest, x));
21766
21767 if (op0 != CONST0_RTX (vmode))
21768 {
21769 x = gen_rtx_IOR (vmode, dest, op0);
21770 emit_insn (gen_rtx_SET (dest, x));
21771 }
21772 }
21773
21774 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21775 so we have to do two masks. */
21776
21777 void
21778 ix86_split_copysign_var (rtx operands[])
21779 {
21780 machine_mode mode, vmode;
21781 rtx dest, scratch, op0, op1, mask, nmask, x;
21782
21783 dest = operands[0];
21784 scratch = operands[1];
21785 op0 = operands[2];
21786 op1 = operands[3];
21787 nmask = operands[4];
21788 mask = operands[5];
21789
21790 mode = GET_MODE (dest);
21791 vmode = GET_MODE (mask);
21792
21793 if (rtx_equal_p (op0, op1))
21794 {
21795 /* Shouldn't happen often (it's useless, obviously), but when it does
21796 we'd generate incorrect code if we continue below. */
21797 emit_move_insn (dest, op0);
21798 return;
21799 }
21800
21801 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21802 {
21803 gcc_assert (REGNO (op1) == REGNO (scratch));
21804
21805 x = gen_rtx_AND (vmode, scratch, mask);
21806 emit_insn (gen_rtx_SET (scratch, x));
21807
21808 dest = mask;
21809 op0 = lowpart_subreg (vmode, op0, mode);
21810 x = gen_rtx_NOT (vmode, dest);
21811 x = gen_rtx_AND (vmode, x, op0);
21812 emit_insn (gen_rtx_SET (dest, x));
21813 }
21814 else
21815 {
21816 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21817 {
21818 x = gen_rtx_AND (vmode, scratch, mask);
21819 }
21820 else /* alternative 2,4 */
21821 {
21822 gcc_assert (REGNO (mask) == REGNO (scratch));
21823 op1 = lowpart_subreg (vmode, op1, mode);
21824 x = gen_rtx_AND (vmode, scratch, op1);
21825 }
21826 emit_insn (gen_rtx_SET (scratch, x));
21827
21828 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21829 {
21830 dest = lowpart_subreg (vmode, op0, mode);
21831 x = gen_rtx_AND (vmode, dest, nmask);
21832 }
21833 else /* alternative 3,4 */
21834 {
21835 gcc_assert (REGNO (nmask) == REGNO (dest));
21836 dest = nmask;
21837 op0 = lowpart_subreg (vmode, op0, mode);
21838 x = gen_rtx_AND (vmode, dest, op0);
21839 }
21840 emit_insn (gen_rtx_SET (dest, x));
21841 }
21842
21843 x = gen_rtx_IOR (vmode, dest, scratch);
21844 emit_insn (gen_rtx_SET (dest, x));
21845 }
21846
21847 /* Return TRUE or FALSE depending on whether the first SET in INSN
21848 has source and destination with matching CC modes and whether the
21849 CC mode is at least as constrained as REQ_MODE. */
21850
21851 bool
21852 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21853 {
21854 rtx set;
21855 machine_mode set_mode;
21856
21857 set = PATTERN (insn);
21858 if (GET_CODE (set) == PARALLEL)
21859 set = XVECEXP (set, 0, 0);
21860 gcc_assert (GET_CODE (set) == SET);
21861 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21862
21863 set_mode = GET_MODE (SET_DEST (set));
21864 switch (set_mode)
21865 {
21866 case CCNOmode:
21867 if (req_mode != CCNOmode
21868 && (req_mode != CCmode
21869 || XEXP (SET_SRC (set), 1) != const0_rtx))
21870 return false;
21871 break;
21872 case CCmode:
21873 if (req_mode == CCGCmode)
21874 return false;
21875 /* FALLTHRU */
21876 case CCGCmode:
21877 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21878 return false;
21879 /* FALLTHRU */
21880 case CCGOCmode:
21881 if (req_mode == CCZmode)
21882 return false;
21883 /* FALLTHRU */
21884 case CCZmode:
21885 break;
21886
21887 case CCAmode:
21888 case CCCmode:
21889 case CCOmode:
21890 case CCPmode:
21891 case CCSmode:
21892 if (set_mode != req_mode)
21893 return false;
21894 break;
21895
21896 default:
21897 gcc_unreachable ();
21898 }
21899
21900 return GET_MODE (SET_SRC (set)) == set_mode;
21901 }
21902
21903 /* Generate insn patterns to do an integer compare of OPERANDS. */
21904
21905 static rtx
21906 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21907 {
21908 machine_mode cmpmode;
21909 rtx tmp, flags;
21910
21911 cmpmode = SELECT_CC_MODE (code, op0, op1);
21912 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21913
21914 /* This is very simple, but making the interface the same as in the
21915 FP case makes the rest of the code easier. */
21916 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21917 emit_insn (gen_rtx_SET (flags, tmp));
21918
21919 /* Return the test that should be put into the flags user, i.e.
21920 the bcc, scc, or cmov instruction. */
21921 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21922 }
21923
21924 /* Figure out whether to use ordered or unordered fp comparisons.
21925 Return the appropriate mode to use. */
21926
21927 machine_mode
21928 ix86_fp_compare_mode (enum rtx_code)
21929 {
21930 /* ??? In order to make all comparisons reversible, we do all comparisons
21931 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21932 between all forms of trapping and nontrapping comparisons, we can make
21933 inequality comparisons trapping again, since that results in better code
21934 when using FCOM based compares. */
21935 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
21936 }
21937
21938 machine_mode
21939 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21940 {
21941 machine_mode mode = GET_MODE (op0);
21942
21943 if (SCALAR_FLOAT_MODE_P (mode))
21944 {
21945 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21946 return ix86_fp_compare_mode (code);
21947 }
21948
21949 switch (code)
21950 {
21951 /* Only zero flag is needed. */
21952 case EQ: /* ZF=0 */
21953 case NE: /* ZF!=0 */
21954 return CCZmode;
21955 /* Codes needing carry flag. */
21956 case GEU: /* CF=0 */
21957 case LTU: /* CF=1 */
21958 /* Detect overflow checks. They need just the carry flag. */
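      /* E.g. the unsigned addition overflow test "a + b < a" arrives here
	 as (ltu (plus a b) a) and needs only the carry flag, hence
	 CCCmode.  */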
21959 if (GET_CODE (op0) == PLUS
21960 && (rtx_equal_p (op1, XEXP (op0, 0))
21961 || rtx_equal_p (op1, XEXP (op0, 1))))
21962 return CCCmode;
21963 else
21964 return CCmode;
21965 case GTU: /* CF=0 & ZF=0 */
21966 case LEU: /* CF=1 | ZF=1 */
21967 return CCmode;
21968 /* Codes possibly doable only with sign flag when
21969 comparing against zero. */
21970 case GE: /* SF=OF or SF=0 */
21971 case LT: /* SF<>OF or SF=1 */
21972 if (op1 == const0_rtx)
21973 return CCGOCmode;
21974 else
21975 /* For other cases Carry flag is not required. */
21976 return CCGCmode;
21977 /* Codes doable only with the sign flag when comparing
21978 against zero, but we lack a jump instruction for that,
21979 so we need to use relational tests against the overflow
21980 flag, which thus needs to be zero. */
21981 case GT: /* ZF=0 & SF=OF */
21982 case LE: /* ZF=1 | SF<>OF */
21983 if (op1 == const0_rtx)
21984 return CCNOmode;
21985 else
21986 return CCGCmode;
21987 /* The strcmp pattern does (use flags), and combine may ask us for the
21988 proper mode. */
21989 case USE:
21990 return CCmode;
21991 default:
21992 gcc_unreachable ();
21993 }
21994 }
21995
21996 /* Return the fixed registers used for condition codes. */
21997
21998 static bool
21999 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22000 {
22001 *p1 = FLAGS_REG;
22002 *p2 = FPSR_REG;
22003 return true;
22004 }
22005
22006 /* If two condition code modes are compatible, return a condition code
22007 mode which is compatible with both. Otherwise, return
22008 VOIDmode. */
22009
22010 static machine_mode
22011 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
22012 {
22013 if (m1 == m2)
22014 return m1;
22015
22016 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
22017 return VOIDmode;
22018
22019 if ((m1 == CCGCmode && m2 == CCGOCmode)
22020 || (m1 == CCGOCmode && m2 == CCGCmode))
22021 return CCGCmode;
22022
22023 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
22024 return m2;
22025 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
22026 return m1;
22027
22028 switch (m1)
22029 {
22030 default:
22031 gcc_unreachable ();
22032
22033 case CCmode:
22034 case CCGCmode:
22035 case CCGOCmode:
22036 case CCNOmode:
22037 case CCAmode:
22038 case CCCmode:
22039 case CCOmode:
22040 case CCPmode:
22041 case CCSmode:
22042 case CCZmode:
22043 switch (m2)
22044 {
22045 default:
22046 return VOIDmode;
22047
22048 case CCmode:
22049 case CCGCmode:
22050 case CCGOCmode:
22051 case CCNOmode:
22052 case CCAmode:
22053 case CCCmode:
22054 case CCOmode:
22055 case CCPmode:
22056 case CCSmode:
22057 case CCZmode:
22058 return CCmode;
22059 }
22060
22061 case CCFPmode:
22062 case CCFPUmode:
22063 /* These are only compatible with themselves, which we already
22064 checked above. */
22065 return VOIDmode;
22066 }
22067 }
22068
22069
22070 /* Return a comparison we can do that is equivalent to
22071 swap_condition (code), apart possibly from orderedness.
22072 Never change orderedness if TARGET_IEEE_FP, returning
22073 UNKNOWN in that case if necessary. */
22074
22075 static enum rtx_code
22076 ix86_fp_swap_condition (enum rtx_code code)
22077 {
22078 switch (code)
22079 {
22080 case GT: /* GTU - CF=0 & ZF=0 */
22081 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22082 case GE: /* GEU - CF=0 */
22083 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22084 case UNLT: /* LTU - CF=1 */
22085 return TARGET_IEEE_FP ? UNKNOWN : GT;
22086 case UNLE: /* LEU - CF=1 | ZF=1 */
22087 return TARGET_IEEE_FP ? UNKNOWN : GE;
22088 default:
22089 return swap_condition (code);
22090 }
22091 }
22092
22093 /* Return the cost of comparison CODE using the best strategy for performance.
22094 All of the following functions use the number of instructions as the cost metric.
22095 In the future this should be tweaked to compute bytes for optimize_size and to
22096 take into account the performance of various instructions on various CPUs. */
22097
22098 static int
22099 ix86_fp_comparison_cost (enum rtx_code code)
22100 {
22101 int arith_cost;
22102
22103 /* The cost of code using bit-twiddling on %ah. */
22104 switch (code)
22105 {
22106 case UNLE:
22107 case UNLT:
22108 case LTGT:
22109 case GT:
22110 case GE:
22111 case UNORDERED:
22112 case ORDERED:
22113 case UNEQ:
22114 arith_cost = 4;
22115 break;
22116 case LT:
22117 case NE:
22118 case EQ:
22119 case UNGE:
22120 arith_cost = TARGET_IEEE_FP ? 5 : 4;
22121 break;
22122 case LE:
22123 case UNGT:
22124 arith_cost = TARGET_IEEE_FP ? 6 : 4;
22125 break;
22126 default:
22127 gcc_unreachable ();
22128 }
22129
22130 switch (ix86_fp_comparison_strategy (code))
22131 {
22132 case IX86_FPCMP_COMI:
22133 return arith_cost > 4 ? 3 : 2;
22134 case IX86_FPCMP_SAHF:
22135 return arith_cost > 4 ? 4 : 3;
22136 default:
22137 return arith_cost;
22138 }
22139 }
22140
22141 /* Return the strategy to use for a floating-point comparison. We assume that
22142 fcomi is always preferable where available, since that is also true when
22143 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
22144
22145 enum ix86_fpcmp_strategy
22146 ix86_fp_comparison_strategy (enum rtx_code)
22147 {
22148 /* Do fcomi/sahf based test when profitable. */
22149
22150 if (TARGET_CMOVE)
22151 return IX86_FPCMP_COMI;
22152
22153 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22154 return IX86_FPCMP_SAHF;
22155
22156 return IX86_FPCMP_ARITH;
22157 }
22158
22159 /* Swap, force into registers, or otherwise massage the two operands
22160 to a fp comparison. The operands are updated in place; the new
22161 comparison code is returned. */
22162
22163 static enum rtx_code
22164 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22165 {
22166 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
22167 rtx op0 = *pop0, op1 = *pop1;
22168 machine_mode op_mode = GET_MODE (op0);
22169 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22170
22171 /* All of the unordered compare instructions only work on registers.
22172 The same is true of the fcomi compare instructions. The XFmode
22173 compare instructions require registers except when comparing
22174 against zero or when converting operand 1 from fixed point to
22175 floating point. */
22176
22177 if (!is_sse
22178 && (fpcmp_mode == CCFPUmode
22179 || (op_mode == XFmode
22180 && ! (standard_80387_constant_p (op0) == 1
22181 || standard_80387_constant_p (op1) == 1)
22182 && GET_CODE (op1) != FLOAT)
22183 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22184 {
22185 op0 = force_reg (op_mode, op0);
22186 op1 = force_reg (op_mode, op1);
22187 }
22188 else
22189 {
22190 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22191 things around if they appear profitable, otherwise force op0
22192 into a register. */
22193
22194 if (standard_80387_constant_p (op0) == 0
22195 || (MEM_P (op0)
22196 && ! (standard_80387_constant_p (op1) == 0
22197 || MEM_P (op1))))
22198 {
22199 enum rtx_code new_code = ix86_fp_swap_condition (code);
22200 if (new_code != UNKNOWN)
22201 {
22202 std::swap (op0, op1);
22203 code = new_code;
22204 }
22205 }
22206
22207 if (!REG_P (op0))
22208 op0 = force_reg (op_mode, op0);
22209
22210 if (CONSTANT_P (op1))
22211 {
22212 int tmp = standard_80387_constant_p (op1);
22213 if (tmp == 0)
22214 op1 = validize_mem (force_const_mem (op_mode, op1));
22215 else if (tmp == 1)
22216 {
22217 if (TARGET_CMOVE)
22218 op1 = force_reg (op_mode, op1);
22219 }
22220 else
22221 op1 = force_reg (op_mode, op1);
22222 }
22223 }
22224
22225 /* Try to rearrange the comparison to make it cheaper. */
22226 if (ix86_fp_comparison_cost (code)
22227 > ix86_fp_comparison_cost (swap_condition (code))
22228 && (REG_P (op1) || can_create_pseudo_p ()))
22229 {
22230 std::swap (op0, op1);
22231 code = swap_condition (code);
22232 if (!REG_P (op0))
22233 op0 = force_reg (op_mode, op0);
22234 }
22235
22236 *pop0 = op0;
22237 *pop1 = op1;
22238 return code;
22239 }
22240
22241 /* Convert the comparison codes we use to represent an FP comparison into the
22242 integer code that will result in a proper branch. Return UNKNOWN if no
22243 such code is available. */
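/* After fcomi/fucomi or fnstsw+sahf the FP result lives in CF/ZF/PF, so the
   ordered FP codes map onto the *unsigned* integer conditions: GT becomes
   GTU (CF=0 && ZF=0), GE becomes GEU (CF=0), UNLT becomes LTU (CF=1) and
   UNLE becomes LEU (CF=1 || ZF=1).  */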
22244
22245 enum rtx_code
22246 ix86_fp_compare_code_to_integer (enum rtx_code code)
22247 {
22248 switch (code)
22249 {
22250 case GT:
22251 return GTU;
22252 case GE:
22253 return GEU;
22254 case ORDERED:
22255 case UNORDERED:
22256 return code;
22257 break;
22258 case UNEQ:
22259 return EQ;
22260 break;
22261 case UNLT:
22262 return LTU;
22263 break;
22264 case UNLE:
22265 return LEU;
22266 break;
22267 case LTGT:
22268 return NE;
22269 break;
22270 default:
22271 return UNKNOWN;
22272 }
22273 }
22274
22275 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22276
22277 static rtx
22278 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22279 {
22280 machine_mode fpcmp_mode, intcmp_mode;
22281 rtx tmp, tmp2;
22282
22283 fpcmp_mode = ix86_fp_compare_mode (code);
22284 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22285
22286 /* Do fcomi/sahf based test when profitable. */
22287 switch (ix86_fp_comparison_strategy (code))
22288 {
22289 case IX86_FPCMP_COMI:
22290 intcmp_mode = fpcmp_mode;
22291 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
22292 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
22293 emit_insn (tmp);
22294 break;
22295
22296 case IX86_FPCMP_SAHF:
22297 intcmp_mode = fpcmp_mode;
22298 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
22299 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
22300
22301 if (!scratch)
22302 scratch = gen_reg_rtx (HImode);
22303 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22304 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22305 break;
22306
22307 case IX86_FPCMP_ARITH:
22308 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22309 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
22310 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22311 if (!scratch)
22312 scratch = gen_reg_rtx (HImode);
22313 emit_insn (gen_rtx_SET (scratch, tmp2));
22314
22315 /* In the unordered case, we have to check C2 for NaN's, which
22316 doesn't happen to work out to anything nice combination-wise.
22317 So do some bit twiddling on the value we've got in AH to come
22318 up with an appropriate set of condition codes. */
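      /* After fnstsw the x87 condition bits appear in %ah as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40 (C2 is set for unordered results), so the
	 0x45 masks used below select C0|C2|C3 and 0x04 alone tests for
	 NaNs.  */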
22319
22320 intcmp_mode = CCNOmode;
22321 switch (code)
22322 {
22323 case GT:
22324 case UNGT:
22325 if (code == GT || !TARGET_IEEE_FP)
22326 {
22327 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
22328 code = EQ;
22329 }
22330 else
22331 {
22332 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22333 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22334 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22335 intcmp_mode = CCmode;
22336 code = GEU;
22337 }
22338 break;
22339 case LT:
22340 case UNLT:
22341 if (code == LT && TARGET_IEEE_FP)
22342 {
22343 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22344 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22345 intcmp_mode = CCmode;
22346 code = EQ;
22347 }
22348 else
22349 {
22350 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
22351 code = NE;
22352 }
22353 break;
22354 case GE:
22355 case UNGE:
22356 if (code == GE || !TARGET_IEEE_FP)
22357 {
22358 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
22359 code = EQ;
22360 }
22361 else
22362 {
22363 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22364 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
22365 code = NE;
22366 }
22367 break;
22368 case LE:
22369 case UNLE:
22370 if (code == LE && TARGET_IEEE_FP)
22371 {
22372 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22373 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22374 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22375 intcmp_mode = CCmode;
22376 code = LTU;
22377 }
22378 else
22379 {
22380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
22381 code = NE;
22382 }
22383 break;
22384 case EQ:
22385 case UNEQ:
22386 if (code == EQ && TARGET_IEEE_FP)
22387 {
22388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22389 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22390 intcmp_mode = CCmode;
22391 code = EQ;
22392 }
22393 else
22394 {
22395 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
22396 code = NE;
22397 }
22398 break;
22399 case NE:
22400 case LTGT:
22401 if (code == NE && TARGET_IEEE_FP)
22402 {
22403 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
22404 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
22405 GEN_INT (0x40)));
22406 code = NE;
22407 }
22408 else
22409 {
22410 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
22411 code = EQ;
22412 }
22413 break;
22414
22415 case UNORDERED:
22416 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
22417 code = NE;
22418 break;
22419 case ORDERED:
22420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
22421 code = EQ;
22422 break;
22423
22424 default:
22425 gcc_unreachable ();
22426 }
22427 break;
22428
22429 default:
22430 gcc_unreachable();
22431 }
22432
22433 /* Return the test that should be put into the flags user, i.e.
22434 the bcc, scc, or cmov instruction. */
22435 return gen_rtx_fmt_ee (code, VOIDmode,
22436 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22437 const0_rtx);
22438 }
22439
22440 static rtx
22441 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22442 {
22443 rtx ret;
22444
22445 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22446 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22447
22448 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22449 {
22450 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22451 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22452 }
22453 else
22454 ret = ix86_expand_int_compare (code, op0, op1);
22455
22456 return ret;
22457 }
22458
22459 void
22460 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22461 {
22462 machine_mode mode = GET_MODE (op0);
22463 rtx tmp;
22464
22465 /* Handle the special case of a vector comparison with a boolean result;
22466 transform it using the ptest instruction. */
22467 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22468 {
22469 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22470 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22471
22472 gcc_assert (code == EQ || code == NE);
22473 /* Generate an XOR since we can't check whether one operand is the zero vector. */
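      /* ptest with both operands equal to TMP sets ZF exactly when TMP is
	 all zeros, so testing the flags for EQ/NE after xoring the inputs
	 implements the vector equality check.  */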
22474 tmp = gen_reg_rtx (mode);
22475 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22476 tmp = gen_lowpart (p_mode, tmp);
22477 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22478 gen_rtx_UNSPEC (CCmode,
22479 gen_rtvec (2, tmp, tmp),
22480 UNSPEC_PTEST)));
22481 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22482 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22483 gen_rtx_LABEL_REF (VOIDmode, label),
22484 pc_rtx);
22485 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22486 return;
22487 }
22488
22489 switch (mode)
22490 {
22491 case SFmode:
22492 case DFmode:
22493 case XFmode:
22494 case QImode:
22495 case HImode:
22496 case SImode:
22497 simple:
22498 tmp = ix86_expand_compare (code, op0, op1);
22499 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22500 gen_rtx_LABEL_REF (VOIDmode, label),
22501 pc_rtx);
22502 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22503 return;
22504
22505 case DImode:
22506 if (TARGET_64BIT)
22507 goto simple;
22508 /* For a 32-bit target, a DImode comparison may be performed in
22509 SSE registers. To allow this we should avoid splitting
22510 into SImode, which is achieved by doing the xor in DImode
22511 and then comparing with zero (which is recognized by the
22512 STV pass). We don't compare using xor when optimizing
22513 for size. */
22514 if (!optimize_insn_for_size_p ()
22515 && TARGET_STV
22516 && (code == EQ || code == NE))
22517 {
22518 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22519 op1 = const0_rtx;
22520 }
22521 /* FALLTHRU */
22522 case TImode:
22523 /* Expand DImode branch into multiple compare+branch. */
22524 {
22525 rtx lo[2], hi[2];
22526 rtx_code_label *label2;
22527 enum rtx_code code1, code2, code3;
22528 machine_mode submode;
22529
22530 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22531 {
22532 std::swap (op0, op1);
22533 code = swap_condition (code);
22534 }
22535
22536 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22537 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22538
22539 submode = mode == DImode ? SImode : DImode;
22540
22541 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22542 avoid two branches. This costs one extra insn, so disable when
22543 optimizing for size. */
22544
22545 if ((code == EQ || code == NE)
22546 && (!optimize_insn_for_size_p ()
22547 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22548 {
22549 rtx xor0, xor1;
22550
22551 xor1 = hi[0];
22552 if (hi[1] != const0_rtx)
22553 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22554 NULL_RTX, 0, OPTAB_WIDEN);
22555
22556 xor0 = lo[0];
22557 if (lo[1] != const0_rtx)
22558 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22559 NULL_RTX, 0, OPTAB_WIDEN);
22560
22561 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22562 NULL_RTX, 0, OPTAB_WIDEN);
22563
22564 ix86_expand_branch (code, tmp, const0_rtx, label);
22565 return;
22566 }
22567
22568 /* Otherwise, if we are doing a less-than or greater-than-or-equal
22569 comparison, op1 is a constant, and its low word is zero, then we
22570 can just examine the high word. Similarly for a low word of -1 and
22571 a less-than-or-equal or greater-than comparison. */
22572
22573 if (CONST_INT_P (hi[1]))
22574 switch (code)
22575 {
22576 case LT: case LTU: case GE: case GEU:
22577 if (lo[1] == const0_rtx)
22578 {
22579 ix86_expand_branch (code, hi[0], hi[1], label);
22580 return;
22581 }
22582 break;
22583 case LE: case LEU: case GT: case GTU:
22584 if (lo[1] == constm1_rtx)
22585 {
22586 ix86_expand_branch (code, hi[0], hi[1], label);
22587 return;
22588 }
22589 break;
22590 default:
22591 break;
22592 }
22593
22594 /* Otherwise, we need two or three jumps. */
22595
22596 label2 = gen_label_rtx ();
22597
22598 code1 = code;
22599 code2 = swap_condition (code);
22600 code3 = unsigned_condition (code);
22601
22602 switch (code)
22603 {
22604 case LT: case GT: case LTU: case GTU:
22605 break;
22606
22607 case LE: code1 = LT; code2 = GT; break;
22608 case GE: code1 = GT; code2 = LT; break;
22609 case LEU: code1 = LTU; code2 = GTU; break;
22610 case GEU: code1 = GTU; code2 = LTU; break;
22611
22612 case EQ: code1 = UNKNOWN; code2 = NE; break;
22613 case NE: code2 = UNKNOWN; break;
22614
22615 default:
22616 gcc_unreachable ();
22617 }
22618
22619 /*
22620 * a < b =>
22621 * if (hi(a) < hi(b)) goto true;
22622 * if (hi(a) > hi(b)) goto false;
22623 * if (lo(a) < lo(b)) goto true;
22624 * false:
22625 */
22626
22627 if (code1 != UNKNOWN)
22628 ix86_expand_branch (code1, hi[0], hi[1], label);
22629 if (code2 != UNKNOWN)
22630 ix86_expand_branch (code2, hi[0], hi[1], label2);
22631
22632 ix86_expand_branch (code3, lo[0], lo[1], label);
22633
22634 if (code2 != UNKNOWN)
22635 emit_label (label2);
22636 return;
22637 }
22638
22639 default:
22640 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22641 goto simple;
22642 }
22643 }
22644
22645 /* Split branch based on floating point condition. */
22646 void
22647 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
22648 rtx target1, rtx target2, rtx tmp)
22649 {
22650 rtx condition;
22651 rtx i;
22652
22653 if (target2 != pc_rtx)
22654 {
22655 std::swap (target1, target2);
22656 code = reverse_condition_maybe_unordered (code);
22657 }
22658
22659 condition = ix86_expand_fp_compare (code, op1, op2,
22660 tmp);
22661
22662 i = emit_jump_insn (gen_rtx_SET
22663 (pc_rtx,
22664 gen_rtx_IF_THEN_ELSE (VOIDmode,
22665 condition, target1, target2)));
22666 if (split_branch_probability >= 0)
22667 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
22668 }
22669
22670 void
22671 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22672 {
22673 rtx ret;
22674
22675 gcc_assert (GET_MODE (dest) == QImode);
22676
22677 ret = ix86_expand_compare (code, op0, op1);
22678 PUT_MODE (ret, QImode);
22679 emit_insn (gen_rtx_SET (dest, ret));
22680 }
22681
22682 /* Expand a comparison that sets or clears the carry flag. Return true when
22683 successful, and set *POP to the comparison operation. */
22684 static bool
22685 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22686 {
22687 machine_mode mode =
22688 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22689
22690 /* Do not handle double-mode compares that go through the special path. */
22691 if (mode == (TARGET_64BIT ? TImode : DImode))
22692 return false;
22693
22694 if (SCALAR_FLOAT_MODE_P (mode))
22695 {
22696 rtx compare_op;
22697 rtx_insn *compare_seq;
22698
22699 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22700
22701 /* Shortcut: the following common codes never translate
22702 into carry-flag compares. */
22703 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22704 || code == ORDERED || code == UNORDERED)
22705 return false;
22706
22707 /* These comparisons require the zero flag; swap the operands so that they won't. */
22708 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22709 && !TARGET_IEEE_FP)
22710 {
22711 std::swap (op0, op1);
22712 code = swap_condition (code);
22713 }
22714
22715 /* Try to expand the comparison and verify that we end up with a
22716 carry-flag-based comparison. This fails to be true only when we
22717 decide to expand the comparison using arithmetic, which is not
22718 a very common scenario. */
22719 start_sequence ();
22720 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22721 compare_seq = get_insns ();
22722 end_sequence ();
22723
22724 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
22725 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
22726 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22727 else
22728 code = GET_CODE (compare_op);
22729
22730 if (code != LTU && code != GEU)
22731 return false;
22732
22733 emit_insn (compare_seq);
22734 *pop = compare_op;
22735 return true;
22736 }
22737
22738 if (!INTEGRAL_MODE_P (mode))
22739 return false;
22740
22741 switch (code)
22742 {
22743 case LTU:
22744 case GEU:
22745 break;
22746
22747 /* Convert a==0 into (unsigned)a<1. */
22748 case EQ:
22749 case NE:
22750 if (op1 != const0_rtx)
22751 return false;
22752 op1 = const1_rtx;
22753 code = (code == EQ ? LTU : GEU);
22754 break;
22755
22756 /* Convert a>b into b<a or a>=b-1. */
22757 case GTU:
22758 case LEU:
22759 if (CONST_INT_P (op1))
22760 {
22761 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22762 /* Bail out on overflow. We could still swap the operands, but that
22763 would force loading the constant into a register. */
22764 if (op1 == const0_rtx
22765 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22766 return false;
22767 code = (code == GTU ? GEU : LTU);
22768 }
22769 else
22770 {
22771 std::swap (op0, op1);
22772 code = (code == GTU ? LTU : GEU);
22773 }
22774 break;
22775
22776 /* Convert a>=0 into (unsigned)a<0x80000000. */
22777 case LT:
22778 case GE:
22779 if (mode == DImode || op1 != const0_rtx)
22780 return false;
22781 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22782 code = (code == LT ? GEU : LTU);
22783 break;
22784 case LE:
22785 case GT:
22786 if (mode == DImode || op1 != constm1_rtx)
22787 return false;
22788 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22789 code = (code == LE ? GEU : LTU);
22790 break;
22791
22792 default:
22793 return false;
22794 }
22795 /* Swapping operands may cause a constant to appear as the first operand. */
22796 if (!nonimmediate_operand (op0, VOIDmode))
22797 {
22798 if (!can_create_pseudo_p ())
22799 return false;
22800 op0 = force_reg (mode, op0);
22801 }
22802 *pop = ix86_expand_compare (code, op0, op1);
22803 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
22804 return true;
22805 }
22806
22807 bool
22808 ix86_expand_int_movcc (rtx operands[])
22809 {
22810 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22811 rtx_insn *compare_seq;
22812 rtx compare_op;
22813 machine_mode mode = GET_MODE (operands[0]);
22814 bool sign_bit_compare_p = false;
22815 rtx op0 = XEXP (operands[1], 0);
22816 rtx op1 = XEXP (operands[1], 1);
22817
22818 if (GET_MODE (op0) == TImode
22819 || (GET_MODE (op0) == DImode
22820 && !TARGET_64BIT))
22821 return false;
22822
22823 start_sequence ();
22824 compare_op = ix86_expand_compare (code, op0, op1);
22825 compare_seq = get_insns ();
22826 end_sequence ();
22827
22828 compare_code = GET_CODE (compare_op);
22829
22830 if ((op1 == const0_rtx && (code == GE || code == LT))
22831 || (op1 == constm1_rtx && (code == GT || code == LE)))
22832 sign_bit_compare_p = true;
22833
22834 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22835 HImode insns, we'd be swallowed in word prefix ops. */
22836
22837 if ((mode != HImode || TARGET_FAST_PREFIX)
22838 && (mode != (TARGET_64BIT ? TImode : DImode))
22839 && CONST_INT_P (operands[2])
22840 && CONST_INT_P (operands[3]))
22841 {
22842 rtx out = operands[0];
22843 HOST_WIDE_INT ct = INTVAL (operands[2]);
22844 HOST_WIDE_INT cf = INTVAL (operands[3]);
22845 HOST_WIDE_INT diff;
22846
22847 diff = ct - cf;
22848 /* Sign bit compares are better done using shifts than by using
22849 sbb. */
22850 if (sign_bit_compare_p
22851 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22852 {
22853 /* Detect overlap between destination and compare sources. */
22854 rtx tmp = out;
22855
22856 if (!sign_bit_compare_p)
22857 {
22858 rtx flags;
22859 bool fpcmp = false;
22860
22861 compare_code = GET_CODE (compare_op);
22862
22863 flags = XEXP (compare_op, 0);
22864
22865 if (GET_MODE (flags) == CCFPmode
22866 || GET_MODE (flags) == CCFPUmode)
22867 {
22868 fpcmp = true;
22869 compare_code
22870 = ix86_fp_compare_code_to_integer (compare_code);
22871 }
22872
22873 /* To simplify the rest of the code, restrict to the GEU case. */
22874 if (compare_code == LTU)
22875 {
22876 std::swap (ct, cf);
22877 compare_code = reverse_condition (compare_code);
22878 code = reverse_condition (code);
22879 }
22880 else
22881 {
22882 if (fpcmp)
22883 PUT_CODE (compare_op,
22884 reverse_condition_maybe_unordered
22885 (GET_CODE (compare_op)));
22886 else
22887 PUT_CODE (compare_op,
22888 reverse_condition (GET_CODE (compare_op)));
22889 }
22890 diff = ct - cf;
22891
22892 if (reg_overlap_mentioned_p (out, op0)
22893 || reg_overlap_mentioned_p (out, op1))
22894 tmp = gen_reg_rtx (mode);
22895
22896 if (mode == DImode)
22897 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22898 else
22899 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22900 flags, compare_op));
22901 }
22902 else
22903 {
22904 if (code == GT || code == GE)
22905 code = reverse_condition (code);
22906 else
22907 {
22908 std::swap (ct, cf);
22909 diff = ct - cf;
22910 }
22911 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22912 }
22913
22914 if (diff == 1)
22915 {
22916 /*
22917 * cmpl op0,op1
22918 * sbbl dest,dest
22919 * [addl dest, ct]
22920 *
22921 * Size 5 - 8.
22922 */
22923 if (ct)
22924 tmp = expand_simple_binop (mode, PLUS,
22925 tmp, GEN_INT (ct),
22926 copy_rtx (tmp), 1, OPTAB_DIRECT);
22927 }
22928 else if (cf == -1)
22929 {
22930 /*
22931 * cmpl op0,op1
22932 * sbbl dest,dest
22933 * orl $ct, dest
22934 *
22935 * Size 8.
22936 */
22937 tmp = expand_simple_binop (mode, IOR,
22938 tmp, GEN_INT (ct),
22939 copy_rtx (tmp), 1, OPTAB_DIRECT);
22940 }
22941 else if (diff == -1 && ct)
22942 {
22943 /*
22944 * cmpl op0,op1
22945 * sbbl dest,dest
22946 * notl dest
22947 * [addl dest, cf]
22948 *
22949 * Size 8 - 11.
22950 */
22951 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22952 if (cf)
22953 tmp = expand_simple_binop (mode, PLUS,
22954 copy_rtx (tmp), GEN_INT (cf),
22955 copy_rtx (tmp), 1, OPTAB_DIRECT);
22956 }
22957 else
22958 {
22959 /*
22960 * cmpl op0,op1
22961 * sbbl dest,dest
22962 * [notl dest]
22963 * andl cf - ct, dest
22964 * [addl dest, ct]
22965 *
22966 * Size 8 - 11.
22967 */
22968
22969 if (cf == 0)
22970 {
22971 cf = ct;
22972 ct = 0;
22973 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22974 }
22975
22976 tmp = expand_simple_binop (mode, AND,
22977 copy_rtx (tmp),
22978 gen_int_mode (cf - ct, mode),
22979 copy_rtx (tmp), 1, OPTAB_DIRECT);
22980 if (ct)
22981 tmp = expand_simple_binop (mode, PLUS,
22982 copy_rtx (tmp), GEN_INT (ct),
22983 copy_rtx (tmp), 1, OPTAB_DIRECT);
22984 }
22985
22986 if (!rtx_equal_p (tmp, out))
22987 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22988
22989 return true;
22990 }
22991
22992 if (diff < 0)
22993 {
22994 machine_mode cmp_mode = GET_MODE (op0);
22995 enum rtx_code new_code;
22996
22997 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22998 {
22999 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23000
23001 /* We may be reversing an unordered compare to a normal compare, which
23002 is not valid in general (we may convert a non-trapping condition
23003 into a trapping one); however, on i386 we currently emit all
23004 comparisons unordered. */
23005 new_code = reverse_condition_maybe_unordered (code);
23006 }
23007 else
23008 new_code = ix86_reverse_condition (code, cmp_mode);
23009 if (new_code != UNKNOWN)
23010 {
23011 std::swap (ct, cf);
23012 diff = -diff;
23013 code = new_code;
23014 }
23015 }
23016
23017 compare_code = UNKNOWN;
23018 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23019 && CONST_INT_P (op1))
23020 {
23021 if (op1 == const0_rtx
23022 && (code == LT || code == GE))
23023 compare_code = code;
23024 else if (op1 == constm1_rtx)
23025 {
23026 if (code == LE)
23027 compare_code = LT;
23028 else if (code == GT)
23029 compare_code = GE;
23030 }
23031 }
23032
23033 /* Optimize dest = (op0 < 0) ? -1 : cf. */
23034 if (compare_code != UNKNOWN
23035 && GET_MODE (op0) == GET_MODE (out)
23036 && (cf == -1 || ct == -1))
23037 {
23038 /* If the lea code below could be used, only optimize
23039 if it results in a 2-insn sequence. */
23040
23041 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23042 || diff == 3 || diff == 5 || diff == 9)
23043 || (compare_code == LT && ct == -1)
23044 || (compare_code == GE && cf == -1))
23045 {
23046 /*
23047 * notl op1 (if necessary)
23048 * sarl $31, op1
23049 * orl cf, op1
23050 */
23051 if (ct != -1)
23052 {
23053 cf = ct;
23054 ct = -1;
23055 code = reverse_condition (code);
23056 }
23057
23058 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23059
23060 out = expand_simple_binop (mode, IOR,
23061 out, GEN_INT (cf),
23062 out, 1, OPTAB_DIRECT);
23063 if (out != operands[0])
23064 emit_move_insn (operands[0], out);
23065
23066 return true;
23067 }
23068 }
23069
23070
23071 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23072 || diff == 3 || diff == 5 || diff == 9)
23073 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23074 && (mode != DImode
23075 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23076 {
23077 /*
23078 * xorl dest,dest
23079 * cmpl op1,op2
23080 * setcc dest
23081 * lea cf(dest*(ct-cf)),dest
23082 *
23083 * Size 14.
23084 *
23085 * This also catches the degenerate setcc-only case.
23086 */
23087
23088 rtx tmp;
23089 int nops;
23090
23091 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23092
23093 nops = 0;
23094 /* On x86_64 the lea instruction operates on Pmode, so we need
23095 to get the arithmetic done in the proper mode to match. */
23096 if (diff == 1)
23097 tmp = copy_rtx (out);
23098 else
23099 {
23100 rtx out1;
23101 out1 = copy_rtx (out);
23102 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23103 nops++;
23104 if (diff & 1)
23105 {
23106 tmp = gen_rtx_PLUS (mode, tmp, out1);
23107 nops++;
23108 }
23109 }
23110 if (cf != 0)
23111 {
23112 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23113 nops++;
23114 }
23115 if (!rtx_equal_p (tmp, out))
23116 {
23117 if (nops == 1)
23118 out = force_operand (tmp, copy_rtx (out));
23119 else
23120 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23121 }
23122 if (!rtx_equal_p (out, operands[0]))
23123 emit_move_insn (operands[0], copy_rtx (out));
23124
23125 return true;
23126 }
23127
23128 /*
23129 * General case: Jumpful:
23130 * xorl dest,dest cmpl op1, op2
23131 * cmpl op1, op2 movl ct, dest
23132 * setcc dest jcc 1f
23133 * decl dest movl cf, dest
23134 * andl (cf-ct),dest 1:
23135 * addl ct,dest
23136 *
23137 * Size 20. Size 14.
23138 *
23139 * This is reasonably steep, but branch mispredict costs are
23140 * high on modern CPUs, so consider failing only if optimizing
23141 * for space.
23142 */
23143
23144 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23145 && BRANCH_COST (optimize_insn_for_speed_p (),
23146 false) >= 2)
23147 {
23148 if (cf == 0)
23149 {
23150 machine_mode cmp_mode = GET_MODE (op0);
23151 enum rtx_code new_code;
23152
23153 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23154 {
23155 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23156
23157 /* We may be reversing an unordered compare to a normal compare,
23158 which is not valid in general (we may convert a non-trapping
23159 condition into a trapping one); however, on i386 we currently
23160 emit all comparisons unordered. */
23161 new_code = reverse_condition_maybe_unordered (code);
23162 }
23163 else
23164 {
23165 new_code = ix86_reverse_condition (code, cmp_mode);
23166 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23167 compare_code = reverse_condition (compare_code);
23168 }
23169
23170 if (new_code != UNKNOWN)
23171 {
23172 cf = ct;
23173 ct = 0;
23174 code = new_code;
23175 }
23176 }
23177
23178 if (compare_code != UNKNOWN)
23179 {
23180 /* notl op1 (if needed)
23181 sarl $31, op1
23182 andl (cf-ct), op1
23183 addl ct, op1
23184
23185 For x < 0 (resp. x <= -1) there will be no notl,
23186 so if possible swap the constants to get rid of the
23187 complement.
23188 True/false will be -1/0 while code below (store flag
23189 followed by decrement) is 0/-1, so the constants need
23190 to be exchanged once more. */
23191
23192 if (compare_code == GE || !cf)
23193 {
23194 code = reverse_condition (code);
23195 compare_code = LT;
23196 }
23197 else
23198 std::swap (ct, cf);
23199
23200 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23201 }
23202 else
23203 {
23204 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23205
23206 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23207 constm1_rtx,
23208 copy_rtx (out), 1, OPTAB_DIRECT);
23209 }
23210
23211 out = expand_simple_binop (mode, AND, copy_rtx (out),
23212 gen_int_mode (cf - ct, mode),
23213 copy_rtx (out), 1, OPTAB_DIRECT);
23214 if (ct)
23215 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23216 copy_rtx (out), 1, OPTAB_DIRECT);
23217 if (!rtx_equal_p (out, operands[0]))
23218 emit_move_insn (operands[0], copy_rtx (out));
23219
23220 return true;
23221 }
23222 }
23223
23224 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23225 {
23226 /* Try a few more things with specific constants and a variable. */
23227
23228 optab op;
23229 rtx var, orig_out, out, tmp;
23230
23231 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23232 return false;
23233
23234 /* If one of the two operands is an interesting constant, load a
23235 constant with the above and mask it in with a logical operation. */
23236
23237 if (CONST_INT_P (operands[2]))
23238 {
23239 var = operands[3];
23240 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23241 operands[3] = constm1_rtx, op = and_optab;
23242 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23243 operands[3] = const0_rtx, op = ior_optab;
23244 else
23245 return false;
23246 }
23247 else if (CONST_INT_P (operands[3]))
23248 {
23249 var = operands[2];
23250 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23251 operands[2] = constm1_rtx, op = and_optab;
23252 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
23253 operands[2] = const0_rtx, op = ior_optab;
23254 else
23255 return false;
23256 }
23257 else
23258 return false;
23259
23260 orig_out = operands[0];
23261 tmp = gen_reg_rtx (mode);
23262 operands[0] = tmp;
23263
23264 /* Recurse to get the constant loaded. */
23265 if (!ix86_expand_int_movcc (operands))
23266 return false;
23267
23268 /* Mask in the interesting variable. */
23269 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23270 OPTAB_WIDEN);
23271 if (!rtx_equal_p (out, orig_out))
23272 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23273
23274 return true;
23275 }
23276
23277 /*
23278 * For comparison with above,
23279 *
23280 * movl cf,dest
23281 * movl ct,tmp
23282 * cmpl op1,op2
23283 * cmovcc tmp,dest
23284 *
23285 * Size 15.
23286 */
23287
23288 if (! nonimmediate_operand (operands[2], mode))
23289 operands[2] = force_reg (mode, operands[2]);
23290 if (! nonimmediate_operand (operands[3], mode))
23291 operands[3] = force_reg (mode, operands[3]);
23292
23293 if (! register_operand (operands[2], VOIDmode)
23294 && (mode == QImode
23295 || ! register_operand (operands[3], VOIDmode)))
23296 operands[2] = force_reg (mode, operands[2]);
23297
23298 if (mode == QImode
23299 && ! register_operand (operands[3], VOIDmode))
23300 operands[3] = force_reg (mode, operands[3]);
23301
23302 emit_insn (compare_seq);
23303 emit_insn (gen_rtx_SET (operands[0],
23304 gen_rtx_IF_THEN_ELSE (mode,
23305 compare_op, operands[2],
23306 operands[3])));
23307 return true;
23308 }
23309
23310 /* Swap, force into registers, or otherwise massage the two operands
23311 to an sse comparison with a mask result. Thus we differ a bit from
23312 ix86_prepare_fp_compare_args which expects to produce a flags result.
23313
23314 The DEST operand exists to help determine whether to commute commutative
23315 operators. The POP0/POP1 operands are updated in place. The new
23316 comparison code is returned, or UNKNOWN if not implementable. */
23317
23318 static enum rtx_code
23319 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23320 rtx *pop0, rtx *pop1)
23321 {
23322 switch (code)
23323 {
23324 case LTGT:
23325 case UNEQ:
23326 /* AVX supports all the needed comparisons. */
23327 if (TARGET_AVX)
23328 break;
23329 /* We have no LTGT as an operator. We could implement it with
23330 NE & ORDERED, but this requires an extra temporary. It's
23331 not clear that it's worth it. */
23332 return UNKNOWN;
23333
23334 case LT:
23335 case LE:
23336 case UNGT:
23337 case UNGE:
23338 /* These are supported directly. */
23339 break;
23340
23341 case EQ:
23342 case NE:
23343 case UNORDERED:
23344 case ORDERED:
23345 /* AVX has 3 operand comparisons, no need to swap anything. */
23346 if (TARGET_AVX)
23347 break;
23348 /* For commutative operators, try to canonicalize the destination
23349 operand to be first in the comparison - this helps reload to
23350 avoid extra moves. */
23351 if (!dest || !rtx_equal_p (dest, *pop1))
23352 break;
23353 /* FALLTHRU */
23354
23355 case GE:
23356 case GT:
23357 case UNLE:
23358 case UNLT:
23359 /* These are not supported directly before AVX, and furthermore
23360 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23361 comparison operands to transform into something that is
23362 supported. */
23363 std::swap (*pop0, *pop1);
23364 code = swap_condition (code);
23365 break;
23366
23367 default:
23368 gcc_unreachable ();
23369 }
23370
23371 return code;
23372 }
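/* Illustrative note (an assumption about the pre-AVX encodings, not part
   of the original sources): CMPPS/CMPSS only encode the predicates
   eq, lt, le, unord, neq, nlt, nle and ord, so a GT or UNLE test has to
   be rewritten, e.g. "a > b" becomes "b < a" with the operands swapped,
   which is what the GE/GT/UNLE/UNLT case above does.  */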
23373
23374 /* Detect conditional moves that exactly match min/max operational
23375 semantics. Note that this is IEEE safe, as long as we don't
23376 interchange the operands.
23377
23378 Returns FALSE if this conditional move doesn't match a MIN/MAX,
23379 and TRUE if the operation is successful and instructions are emitted. */
23380
23381 static bool
23382 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23383 rtx cmp_op1, rtx if_true, rtx if_false)
23384 {
23385 machine_mode mode;
23386 bool is_min;
23387 rtx tmp;
23388
23389 if (code == LT)
23390 ;
23391 else if (code == UNGE)
23392 std::swap (if_true, if_false);
23393 else
23394 return false;
23395
23396 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23397 is_min = true;
23398 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23399 is_min = false;
23400 else
23401 return false;
23402
23403 mode = GET_MODE (dest);
23404
23405 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23406 but MODE may be a vector mode and thus not appropriate. */
23407 if (!flag_finite_math_only || flag_signed_zeros)
23408 {
23409 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23410 rtvec v;
23411
23412 if_true = force_reg (mode, if_true);
23413 v = gen_rtvec (2, if_true, if_false);
23414 tmp = gen_rtx_UNSPEC (mode, v, u);
23415 }
23416 else
23417 {
23418 code = is_min ? SMIN : SMAX;
23419 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23420 }
23421
23422 emit_insn (gen_rtx_SET (dest, tmp));
23423 return true;
23424 }
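/* Illustrative note (a sketch of the intent, not from the original
   sources): with finite math and no signed zeros, "a < b ? a : b" maps
   directly to SMIN and can be emitted as minss/minps.  Otherwise the
   UNSPEC_IEEE_MIN/MAX form keeps the exact operand order, since the
   hardware min/max are not commutative for NaNs or for -0.0/+0.0.  */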
23425
23426 /* Expand an sse vector comparison. Return the register with the result. */
23427
23428 static rtx
23429 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23430 rtx op_true, rtx op_false)
23431 {
23432 machine_mode mode = GET_MODE (dest);
23433 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23434
23435 /* In the general case the result of the comparison can differ from the operands' type. */
23436 machine_mode cmp_mode;
23437
23438 /* In AVX512F the result of comparison is an integer mask. */
23439 bool maskcmp = false;
23440 rtx x;
23441
23442 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23443 {
23444 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
23445 gcc_assert (cmp_mode != BLKmode);
23446
23447 maskcmp = true;
23448 }
23449 else
23450 cmp_mode = cmp_ops_mode;
23451
23452
23453 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23454 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
23455 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23456
23457 if (optimize
23458 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23459 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23460 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23461
23462 /* Compare patterns for int modes are unspec in AVX512F only. */
23463 if (maskcmp && (code == GT || code == EQ))
23464 {
23465 rtx (*gen)(rtx, rtx, rtx);
23466
23467 switch (cmp_ops_mode)
23468 {
23469 case V64QImode:
23470 gcc_assert (TARGET_AVX512BW);
23471 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23472 break;
23473 case V32HImode:
23474 gcc_assert (TARGET_AVX512BW);
23475 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23476 break;
23477 case V16SImode:
23478 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23479 break;
23480 case V8DImode:
23481 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23482 break;
23483 default:
23484 gen = NULL;
23485 }
23486
23487 if (gen)
23488 {
23489 emit_insn (gen (dest, cmp_op0, cmp_op1));
23490 return dest;
23491 }
23492 }
23493 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23494
23495 if (cmp_mode != mode && !maskcmp)
23496 {
23497 x = force_reg (cmp_ops_mode, x);
23498 convert_move (dest, x, false);
23499 }
23500 else
23501 emit_insn (gen_rtx_SET (dest, x));
23502
23503 return dest;
23504 }
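/* Illustrative note (not from the original sources): for 64-byte
   operands the comparison result is a mask with one bit per element,
   e.g. a V16SImode or V16SFmode compare yields an HImode mask, which is
   what the mode_for_size (GET_MODE_NUNITS (...), MODE_INT, 0) call
   above selects.  */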
23505
23506 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23507 operations. This is used for both scalar and vector conditional moves. */
23508
23509 void
23510 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23511 {
23512 machine_mode mode = GET_MODE (dest);
23513 machine_mode cmpmode = GET_MODE (cmp);
23514
23515 /* In AVX512F the result of comparison is an integer mask. */
23516 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23517
23518 rtx t2, t3, x;
23519
23520 /* If we have an integer mask and an FP value then we need
23521 to cast the mask to FP mode. */
23522 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23523 {
23524 cmp = force_reg (cmpmode, cmp);
23525 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23526 }
23527
23528 if (vector_all_ones_operand (op_true, mode)
23529 && rtx_equal_p (op_false, CONST0_RTX (mode))
23530 && !maskcmp)
23531 {
23532 emit_insn (gen_rtx_SET (dest, cmp));
23533 }
23534 else if (op_false == CONST0_RTX (mode)
23535 && !maskcmp)
23536 {
23537 op_true = force_reg (mode, op_true);
23538 x = gen_rtx_AND (mode, cmp, op_true);
23539 emit_insn (gen_rtx_SET (dest, x));
23540 }
23541 else if (op_true == CONST0_RTX (mode)
23542 && !maskcmp)
23543 {
23544 op_false = force_reg (mode, op_false);
23545 x = gen_rtx_NOT (mode, cmp);
23546 x = gen_rtx_AND (mode, x, op_false);
23547 emit_insn (gen_rtx_SET (dest, x));
23548 }
23549 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
23550 && !maskcmp)
23551 {
23552 op_false = force_reg (mode, op_false);
23553 x = gen_rtx_IOR (mode, cmp, op_false);
23554 emit_insn (gen_rtx_SET (dest, x));
23555 }
23556 else if (TARGET_XOP
23557 && !maskcmp)
23558 {
23559 op_true = force_reg (mode, op_true);
23560
23561 if (!nonimmediate_operand (op_false, mode))
23562 op_false = force_reg (mode, op_false);
23563
23564 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23565 op_true,
23566 op_false)));
23567 }
23568 else
23569 {
23570 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23571 rtx d = dest;
23572
23573 if (!nonimmediate_operand (op_true, mode))
23574 op_true = force_reg (mode, op_true);
23575
23576 op_false = force_reg (mode, op_false);
23577
23578 switch (mode)
23579 {
23580 case V4SFmode:
23581 if (TARGET_SSE4_1)
23582 gen = gen_sse4_1_blendvps;
23583 break;
23584 case V2DFmode:
23585 if (TARGET_SSE4_1)
23586 gen = gen_sse4_1_blendvpd;
23587 break;
23588 case V16QImode:
23589 case V8HImode:
23590 case V4SImode:
23591 case V2DImode:
23592 if (TARGET_SSE4_1)
23593 {
23594 gen = gen_sse4_1_pblendvb;
23595 if (mode != V16QImode)
23596 d = gen_reg_rtx (V16QImode);
23597 op_false = gen_lowpart (V16QImode, op_false);
23598 op_true = gen_lowpart (V16QImode, op_true);
23599 cmp = gen_lowpart (V16QImode, cmp);
23600 }
23601 break;
23602 case V8SFmode:
23603 if (TARGET_AVX)
23604 gen = gen_avx_blendvps256;
23605 break;
23606 case V4DFmode:
23607 if (TARGET_AVX)
23608 gen = gen_avx_blendvpd256;
23609 break;
23610 case V32QImode:
23611 case V16HImode:
23612 case V8SImode:
23613 case V4DImode:
23614 if (TARGET_AVX2)
23615 {
23616 gen = gen_avx2_pblendvb;
23617 if (mode != V32QImode)
23618 d = gen_reg_rtx (V32QImode);
23619 op_false = gen_lowpart (V32QImode, op_false);
23620 op_true = gen_lowpart (V32QImode, op_true);
23621 cmp = gen_lowpart (V32QImode, cmp);
23622 }
23623 break;
23624
23625 case V64QImode:
23626 gen = gen_avx512bw_blendmv64qi;
23627 break;
23628 case V32HImode:
23629 gen = gen_avx512bw_blendmv32hi;
23630 break;
23631 case V16SImode:
23632 gen = gen_avx512f_blendmv16si;
23633 break;
23634 case V8DImode:
23635 gen = gen_avx512f_blendmv8di;
23636 break;
23637 case V8DFmode:
23638 gen = gen_avx512f_blendmv8df;
23639 break;
23640 case V16SFmode:
23641 gen = gen_avx512f_blendmv16sf;
23642 break;
23643
23644 default:
23645 break;
23646 }
23647
23648 if (gen != NULL)
23649 {
23650 emit_insn (gen (d, op_false, op_true, cmp));
23651 if (d != dest)
23652 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23653 }
23654 else
23655 {
23656 op_true = force_reg (mode, op_true);
23657
23658 t2 = gen_reg_rtx (mode);
23659 if (optimize)
23660 t3 = gen_reg_rtx (mode);
23661 else
23662 t3 = dest;
23663
23664 x = gen_rtx_AND (mode, op_true, cmp);
23665 emit_insn (gen_rtx_SET (t2, x));
23666
23667 x = gen_rtx_NOT (mode, cmp);
23668 x = gen_rtx_AND (mode, x, op_false);
23669 emit_insn (gen_rtx_SET (t3, x));
23670
23671 x = gen_rtx_IOR (mode, t3, t2);
23672 emit_insn (gen_rtx_SET (dest, x));
23673 }
23674 }
23675 }
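/* Illustrative note (a sketch, not from the original sources): the
   final fallback above is the classic bit-select idiom

     dest = (cmp & op_true) | (~cmp & op_false);

   which relies on each element of CMP being all-ones or all-zeros.
   The SSE4.1/AVX blendv forms only inspect the most significant bit of
   each mask element, while the AVX-512 blendm forms take a mask
   register instead.  */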
23676
23677 /* Expand a floating-point conditional move. Return true if successful. */
23678
23679 bool
23680 ix86_expand_fp_movcc (rtx operands[])
23681 {
23682 machine_mode mode = GET_MODE (operands[0]);
23683 enum rtx_code code = GET_CODE (operands[1]);
23684 rtx tmp, compare_op;
23685 rtx op0 = XEXP (operands[1], 0);
23686 rtx op1 = XEXP (operands[1], 1);
23687
23688 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23689 {
23690 machine_mode cmode;
23691
23692 /* Since we have no cmove for SSE registers, don't force bad register
23693 allocation just to gain access to it. Deny movcc when the
23694 comparison mode doesn't match the move mode. */
23695 cmode = GET_MODE (op0);
23696 if (cmode == VOIDmode)
23697 cmode = GET_MODE (op1);
23698 if (cmode != mode)
23699 return false;
23700
23701 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23702 if (code == UNKNOWN)
23703 return false;
23704
23705 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23706 operands[2], operands[3]))
23707 return true;
23708
23709 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23710 operands[2], operands[3]);
23711 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23712 return true;
23713 }
23714
23715 if (GET_MODE (op0) == TImode
23716 || (GET_MODE (op0) == DImode
23717 && !TARGET_64BIT))
23718 return false;
23719
23720 /* The floating point conditional move instructions don't directly
23721 support conditions resulting from a signed integer comparison. */
23722
23723 compare_op = ix86_expand_compare (code, op0, op1);
23724 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23725 {
23726 tmp = gen_reg_rtx (QImode);
23727 ix86_expand_setcc (tmp, code, op0, op1);
23728
23729 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23730 }
23731
23732 emit_insn (gen_rtx_SET (operands[0],
23733 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23734 operands[2], operands[3])));
23735
23736 return true;
23737 }
23738
23739 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
23740
23741 static int
23742 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23743 {
23744 switch (code)
23745 {
23746 case EQ:
23747 return 0;
23748 case LT:
23749 case LTU:
23750 return 1;
23751 case LE:
23752 case LEU:
23753 return 2;
23754 case NE:
23755 return 4;
23756 case GE:
23757 case GEU:
23758 return 5;
23759 case GT:
23760 case GTU:
23761 return 6;
23762 default:
23763 gcc_unreachable ();
23764 }
23765 }
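/* Illustrative note (an assumption about the instruction encoding, not
   from the original sources): these values follow the VPCMP{B,W,D,Q}
   predicate encoding, e.g. 0 = eq, 1 = lt, 2 = le, 4 = neq, 5 = nlt,
   6 = nle; the unsigned codes use the same immediates with the
   vpcmpu* forms.  */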
23766
23767 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
23768
23769 static int
23770 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23771 {
23772 switch (code)
23773 {
23774 case EQ:
23775 return 0x00;
23776 case NE:
23777 return 0x04;
23778 case GT:
23779 return 0x0e;
23780 case LE:
23781 return 0x02;
23782 case GE:
23783 return 0x0d;
23784 case LT:
23785 return 0x01;
23786 case UNLE:
23787 return 0x0a;
23788 case UNLT:
23789 return 0x09;
23790 case UNGE:
23791 return 0x05;
23792 case UNGT:
23793 return 0x06;
23794 case UNEQ:
23795 return 0x18;
23796 case LTGT:
23797 return 0x0c;
23798 case ORDERED:
23799 return 0x07;
23800 case UNORDERED:
23801 return 0x03;
23802 default:
23803 gcc_unreachable ();
23804 }
23805 }
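/* Illustrative note (an assumption about the instruction encoding, not
   from the original sources): these immediates match the VCMPPS/VCMPPD
   predicate table, e.g. 0x01 = LT_OS, 0x03 = UNORD_Q, 0x0c = NEQ_OQ
   (our LTGT) and 0x18 = EQ_US (our UNEQ).  */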
23806
23807 /* Return immediate value to be used in UNSPEC_PCMP
23808 for comparison CODE in MODE. */
23809
23810 static int
23811 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23812 {
23813 if (FLOAT_MODE_P (mode))
23814 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23815 return ix86_int_cmp_code_to_pcmp_immediate (code);
23816 }
23817
23818 /* Expand AVX-512 vector comparison. */
23819
23820 bool
23821 ix86_expand_mask_vec_cmp (rtx operands[])
23822 {
23823 machine_mode mask_mode = GET_MODE (operands[0]);
23824 machine_mode cmp_mode = GET_MODE (operands[2]);
23825 enum rtx_code code = GET_CODE (operands[1]);
23826 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23827 int unspec_code;
23828 rtx unspec;
23829
23830 switch (code)
23831 {
23832 case LEU:
23833 case GTU:
23834 case GEU:
23835 case LTU:
23836 unspec_code = UNSPEC_UNSIGNED_PCMP;
23837 break;
23838
23839 default:
23840 unspec_code = UNSPEC_PCMP;
23841 }
23842
23843 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23844 operands[3], imm),
23845 unspec_code);
23846 emit_insn (gen_rtx_SET (operands[0], unspec));
23847
23848 return true;
23849 }
23850
23851 /* Expand fp vector comparison. */
23852
23853 bool
23854 ix86_expand_fp_vec_cmp (rtx operands[])
23855 {
23856 enum rtx_code code = GET_CODE (operands[1]);
23857 rtx cmp;
23858
23859 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23860 &operands[2], &operands[3]);
23861 if (code == UNKNOWN)
23862 {
23863 rtx temp;
23864 switch (GET_CODE (operands[1]))
23865 {
23866 case LTGT:
23867 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23868 operands[3], NULL, NULL);
23869 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23870 operands[3], NULL, NULL);
23871 code = AND;
23872 break;
23873 case UNEQ:
23874 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23875 operands[3], NULL, NULL);
23876 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23877 operands[3], NULL, NULL);
23878 code = IOR;
23879 break;
23880 default:
23881 gcc_unreachable ();
23882 }
23883 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23884 OPTAB_DIRECT);
23885 }
23886 else
23887 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23888 operands[1], operands[2]);
23889
23890 if (operands[0] != cmp)
23891 emit_move_insn (operands[0], cmp);
23892
23893 return true;
23894 }
23895
23896 static rtx
23897 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23898 rtx op_true, rtx op_false, bool *negate)
23899 {
23900 machine_mode data_mode = GET_MODE (dest);
23901 machine_mode mode = GET_MODE (cop0);
23902 rtx x;
23903
23904 *negate = false;
23905
23906 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23907 if (TARGET_XOP
23908 && (mode == V16QImode || mode == V8HImode
23909 || mode == V4SImode || mode == V2DImode))
23910 ;
23911 else
23912 {
23913 /* Canonicalize the comparison to EQ, GT, GTU. */
23914 switch (code)
23915 {
23916 case EQ:
23917 case GT:
23918 case GTU:
23919 break;
23920
23921 case NE:
23922 case LE:
23923 case LEU:
23924 code = reverse_condition (code);
23925 *negate = true;
23926 break;
23927
23928 case GE:
23929 case GEU:
23930 code = reverse_condition (code);
23931 *negate = true;
23932 /* FALLTHRU */
23933
23934 case LT:
23935 case LTU:
23936 std::swap (cop0, cop1);
23937 code = swap_condition (code);
23938 break;
23939
23940 default:
23941 gcc_unreachable ();
23942 }
23943
23944 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23945 if (mode == V2DImode)
23946 {
23947 switch (code)
23948 {
23949 case EQ:
23950 /* SSE4.1 supports EQ. */
23951 if (!TARGET_SSE4_1)
23952 return NULL;
23953 break;
23954
23955 case GT:
23956 case GTU:
23957 /* SSE4.2 supports GT/GTU. */
23958 if (!TARGET_SSE4_2)
23959 return NULL;
23960 break;
23961
23962 default:
23963 gcc_unreachable ();
23964 }
23965 }
23966
23967 /* Unsigned parallel compare is not supported by the hardware.
23968 Play some tricks to turn this into a signed comparison
23969 against 0. */
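/* Illustrative note (a sketch, not from the original sources): for the
   dword/qword case below this relies on

     x >u y  ==  (x ^ 0x80000000) >s (y ^ 0x80000000)

   i.e. flipping the sign bit (done here by subtracting the sign-bit
   mask) turns an unsigned compare into a signed one.  For the byte and
   word case the identity used is x <=u y  ==  ((x -us y) == 0), where
   -us is the unsigned saturating subtraction, and the result is then
   negated.  */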
23970 if (code == GTU)
23971 {
23972 cop0 = force_reg (mode, cop0);
23973
23974 switch (mode)
23975 {
23976 case V16SImode:
23977 case V8DImode:
23978 case V8SImode:
23979 case V4DImode:
23980 case V4SImode:
23981 case V2DImode:
23982 {
23983 rtx t1, t2, mask;
23984 rtx (*gen_sub3) (rtx, rtx, rtx);
23985
23986 switch (mode)
23987 {
23988 case V16SImode: gen_sub3 = gen_subv16si3; break;
23989 case V8DImode: gen_sub3 = gen_subv8di3; break;
23990 case V8SImode: gen_sub3 = gen_subv8si3; break;
23991 case V4DImode: gen_sub3 = gen_subv4di3; break;
23992 case V4SImode: gen_sub3 = gen_subv4si3; break;
23993 case V2DImode: gen_sub3 = gen_subv2di3; break;
23994 default:
23995 gcc_unreachable ();
23996 }
23997 /* Subtract (-(INT MAX) - 1) from both operands to make
23998 them signed. */
23999 mask = ix86_build_signbit_mask (mode, true, false);
24000 t1 = gen_reg_rtx (mode);
24001 emit_insn (gen_sub3 (t1, cop0, mask));
24002
24003 t2 = gen_reg_rtx (mode);
24004 emit_insn (gen_sub3 (t2, cop1, mask));
24005
24006 cop0 = t1;
24007 cop1 = t2;
24008 code = GT;
24009 }
24010 break;
24011
24012 case V64QImode:
24013 case V32HImode:
24014 case V32QImode:
24015 case V16HImode:
24016 case V16QImode:
24017 case V8HImode:
24018 /* Perform a parallel unsigned saturating subtraction. */
24019 x = gen_reg_rtx (mode);
24020 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24021 cop1)));
24022
24023 cop0 = x;
24024 cop1 = CONST0_RTX (mode);
24025 code = EQ;
24026 *negate = !*negate;
24027 break;
24028
24029 default:
24030 gcc_unreachable ();
24031 }
24032 }
24033 }
24034
24035 if (*negate)
24036 std::swap (op_true, op_false);
24037
24038 /* Allow the comparison to be done in one mode, but the movcc to
24039 happen in another mode. */
24040 if (data_mode == mode)
24041 {
24042 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24043 op_true, op_false);
24044 }
24045 else
24046 {
24047 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24048 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24049 op_true, op_false);
24050 if (GET_MODE (x) == mode)
24051 x = gen_lowpart (data_mode, x);
24052 }
24053
24054 return x;
24055 }
24056
24057 /* Expand integer vector comparison. */
24058
24059 bool
24060 ix86_expand_int_vec_cmp (rtx operands[])
24061 {
24062 rtx_code code = GET_CODE (operands[1]);
24063 bool negate = false;
24064 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24065 operands[3], NULL, NULL, &negate);
24066
24067 if (!cmp)
24068 return false;
24069
24070 if (negate)
24071 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24072 CONST0_RTX (GET_MODE (cmp)),
24073 NULL, NULL, &negate);
24074
24075 gcc_assert (!negate);
24076
24077 if (operands[0] != cmp)
24078 emit_move_insn (operands[0], cmp);
24079
24080 return true;
24081 }
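/* Illustrative note (not from the original sources): when the comparison
   had to be reversed (NE, LE, LEU, GE, GEU), the mask produced above is
   the complement of what was asked for.  Since every element is either
   all-ones or all-zeros, a second EQ comparison against zero inverts it
   element-wise, which is what the recursive call does.  */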
24082
24083 /* Expand a floating-point vector conditional move; a vcond operation
24084 rather than a movcc operation. */
24085
24086 bool
24087 ix86_expand_fp_vcond (rtx operands[])
24088 {
24089 enum rtx_code code = GET_CODE (operands[3]);
24090 rtx cmp;
24091
24092 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24093 &operands[4], &operands[5]);
24094 if (code == UNKNOWN)
24095 {
24096 rtx temp;
24097 switch (GET_CODE (operands[3]))
24098 {
24099 case LTGT:
24100 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24101 operands[5], operands[0], operands[0]);
24102 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24103 operands[5], operands[1], operands[2]);
24104 code = AND;
24105 break;
24106 case UNEQ:
24107 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24108 operands[5], operands[0], operands[0]);
24109 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24110 operands[5], operands[1], operands[2]);
24111 code = IOR;
24112 break;
24113 default:
24114 gcc_unreachable ();
24115 }
24116 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24117 OPTAB_DIRECT);
24118 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24119 return true;
24120 }
24121
24122 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24123 operands[5], operands[1], operands[2]))
24124 return true;
24125
24126 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24127 operands[1], operands[2]);
24128 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24129 return true;
24130 }
24131
24132 /* Expand a signed/unsigned integral vector conditional move. */
24133
24134 bool
24135 ix86_expand_int_vcond (rtx operands[])
24136 {
24137 machine_mode data_mode = GET_MODE (operands[0]);
24138 machine_mode mode = GET_MODE (operands[4]);
24139 enum rtx_code code = GET_CODE (operands[3]);
24140 bool negate = false;
24141 rtx x, cop0, cop1;
24142
24143 cop0 = operands[4];
24144 cop1 = operands[5];
24145
24146 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24147 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24148 if ((code == LT || code == GE)
24149 && data_mode == mode
24150 && cop1 == CONST0_RTX (mode)
24151 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24152 && GET_MODE_UNIT_SIZE (data_mode) > 1
24153 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24154 && (GET_MODE_SIZE (data_mode) == 16
24155 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24156 {
24157 rtx negop = operands[2 - (code == LT)];
24158 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24159 if (negop == CONST1_RTX (data_mode))
24160 {
24161 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24162 operands[0], 1, OPTAB_DIRECT);
24163 if (res != operands[0])
24164 emit_move_insn (operands[0], res);
24165 return true;
24166 }
24167 else if (GET_MODE_INNER (data_mode) != DImode
24168 && vector_all_ones_operand (negop, data_mode))
24169 {
24170 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24171 operands[0], 0, OPTAB_DIRECT);
24172 if (res != operands[0])
24173 emit_move_insn (operands[0], res);
24174 return true;
24175 }
24176 }
24177
24178 if (!nonimmediate_operand (cop1, mode))
24179 cop1 = force_reg (mode, cop1);
24180 if (!general_operand (operands[1], data_mode))
24181 operands[1] = force_reg (data_mode, operands[1]);
24182 if (!general_operand (operands[2], data_mode))
24183 operands[2] = force_reg (data_mode, operands[2]);
24184
24185 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24186 operands[1], operands[2], &negate);
24187
24188 if (!x)
24189 return false;
24190
24191 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24192 operands[2-negate]);
24193 return true;
24194 }
24195
24196 /* AVX512F does support 64-byte integer vector operations,
24197 thus the longest vector we are faced with is V64QImode. */
24198 #define MAX_VECT_LEN 64
24199
24200 struct expand_vec_perm_d
24201 {
24202 rtx target, op0, op1;
24203 unsigned char perm[MAX_VECT_LEN];
24204 machine_mode vmode;
24205 unsigned char nelt;
24206 bool one_operand_p;
24207 bool testing_p;
24208 };
24209
24210 static bool
24211 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
24212 struct expand_vec_perm_d *d)
24213 {
24214 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
24215 expander, so args are either in d, or in op0, op1 etc. */
24216 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24217 machine_mode maskmode = mode;
24218 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24219
24220 switch (mode)
24221 {
24222 case V8HImode:
24223 if (TARGET_AVX512VL && TARGET_AVX512BW)
24224 gen = gen_avx512vl_vpermi2varv8hi3;
24225 break;
24226 case V16HImode:
24227 if (TARGET_AVX512VL && TARGET_AVX512BW)
24228 gen = gen_avx512vl_vpermi2varv16hi3;
24229 break;
24230 case V64QImode:
24231 if (TARGET_AVX512VBMI)
24232 gen = gen_avx512bw_vpermi2varv64qi3;
24233 break;
24234 case V32HImode:
24235 if (TARGET_AVX512BW)
24236 gen = gen_avx512bw_vpermi2varv32hi3;
24237 break;
24238 case V4SImode:
24239 if (TARGET_AVX512VL)
24240 gen = gen_avx512vl_vpermi2varv4si3;
24241 break;
24242 case V8SImode:
24243 if (TARGET_AVX512VL)
24244 gen = gen_avx512vl_vpermi2varv8si3;
24245 break;
24246 case V16SImode:
24247 if (TARGET_AVX512F)
24248 gen = gen_avx512f_vpermi2varv16si3;
24249 break;
24250 case V4SFmode:
24251 if (TARGET_AVX512VL)
24252 {
24253 gen = gen_avx512vl_vpermi2varv4sf3;
24254 maskmode = V4SImode;
24255 }
24256 break;
24257 case V8SFmode:
24258 if (TARGET_AVX512VL)
24259 {
24260 gen = gen_avx512vl_vpermi2varv8sf3;
24261 maskmode = V8SImode;
24262 }
24263 break;
24264 case V16SFmode:
24265 if (TARGET_AVX512F)
24266 {
24267 gen = gen_avx512f_vpermi2varv16sf3;
24268 maskmode = V16SImode;
24269 }
24270 break;
24271 case V2DImode:
24272 if (TARGET_AVX512VL)
24273 gen = gen_avx512vl_vpermi2varv2di3;
24274 break;
24275 case V4DImode:
24276 if (TARGET_AVX512VL)
24277 gen = gen_avx512vl_vpermi2varv4di3;
24278 break;
24279 case V8DImode:
24280 if (TARGET_AVX512F)
24281 gen = gen_avx512f_vpermi2varv8di3;
24282 break;
24283 case V2DFmode:
24284 if (TARGET_AVX512VL)
24285 {
24286 gen = gen_avx512vl_vpermi2varv2df3;
24287 maskmode = V2DImode;
24288 }
24289 break;
24290 case V4DFmode:
24291 if (TARGET_AVX512VL)
24292 {
24293 gen = gen_avx512vl_vpermi2varv4df3;
24294 maskmode = V4DImode;
24295 }
24296 break;
24297 case V8DFmode:
24298 if (TARGET_AVX512F)
24299 {
24300 gen = gen_avx512f_vpermi2varv8df3;
24301 maskmode = V8DImode;
24302 }
24303 break;
24304 default:
24305 break;
24306 }
24307
24308 if (gen == NULL)
24309 return false;
24310
24311 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
24312 expander, so args are either in d, or in op0, op1 etc. */
24313 if (d)
24314 {
24315 rtx vec[64];
24316 target = d->target;
24317 op0 = d->op0;
24318 op1 = d->op1;
24319 for (int i = 0; i < d->nelt; ++i)
24320 vec[i] = GEN_INT (d->perm[i]);
24321 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24322 }
24323
24324 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
24325 return true;
24326 }
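/* Illustrative note (a sketch of the vpermi2var semantics as we
   understand them, not from the original sources): conceptually

     dest[i] = (mask[i] & nelt) ? op1[mask[i] & (nelt - 1)]
                                : op0[mask[i] & (nelt - 1)];

   so a single instruction performs a full two-source variable
   permutation, which is why it is tried before the AVX2 fallbacks.  */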
24327
24328 /* Expand a variable vector permutation. */
24329
24330 void
24331 ix86_expand_vec_perm (rtx operands[])
24332 {
24333 rtx target = operands[0];
24334 rtx op0 = operands[1];
24335 rtx op1 = operands[2];
24336 rtx mask = operands[3];
24337 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24338 machine_mode mode = GET_MODE (op0);
24339 machine_mode maskmode = GET_MODE (mask);
24340 int w, e, i;
24341 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24342
24343 /* Number of elements in the vector. */
24344 w = GET_MODE_NUNITS (mode);
24345 e = GET_MODE_UNIT_SIZE (mode);
24346 gcc_assert (w <= 64);
24347
24348 if (TARGET_AVX512F && one_operand_shuffle)
24349 {
24350 rtx (*gen) (rtx, rtx, rtx) = NULL;
24351 switch (mode)
24352 {
24353 case V16SImode:
24354 gen = gen_avx512f_permvarv16si;
24355 break;
24356 case V16SFmode:
24357 gen = gen_avx512f_permvarv16sf;
24358 break;
24359 case V8DImode:
24360 gen = gen_avx512f_permvarv8di;
24361 break;
24362 case V8DFmode:
24363 gen = gen_avx512f_permvarv8df;
24364 break;
24365 default:
24366 break;
24367 }
24368 if (gen != NULL)
24369 {
24370 emit_insn (gen (target, op0, mask));
24371 return;
24372 }
24373 }
24374
24375 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
24376 return;
24377
24378 if (TARGET_AVX2)
24379 {
24380 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24381 {
24382 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24383 a constant shuffle operand. With a tiny bit of effort we can
24384 use VPERMD instead. A re-interpretation stall for V4DFmode is
24385 unfortunate but there's no avoiding it.
24386 Similarly, for V16HImode we don't have instructions for variable
24387 shuffling, while for V32QImode we can, after preparing suitable
24388 masks, use vpshufb; vpshufb; vpermq; vpor. */
24389
24390 if (mode == V16HImode)
24391 {
24392 maskmode = mode = V32QImode;
24393 w = 32;
24394 e = 1;
24395 }
24396 else
24397 {
24398 maskmode = mode = V8SImode;
24399 w = 8;
24400 e = 4;
24401 }
24402 t1 = gen_reg_rtx (maskmode);
24403
24404 /* Replicate the low bits of the V4DImode mask into V8SImode:
24405 mask = { A B C D }
24406 t1 = { A A B B C C D D }. */
24407 for (i = 0; i < w / 2; ++i)
24408 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24409 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24410 vt = force_reg (maskmode, vt);
24411 mask = gen_lowpart (maskmode, mask);
24412 if (maskmode == V8SImode)
24413 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24414 else
24415 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24416
24417 /* Multiply the shuffle indices by two. */
24418 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24419 OPTAB_DIRECT);
24420
24421 /* Add one to the odd shuffle indices:
24422 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24423 for (i = 0; i < w / 2; ++i)
24424 {
24425 vec[i * 2] = const0_rtx;
24426 vec[i * 2 + 1] = const1_rtx;
24427 }
24428 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24429 vt = validize_mem (force_const_mem (maskmode, vt));
24430 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24431 OPTAB_DIRECT);
24432
24433 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
24434 operands[3] = mask = t1;
24435 target = gen_reg_rtx (mode);
24436 op0 = gen_lowpart (mode, op0);
24437 op1 = gen_lowpart (mode, op1);
24438 }
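/* Illustrative example (not from the original sources) of the mask
   widening above for V4DImode: a mask of { 3 0 2 1 } is first
   replicated to { 3 3 0 0 2 2 1 1 }, doubled to { 6 6 0 0 4 4 2 2 },
   and then { 0 1 0 1 ... } is added, giving the V8SImode control
   { 6 7 0 1 4 5 2 3 }, which selects the same 64-bit elements as
   pairs of 32-bit elements.  */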
24439
24440 switch (mode)
24441 {
24442 case V8SImode:
24443 /* The VPERMD and VPERMPS instructions already properly ignore
24444 the high bits of the shuffle elements. No need for us to
24445 perform an AND ourselves. */
24446 if (one_operand_shuffle)
24447 {
24448 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24449 if (target != operands[0])
24450 emit_move_insn (operands[0],
24451 gen_lowpart (GET_MODE (operands[0]), target));
24452 }
24453 else
24454 {
24455 t1 = gen_reg_rtx (V8SImode);
24456 t2 = gen_reg_rtx (V8SImode);
24457 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24458 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24459 goto merge_two;
24460 }
24461 return;
24462
24463 case V8SFmode:
24464 mask = gen_lowpart (V8SImode, mask);
24465 if (one_operand_shuffle)
24466 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24467 else
24468 {
24469 t1 = gen_reg_rtx (V8SFmode);
24470 t2 = gen_reg_rtx (V8SFmode);
24471 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24472 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24473 goto merge_two;
24474 }
24475 return;
24476
24477 case V4SImode:
24478 /* By combining the two 128-bit input vectors into one 256-bit
24479 input vector, we can use VPERMD and VPERMPS for the full
24480 two-operand shuffle. */
24481 t1 = gen_reg_rtx (V8SImode);
24482 t2 = gen_reg_rtx (V8SImode);
24483 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24484 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24485 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24486 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24487 return;
24488
24489 case V4SFmode:
24490 t1 = gen_reg_rtx (V8SFmode);
24491 t2 = gen_reg_rtx (V8SImode);
24492 mask = gen_lowpart (V4SImode, mask);
24493 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24494 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24495 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24496 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24497 return;
24498
24499 case V32QImode:
24500 t1 = gen_reg_rtx (V32QImode);
24501 t2 = gen_reg_rtx (V32QImode);
24502 t3 = gen_reg_rtx (V32QImode);
24503 vt2 = GEN_INT (-128);
24504 for (i = 0; i < 32; i++)
24505 vec[i] = vt2;
24506 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24507 vt = force_reg (V32QImode, vt);
24508 for (i = 0; i < 32; i++)
24509 vec[i] = i < 16 ? vt2 : const0_rtx;
24510 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24511 vt2 = force_reg (V32QImode, vt2);
24512 /* From mask create two adjusted masks, which contain the same
24513 bits as mask in the low 7 bits of each vector element.
24514 The first mask will have the most significant bit clear
24515 if it requests element from the same 128-bit lane
24516 and MSB set if it requests element from the other 128-bit lane.
24517 The second mask will have the opposite values of the MSB,
24518 and additionally will have its 128-bit lanes swapped.
24519 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24520 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24521 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24522 stands for other 12 bytes. */
24523 /* The bit that says whether an element is from the same lane or the other
24524 lane is bit 4, so shift it up by 3 to the MSB position. */
24525 t5 = gen_reg_rtx (V4DImode);
24526 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24527 GEN_INT (3)));
24528 /* Clear MSB bits from the mask just in case it had them set. */
24529 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24530 /* After this t1 will have MSB set for elements from other lane. */
24531 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24532 /* Clear bits other than MSB. */
24533 emit_insn (gen_andv32qi3 (t1, t1, vt));
24534 /* Or in the lower bits from mask into t3. */
24535 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24536 /* And invert MSB bits in t1, so MSB is set for elements from the same
24537 lane. */
24538 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24539 /* Swap 128-bit lanes in t3. */
24540 t6 = gen_reg_rtx (V4DImode);
24541 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24542 const2_rtx, GEN_INT (3),
24543 const0_rtx, const1_rtx));
24544 /* And or in the lower bits from mask into t1. */
24545 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24546 if (one_operand_shuffle)
24547 {
24548 /* Each of these shuffles will put 0s in places where
24549 element from the other 128-bit lane is needed, otherwise
24550 will shuffle in the requested value. */
24551 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24552 gen_lowpart (V32QImode, t6)));
24553 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24554 /* For t3 the 128-bit lanes are swapped again. */
24555 t7 = gen_reg_rtx (V4DImode);
24556 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24557 const2_rtx, GEN_INT (3),
24558 const0_rtx, const1_rtx));
24559 /* And oring both together leads to the result. */
24560 emit_insn (gen_iorv32qi3 (target, t1,
24561 gen_lowpart (V32QImode, t7)));
24562 if (target != operands[0])
24563 emit_move_insn (operands[0],
24564 gen_lowpart (GET_MODE (operands[0]), target));
24565 return;
24566 }
24567
24568 t4 = gen_reg_rtx (V32QImode);
24569 /* Similarly to the above one_operand_shuffle code,
24570 just repeated twice, once for each operand. The merge_two:
24571 code will merge the two results together. */
24572 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24573 gen_lowpart (V32QImode, t6)));
24574 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24575 gen_lowpart (V32QImode, t6)));
24576 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24577 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24578 t7 = gen_reg_rtx (V4DImode);
24579 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24580 const2_rtx, GEN_INT (3),
24581 const0_rtx, const1_rtx));
24582 t8 = gen_reg_rtx (V4DImode);
24583 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24584 const2_rtx, GEN_INT (3),
24585 const0_rtx, const1_rtx));
24586 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24587 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
24588 t1 = t4;
24589 t2 = t3;
24590 goto merge_two;
24591
24592 default:
24593 gcc_assert (GET_MODE_SIZE (mode) <= 16);
24594 break;
24595 }
24596 }
24597
24598 if (TARGET_XOP)
24599 {
24600 /* The XOP VPPERM insn supports three inputs. By ignoring the
24601 one_operand_shuffle special case, we avoid creating another
24602 set of constant vectors in memory. */
24603 one_operand_shuffle = false;
24604
24605 /* mask = mask & {2*w-1, ...} */
24606 vt = GEN_INT (2*w - 1);
24607 }
24608 else
24609 {
24610 /* mask = mask & {w-1, ...} */
24611 vt = GEN_INT (w - 1);
24612 }
24613
24614 for (i = 0; i < w; i++)
24615 vec[i] = vt;
24616 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24617 mask = expand_simple_binop (maskmode, AND, mask, vt,
24618 NULL_RTX, 0, OPTAB_DIRECT);
24619
24620 /* For non-QImode operations, convert the word permutation control
24621 into a byte permutation control. */
24622 if (mode != V16QImode)
24623 {
24624 mask = expand_simple_binop (maskmode, ASHIFT, mask,
24625 GEN_INT (exact_log2 (e)),
24626 NULL_RTX, 0, OPTAB_DIRECT);
24627
24628 /* Convert mask to vector of chars. */
24629 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
24630
24631 /* Replicate each of the input bytes into byte positions:
24632 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
24633 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
24634 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
24635 for (i = 0; i < 16; ++i)
24636 vec[i] = GEN_INT (i/e * e);
24637 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24638 vt = validize_mem (force_const_mem (V16QImode, vt));
24639 if (TARGET_XOP)
24640 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
24641 else
24642 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
24643
24644 /* Convert it into the byte positions by doing
24645 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
24646 for (i = 0; i < 16; ++i)
24647 vec[i] = GEN_INT (i % e);
24648 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24649 vt = validize_mem (force_const_mem (V16QImode, vt));
24650 emit_insn (gen_addv16qi3 (mask, mask, vt));
24651 }
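/* Illustrative example (not from the original sources) of the word to
   byte control conversion above for V4SImode (e == 4): a word index k
   is shifted to 4*k, pshufb replicates it into all four byte positions
   of its element, and adding { 0 1 2 3 0 1 2 3 ... } yields the byte
   indices { 4k 4k+1 4k+2 4k+3 }; e.g. k == 2 becomes { 8 9 10 11 }.  */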
24652
24653 /* The actual shuffle operations all operate on V16QImode. */
24654 op0 = gen_lowpart (V16QImode, op0);
24655 op1 = gen_lowpart (V16QImode, op1);
24656
24657 if (TARGET_XOP)
24658 {
24659 if (GET_MODE (target) != V16QImode)
24660 target = gen_reg_rtx (V16QImode);
24661 emit_insn (gen_xop_pperm (target, op0, op1, mask));
24662 if (target != operands[0])
24663 emit_move_insn (operands[0],
24664 gen_lowpart (GET_MODE (operands[0]), target));
24665 }
24666 else if (one_operand_shuffle)
24667 {
24668 if (GET_MODE (target) != V16QImode)
24669 target = gen_reg_rtx (V16QImode);
24670 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
24671 if (target != operands[0])
24672 emit_move_insn (operands[0],
24673 gen_lowpart (GET_MODE (operands[0]), target));
24674 }
24675 else
24676 {
24677 rtx xops[6];
24678 bool ok;
24679
24680 /* Shuffle the two input vectors independently. */
24681 t1 = gen_reg_rtx (V16QImode);
24682 t2 = gen_reg_rtx (V16QImode);
24683 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
24684 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
24685
24686 merge_two:
24687 /* Then merge them together. The key is whether any given control
24688 element contained a bit set that indicates the second word. */
24689 mask = operands[3];
24690 vt = GEN_INT (w);
24691 if (maskmode == V2DImode && !TARGET_SSE4_1)
24692 {
24693 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
24694 more shuffle to convert the V2DI input mask into a V4SI
24695 input mask. At that point the masking that expand_int_vcond
24696 performs will work as desired. */
24697 rtx t3 = gen_reg_rtx (V4SImode);
24698 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
24699 const0_rtx, const0_rtx,
24700 const2_rtx, const2_rtx));
24701 mask = t3;
24702 maskmode = V4SImode;
24703 e = w = 4;
24704 }
24705
24706 for (i = 0; i < w; i++)
24707 vec[i] = vt;
24708 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24709 vt = force_reg (maskmode, vt);
24710 mask = expand_simple_binop (maskmode, AND, mask, vt,
24711 NULL_RTX, 0, OPTAB_DIRECT);
24712
24713 if (GET_MODE (target) != mode)
24714 target = gen_reg_rtx (mode);
24715 xops[0] = target;
24716 xops[1] = gen_lowpart (mode, t2);
24717 xops[2] = gen_lowpart (mode, t1);
24718 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
24719 xops[4] = mask;
24720 xops[5] = vt;
24721 ok = ix86_expand_int_vcond (xops);
24722 gcc_assert (ok);
24723 if (target != operands[0])
24724 emit_move_insn (operands[0],
24725 gen_lowpart (GET_MODE (operands[0]), target));
24726 }
24727 }
24728
24729 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
24730 true if we should do zero extension, else sign extension. HIGH_P is
24731 true if we want the N/2 high elements, else the low elements. */
24732
24733 void
24734 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
24735 {
24736 machine_mode imode = GET_MODE (src);
24737 rtx tmp;
24738
24739 if (TARGET_SSE4_1)
24740 {
24741 rtx (*unpack)(rtx, rtx);
24742 rtx (*extract)(rtx, rtx) = NULL;
24743 machine_mode halfmode = BLKmode;
24744
24745 switch (imode)
24746 {
24747 case V64QImode:
24748 if (unsigned_p)
24749 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
24750 else
24751 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
24752 halfmode = V32QImode;
24753 extract
24754 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
24755 break;
24756 case V32QImode:
24757 if (unsigned_p)
24758 unpack = gen_avx2_zero_extendv16qiv16hi2;
24759 else
24760 unpack = gen_avx2_sign_extendv16qiv16hi2;
24761 halfmode = V16QImode;
24762 extract
24763 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
24764 break;
24765 case V32HImode:
24766 if (unsigned_p)
24767 unpack = gen_avx512f_zero_extendv16hiv16si2;
24768 else
24769 unpack = gen_avx512f_sign_extendv16hiv16si2;
24770 halfmode = V16HImode;
24771 extract
24772 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
24773 break;
24774 case V16HImode:
24775 if (unsigned_p)
24776 unpack = gen_avx2_zero_extendv8hiv8si2;
24777 else
24778 unpack = gen_avx2_sign_extendv8hiv8si2;
24779 halfmode = V8HImode;
24780 extract
24781 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
24782 break;
24783 case V16SImode:
24784 if (unsigned_p)
24785 unpack = gen_avx512f_zero_extendv8siv8di2;
24786 else
24787 unpack = gen_avx512f_sign_extendv8siv8di2;
24788 halfmode = V8SImode;
24789 extract
24790 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24791 break;
24792 case V8SImode:
24793 if (unsigned_p)
24794 unpack = gen_avx2_zero_extendv4siv4di2;
24795 else
24796 unpack = gen_avx2_sign_extendv4siv4di2;
24797 halfmode = V4SImode;
24798 extract
24799 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24800 break;
24801 case V16QImode:
24802 if (unsigned_p)
24803 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24804 else
24805 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24806 break;
24807 case V8HImode:
24808 if (unsigned_p)
24809 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24810 else
24811 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24812 break;
24813 case V4SImode:
24814 if (unsigned_p)
24815 unpack = gen_sse4_1_zero_extendv2siv2di2;
24816 else
24817 unpack = gen_sse4_1_sign_extendv2siv2di2;
24818 break;
24819 default:
24820 gcc_unreachable ();
24821 }
24822
24823 if (GET_MODE_SIZE (imode) >= 32)
24824 {
24825 tmp = gen_reg_rtx (halfmode);
24826 emit_insn (extract (tmp, src));
24827 }
24828 else if (high_p)
24829 {
24830 /* Shift higher 8 bytes to lower 8 bytes. */
24831 tmp = gen_reg_rtx (V1TImode);
24832 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24833 GEN_INT (64)));
24834 tmp = gen_lowpart (imode, tmp);
24835 }
24836 else
24837 tmp = src;
24838
24839 emit_insn (unpack (dest, tmp));
24840 }
24841 else
24842 {
24843 rtx (*unpack)(rtx, rtx, rtx);
24844
24845 switch (imode)
24846 {
24847 case V16QImode:
24848 if (high_p)
24849 unpack = gen_vec_interleave_highv16qi;
24850 else
24851 unpack = gen_vec_interleave_lowv16qi;
24852 break;
24853 case V8HImode:
24854 if (high_p)
24855 unpack = gen_vec_interleave_highv8hi;
24856 else
24857 unpack = gen_vec_interleave_lowv8hi;
24858 break;
24859 case V4SImode:
24860 if (high_p)
24861 unpack = gen_vec_interleave_highv4si;
24862 else
24863 unpack = gen_vec_interleave_lowv4si;
24864 break;
24865 default:
24866 gcc_unreachable ();
24867 }
24868
24869 if (unsigned_p)
24870 tmp = force_reg (imode, CONST0_RTX (imode));
24871 else
24872 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24873 src, pc_rtx, pc_rtx);
24874
24875 rtx tmp2 = gen_reg_rtx (imode);
24876 emit_insn (unpack (tmp2, src, tmp));
24877 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24878 }
24879 }
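/* Illustrative note (a sketch, not from the original sources) for the
   pre-SSE4.1 path above: sign extension interleaves SRC with a vector
   of its sign masks, obtained from the GT comparison of zero against
   SRC (all-ones for negative elements, zero otherwise), while zero
   extension interleaves with a zero vector; the punpckl/punpckh forms
   then pick the low or high half of the widened elements.  */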
24880
24881 /* Expand conditional increment or decrement using adc/sbb instructions.
24882 The default case using setcc followed by the conditional move can be
24883 done by generic code. */
24884 bool
24885 ix86_expand_int_addcc (rtx operands[])
24886 {
24887 enum rtx_code code = GET_CODE (operands[1]);
24888 rtx flags;
24889 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24890 rtx compare_op;
24891 rtx val = const0_rtx;
24892 bool fpcmp = false;
24893 machine_mode mode;
24894 rtx op0 = XEXP (operands[1], 0);
24895 rtx op1 = XEXP (operands[1], 1);
24896
24897 if (operands[3] != const1_rtx
24898 && operands[3] != constm1_rtx)
24899 return false;
24900 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24901 return false;
24902 code = GET_CODE (compare_op);
24903
24904 flags = XEXP (compare_op, 0);
24905
24906 if (GET_MODE (flags) == CCFPmode
24907 || GET_MODE (flags) == CCFPUmode)
24908 {
24909 fpcmp = true;
24910 code = ix86_fp_compare_code_to_integer (code);
24911 }
24912
24913 if (code != LTU)
24914 {
24915 val = constm1_rtx;
24916 if (fpcmp)
24917 PUT_CODE (compare_op,
24918 reverse_condition_maybe_unordered
24919 (GET_CODE (compare_op)));
24920 else
24921 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24922 }
24923
24924 mode = GET_MODE (operands[0]);
24925
24926 /* Construct either adc or sbb insn. */
24927 if ((code == LTU) == (operands[3] == constm1_rtx))
24928 {
24929 switch (mode)
24930 {
24931 case QImode:
24932 insn = gen_subqi3_carry;
24933 break;
24934 case HImode:
24935 insn = gen_subhi3_carry;
24936 break;
24937 case SImode:
24938 insn = gen_subsi3_carry;
24939 break;
24940 case DImode:
24941 insn = gen_subdi3_carry;
24942 break;
24943 default:
24944 gcc_unreachable ();
24945 }
24946 }
24947 else
24948 {
24949 switch (mode)
24950 {
24951 case QImode:
24952 insn = gen_addqi3_carry;
24953 break;
24954 case HImode:
24955 insn = gen_addhi3_carry;
24956 break;
24957 case SImode:
24958 insn = gen_addsi3_carry;
24959 break;
24960 case DImode:
24961 insn = gen_adddi3_carry;
24962 break;
24963 default:
24964 gcc_unreachable ();
24965 }
24966 }
24967 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24968
24969 return true;
24970 }
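/* Illustrative example (an assumption about the emitted assembly, not
   from the original sources): for "x = a < b ? x + 1 : x" with an
   unsigned comparison this becomes roughly

     cmpl  b, a      /* computes a - b, so CF is set when a <u b  */
     adcl  $0, x     /* x += CF  */

   and a conditional decrement uses sbbl $0, x instead.  */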
24971
24972
24973 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24974 but works for floating point parameters and non-offsettable memories.
24975 For pushes, it returns just stack offsets; the values will be saved
24976 in the right order. Maximally four parts are generated. */
24977
24978 static int
24979 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24980 {
24981 int size;
24982
24983 if (!TARGET_64BIT)
24984 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24985 else
24986 size = (GET_MODE_SIZE (mode) + 4) / 8;
24987
24988 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24989 gcc_assert (size >= 2 && size <= 4);
24990
24991 /* Optimize constant pool reference to immediates. This is used by fp
24992 moves, that force all constants to memory to allow combining. */
24993 if (MEM_P (operand) && MEM_READONLY_P (operand))
24994 {
24995 rtx tmp = maybe_get_pool_constant (operand);
24996 if (tmp)
24997 operand = tmp;
24998 }
24999
25000 if (MEM_P (operand) && !offsettable_memref_p (operand))
25001 {
25002 /* The only non-offsettable memories we handle are pushes. */
25003 int ok = push_operand (operand, VOIDmode);
25004
25005 gcc_assert (ok);
25006
25007 operand = copy_rtx (operand);
25008 PUT_MODE (operand, word_mode);
25009 parts[0] = parts[1] = parts[2] = parts[3] = operand;
25010 return size;
25011 }
25012
25013 if (GET_CODE (operand) == CONST_VECTOR)
25014 {
25015 machine_mode imode = int_mode_for_mode (mode);
25016 /* Caution: if we looked through a constant pool memory above,
25017 the operand may actually have a different mode now. That's
25018 ok, since we want to pun this all the way back to an integer. */
25019 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
25020 gcc_assert (operand != NULL);
25021 mode = imode;
25022 }
25023
25024 if (!TARGET_64BIT)
25025 {
25026 if (mode == DImode)
25027 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25028 else
25029 {
25030 int i;
25031
25032 if (REG_P (operand))
25033 {
25034 gcc_assert (reload_completed);
25035 for (i = 0; i < size; i++)
25036 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
25037 }
25038 else if (offsettable_memref_p (operand))
25039 {
25040 operand = adjust_address (operand, SImode, 0);
25041 parts[0] = operand;
25042 for (i = 1; i < size; i++)
25043 parts[i] = adjust_address (operand, SImode, 4 * i);
25044 }
25045 else if (CONST_DOUBLE_P (operand))
25046 {
25047 const REAL_VALUE_TYPE *r;
25048 long l[4];
25049
25050 r = CONST_DOUBLE_REAL_VALUE (operand);
25051 switch (mode)
25052 {
25053 case TFmode:
25054 real_to_target (l, r, mode);
25055 parts[3] = gen_int_mode (l[3], SImode);
25056 parts[2] = gen_int_mode (l[2], SImode);
25057 break;
25058 case XFmode:
25059 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
25060 long double may not be 80-bit. */
25061 real_to_target (l, r, mode);
25062 parts[2] = gen_int_mode (l[2], SImode);
25063 break;
25064 case DFmode:
25065 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
25066 break;
25067 default:
25068 gcc_unreachable ();
25069 }
25070 parts[1] = gen_int_mode (l[1], SImode);
25071 parts[0] = gen_int_mode (l[0], SImode);
25072 }
25073 else
25074 gcc_unreachable ();
25075 }
25076 }
25077 else
25078 {
25079 if (mode == TImode)
25080 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25081 if (mode == XFmode || mode == TFmode)
25082 {
25083 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
25084 if (REG_P (operand))
25085 {
25086 gcc_assert (reload_completed);
25087 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
25088 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
25089 }
25090 else if (offsettable_memref_p (operand))
25091 {
25092 operand = adjust_address (operand, DImode, 0);
25093 parts[0] = operand;
25094 parts[1] = adjust_address (operand, upper_mode, 8);
25095 }
25096 else if (CONST_DOUBLE_P (operand))
25097 {
25098 long l[4];
25099
25100 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
25101
25102 /* real_to_target puts 32-bit pieces in each long. */
25103 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
25104 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
25105 << 32), DImode);
25106
25107 if (upper_mode == SImode)
25108 parts[1] = gen_int_mode (l[2], SImode);
25109 else
25110 parts[1]
25111 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
25112 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
25113 << 32), DImode);
25114 }
25115 else
25116 gcc_unreachable ();
25117 }
25118 }
25119
25120 return size;
25121 }
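/* Illustrative note (not from the original sources): on a 32-bit target
   this yields 2 SImode parts for DImode/DFmode, 3 for XFmode and 4 for
   TFmode; on a 64-bit target XFmode and TFmode split into a DImode low
   part plus an SImode or DImode upper part respectively.  */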
25122
25123 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25124 The value is first split into half-mode parts; operands 2-5 receive the
25125 destination parts and operands 6-9 the source parts, ordered so that no
25126 source part is overwritten before it has been copied. */
25127
25128 void
25129 ix86_split_long_move (rtx operands[])
25130 {
25131 rtx part[2][4];
25132 int nparts, i, j;
25133 int push = 0;
25134 int collisions = 0;
25135 machine_mode mode = GET_MODE (operands[0]);
25136 bool collisionparts[4];
25137
25138 /* The DFmode expanders may ask us to move a double.
25139 For a 64-bit target this is a single move. By hiding that fact
25140 here we simplify the i386.md splitters. */
25141 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25142 {
25143 /* Optimize constant pool reference to immediates. This is used by
25144 fp moves, that force all constants to memory to allow combining. */
25145
25146 if (MEM_P (operands[1])
25147 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25148 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25149 operands[1] = get_pool_constant (XEXP (operands[1], 0));
25150 if (push_operand (operands[0], VOIDmode))
25151 {
25152 operands[0] = copy_rtx (operands[0]);
25153 PUT_MODE (operands[0], word_mode);
25154 }
25155 else
25156 operands[0] = gen_lowpart (DImode, operands[0]);
25157 operands[1] = gen_lowpart (DImode, operands[1]);
25158 emit_move_insn (operands[0], operands[1]);
25159 return;
25160 }
25161
25162 /* The only non-offsettable memory we handle is push. */
25163 if (push_operand (operands[0], VOIDmode))
25164 push = 1;
25165 else
25166 gcc_assert (!MEM_P (operands[0])
25167 || offsettable_memref_p (operands[0]));
25168
25169 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25170 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25171
25172 /* When emitting a push, be careful with source operands on the stack. */
25173 if (push && MEM_P (operands[1])
25174 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25175 {
25176 rtx src_base = XEXP (part[1][nparts - 1], 0);
25177
25178 /* Compensate for the stack decrement by 4. */
25179 if (!TARGET_64BIT && nparts == 3
25180 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25181 src_base = plus_constant (Pmode, src_base, 4);
25182
25183 /* src_base refers to the stack pointer and is
25184 automatically decreased by each emitted push. */
25185 for (i = 0; i < nparts; i++)
25186 part[1][i] = change_address (part[1][i],
25187 GET_MODE (part[1][i]), src_base);
25188 }
25189
25190 /* We need to do the copy in the right order in case an address register
25191 of the source overlaps the destination. */
25192 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25193 {
25194 rtx tmp;
25195
25196 for (i = 0; i < nparts; i++)
25197 {
25198 collisionparts[i]
25199 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25200 if (collisionparts[i])
25201 collisions++;
25202 }
25203
25204 /* Collision in the middle part can be handled by reordering. */
25205 if (collisions == 1 && nparts == 3 && collisionparts [1])
25206 {
25207 std::swap (part[0][1], part[0][2]);
25208 std::swap (part[1][1], part[1][2]);
25209 }
25210 else if (collisions == 1
25211 && nparts == 4
25212 && (collisionparts [1] || collisionparts [2]))
25213 {
25214 if (collisionparts [1])
25215 {
25216 std::swap (part[0][1], part[0][2]);
25217 std::swap (part[1][1], part[1][2]);
25218 }
25219 else
25220 {
25221 std::swap (part[0][2], part[0][3]);
25222 std::swap (part[1][2], part[1][3]);
25223 }
25224 }
25225
25226 /* If there are more collisions, we can't handle them by reordering.
25227 Do an lea to the last part and use only one colliding move. */
25228 else if (collisions > 1)
25229 {
25230 rtx base, addr, tls_base = NULL_RTX;
25231
25232 collisions = 1;
25233
25234 base = part[0][nparts - 1];
25235
25236 /* Handle the case when the last part isn't valid for lea.
25237 This happens in 64-bit mode when storing the 12-byte XFmode. */
25238 if (GET_MODE (base) != Pmode)
25239 base = gen_rtx_REG (Pmode, REGNO (base));
25240
25241 addr = XEXP (part[1][0], 0);
25242 if (TARGET_TLS_DIRECT_SEG_REFS)
25243 {
25244 struct ix86_address parts;
25245 int ok = ix86_decompose_address (addr, &parts);
25246 gcc_assert (ok);
25247 if (parts.seg == DEFAULT_TLS_SEG_REG)
25248 {
25249                   /* It is not valid to use %gs: or %fs: in
25250                      lea though, so we need to remove it from the
25251                      address used for lea and add it to each individual
25252                      memory load instead.  */
25253 addr = copy_rtx (addr);
25254 rtx *x = &addr;
25255 while (GET_CODE (*x) == PLUS)
25256 {
25257 for (i = 0; i < 2; i++)
25258 {
25259 rtx u = XEXP (*x, i);
25260 if (GET_CODE (u) == ZERO_EXTEND)
25261 u = XEXP (u, 0);
25262 if (GET_CODE (u) == UNSPEC
25263 && XINT (u, 1) == UNSPEC_TP)
25264 {
25265 tls_base = XEXP (*x, i);
25266 *x = XEXP (*x, 1 - i);
25267 break;
25268 }
25269 }
25270 if (tls_base)
25271 break;
25272 x = &XEXP (*x, 0);
25273 }
25274 gcc_assert (tls_base);
25275 }
25276 }
25277 emit_insn (gen_rtx_SET (base, addr));
25278 if (tls_base)
25279 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
25280 part[1][0] = replace_equiv_address (part[1][0], base);
25281 for (i = 1; i < nparts; i++)
25282 {
25283 if (tls_base)
25284 base = copy_rtx (base);
25285 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25286 part[1][i] = replace_equiv_address (part[1][i], tmp);
25287 }
25288 }
25289 }
25290
25291 if (push)
25292 {
25293 if (!TARGET_64BIT)
25294 {
25295 if (nparts == 3)
25296 {
25297 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25298 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25299 stack_pointer_rtx, GEN_INT (-4)));
25300 emit_move_insn (part[0][2], part[1][2]);
25301 }
25302 else if (nparts == 4)
25303 {
25304 emit_move_insn (part[0][3], part[1][3]);
25305 emit_move_insn (part[0][2], part[1][2]);
25306 }
25307 }
25308 else
25309 {
25310       /* In 64-bit mode we don't have a 32-bit push available.  In case this is
25311          a register, it is OK - we will just use the larger counterpart.  We also
25312          retype memory - this comes from an attempt to avoid the REX prefix on
25313          moving the second half of a TFmode value.  */
25314 if (GET_MODE (part[1][1]) == SImode)
25315 {
25316 switch (GET_CODE (part[1][1]))
25317 {
25318 case MEM:
25319 part[1][1] = adjust_address (part[1][1], DImode, 0);
25320 break;
25321
25322 case REG:
25323 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25324 break;
25325
25326 default:
25327 gcc_unreachable ();
25328 }
25329
25330 if (GET_MODE (part[1][0]) == SImode)
25331 part[1][0] = part[1][1];
25332 }
25333 }
25334 emit_move_insn (part[0][1], part[1][1]);
25335 emit_move_insn (part[0][0], part[1][0]);
25336 return;
25337 }
25338
25339 /* Choose correct order to not overwrite the source before it is copied. */
25340 if ((REG_P (part[0][0])
25341 && REG_P (part[1][1])
25342 && (REGNO (part[0][0]) == REGNO (part[1][1])
25343 || (nparts == 3
25344 && REGNO (part[0][0]) == REGNO (part[1][2]))
25345 || (nparts == 4
25346 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25347 || (collisions > 0
25348 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25349 {
25350 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25351 {
25352 operands[2 + i] = part[0][j];
25353 operands[6 + i] = part[1][j];
25354 }
25355 }
25356 else
25357 {
25358 for (i = 0; i < nparts; i++)
25359 {
25360 operands[2 + i] = part[0][i];
25361 operands[6 + i] = part[1][i];
25362 }
25363 }
25364
25365 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25366 if (optimize_insn_for_size_p ())
25367 {
25368 for (j = 0; j < nparts - 1; j++)
25369 if (CONST_INT_P (operands[6 + j])
25370 && operands[6 + j] != const0_rtx
25371 && REG_P (operands[2 + j]))
25372 for (i = j; i < nparts - 1; i++)
25373 if (CONST_INT_P (operands[7 + i])
25374 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25375 operands[7 + i] = operands[2 + j];
25376 }
25377
25378 for (i = 0; i < nparts; i++)
25379 emit_move_insn (operands[2 + i], operands[6 + i]);
25380
25381 return;
25382 }
25383
25384 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25385 left shift by a constant, either using a single shift or
25386 a sequence of add instructions. */
25387
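/* Illustrative sketch (register names are only an example): on a target
   where adds are cheap, a left shift of the half-word operand by 2 is
   emitted roughly as

	addl	%eax, %eax
	addl	%eax, %eax

   while a larger count, or -Os, falls back to a single
   "sall $COUNT, %eax".  */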
25388 static void
25389 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25390 {
25391 rtx (*insn)(rtx, rtx, rtx);
25392
25393 if (count == 1
25394 || (count * ix86_cost->add <= ix86_cost->shift_const
25395 && !optimize_insn_for_size_p ()))
25396 {
25397 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25398 while (count-- > 0)
25399 emit_insn (insn (operand, operand, operand));
25400 }
25401 else
25402 {
25403 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25404 emit_insn (insn (operand, operand, GEN_INT (count)));
25405 }
25406 }
25407
25408 void
25409 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25410 {
25411 rtx (*gen_ashl3)(rtx, rtx, rtx);
25412 rtx (*gen_shld)(rtx, rtx, rtx);
25413 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25414
25415 rtx low[2], high[2];
25416 int count;
25417
25418 if (CONST_INT_P (operands[2]))
25419 {
25420 split_double_mode (mode, operands, 2, low, high);
25421 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25422
25423 if (count >= half_width)
25424 {
25425 emit_move_insn (high[0], low[1]);
25426 emit_move_insn (low[0], const0_rtx);
25427
25428 if (count > half_width)
25429 ix86_expand_ashl_const (high[0], count - half_width, mode);
25430 }
25431 else
25432 {
25433 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25434
25435 if (!rtx_equal_p (operands[0], operands[1]))
25436 emit_move_insn (operands[0], operands[1]);
25437
25438 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25439 ix86_expand_ashl_const (low[0], count, mode);
25440 }
25441 return;
25442 }
25443
25444 split_double_mode (mode, operands, 1, low, high);
25445
25446 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25447
25448 if (operands[1] == const1_rtx)
25449 {
25450       /* Assuming we've chosen QImode-capable registers, 1 << N
25451          can be done with two 32/64-bit shifts, no branches, no cmoves.  */
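      /* Illustrative sketch, assuming DImode on a 32-bit target and example
         register choices; the sequence emitted below is roughly

            xorl   %eax, %eax     # low  = 0
            xorl   %edx, %edx     # high = 0
            testb  $32, %cl       # is N >= 32?
            sete   %al            # low  = (N < 32)
            setne  %dl            # high = (N >= 32)
            sall   %cl, %eax      # both halves then shifted by N & 31
            sall   %cl, %edx  */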
25452 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25453 {
25454 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25455
25456 ix86_expand_clear (low[0]);
25457 ix86_expand_clear (high[0]);
25458 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
25459
25460 d = gen_lowpart (QImode, low[0]);
25461 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25462 s = gen_rtx_EQ (QImode, flags, const0_rtx);
25463 emit_insn (gen_rtx_SET (d, s));
25464
25465 d = gen_lowpart (QImode, high[0]);
25466 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25467 s = gen_rtx_NE (QImode, flags, const0_rtx);
25468 emit_insn (gen_rtx_SET (d, s));
25469 }
25470
25471 /* Otherwise, we can get the same results by manually performing
25472 a bit extract operation on bit 5/6, and then performing the two
25473 shifts. The two methods of getting 0/1 into low/high are exactly
25474 the same size. Avoiding the shift in the bit extract case helps
25475 pentium4 a bit; no one else seems to care much either way. */
25476 else
25477 {
25478 machine_mode half_mode;
25479 rtx (*gen_lshr3)(rtx, rtx, rtx);
25480 rtx (*gen_and3)(rtx, rtx, rtx);
25481 rtx (*gen_xor3)(rtx, rtx, rtx);
25482 HOST_WIDE_INT bits;
25483 rtx x;
25484
25485 if (mode == DImode)
25486 {
25487 half_mode = SImode;
25488 gen_lshr3 = gen_lshrsi3;
25489 gen_and3 = gen_andsi3;
25490 gen_xor3 = gen_xorsi3;
25491 bits = 5;
25492 }
25493 else
25494 {
25495 half_mode = DImode;
25496 gen_lshr3 = gen_lshrdi3;
25497 gen_and3 = gen_anddi3;
25498 gen_xor3 = gen_xordi3;
25499 bits = 6;
25500 }
25501
25502 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
25503 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
25504 else
25505 x = gen_lowpart (half_mode, operands[2]);
25506 emit_insn (gen_rtx_SET (high[0], x));
25507
25508 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
25509 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
25510 emit_move_insn (low[0], high[0]);
25511 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
25512 }
25513
25514 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25515 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
25516 return;
25517 }
25518
25519 if (operands[1] == constm1_rtx)
25520 {
25521 /* For -1 << N, we can avoid the shld instruction, because we
25522 know that we're shifting 0...31/63 ones into a -1. */
25523 emit_move_insn (low[0], constm1_rtx);
25524 if (optimize_insn_for_size_p ())
25525 emit_move_insn (high[0], low[0]);
25526 else
25527 emit_move_insn (high[0], constm1_rtx);
25528 }
25529 else
25530 {
25531 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25532
25533 if (!rtx_equal_p (operands[0], operands[1]))
25534 emit_move_insn (operands[0], operands[1]);
25535
25536 split_double_mode (mode, operands, 1, low, high);
25537 emit_insn (gen_shld (high[0], low[0], operands[2]));
25538 }
25539
25540 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25541
25542 if (TARGET_CMOVE && scratch)
25543 {
25544 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25545 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25546
25547 ix86_expand_clear (scratch);
25548 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
25549 }
25550 else
25551 {
25552 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25553 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25554
25555 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
25556 }
25557 }
25558
25559 void
25560 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
25561 {
25562 rtx (*gen_ashr3)(rtx, rtx, rtx)
25563 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
25564 rtx (*gen_shrd)(rtx, rtx, rtx);
25565 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25566
25567 rtx low[2], high[2];
25568 int count;
25569
25570 if (CONST_INT_P (operands[2]))
25571 {
25572 split_double_mode (mode, operands, 2, low, high);
25573 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25574
25575 if (count == GET_MODE_BITSIZE (mode) - 1)
25576 {
25577 emit_move_insn (high[0], high[1]);
25578 emit_insn (gen_ashr3 (high[0], high[0],
25579 GEN_INT (half_width - 1)));
25580 emit_move_insn (low[0], high[0]);
25581
25582 }
25583 else if (count >= half_width)
25584 {
25585 emit_move_insn (low[0], high[1]);
25586 emit_move_insn (high[0], low[0]);
25587 emit_insn (gen_ashr3 (high[0], high[0],
25588 GEN_INT (half_width - 1)));
25589
25590 if (count > half_width)
25591 emit_insn (gen_ashr3 (low[0], low[0],
25592 GEN_INT (count - half_width)));
25593 }
25594 else
25595 {
25596 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25597
25598 if (!rtx_equal_p (operands[0], operands[1]))
25599 emit_move_insn (operands[0], operands[1]);
25600
25601 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25602 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
25603 }
25604 }
25605 else
25606 {
25607 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25608
25609 if (!rtx_equal_p (operands[0], operands[1]))
25610 emit_move_insn (operands[0], operands[1]);
25611
25612 split_double_mode (mode, operands, 1, low, high);
25613
25614 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25615 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
25616
25617 if (TARGET_CMOVE && scratch)
25618 {
25619 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25620 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25621
25622 emit_move_insn (scratch, high[0]);
25623 emit_insn (gen_ashr3 (scratch, scratch,
25624 GEN_INT (half_width - 1)));
25625 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25626 scratch));
25627 }
25628 else
25629 {
25630 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
25631 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
25632
25633 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
25634 }
25635 }
25636 }
25637
25638 void
25639 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
25640 {
25641 rtx (*gen_lshr3)(rtx, rtx, rtx)
25642 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
25643 rtx (*gen_shrd)(rtx, rtx, rtx);
25644 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25645
25646 rtx low[2], high[2];
25647 int count;
25648
25649 if (CONST_INT_P (operands[2]))
25650 {
25651 split_double_mode (mode, operands, 2, low, high);
25652 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25653
25654 if (count >= half_width)
25655 {
25656 emit_move_insn (low[0], high[1]);
25657 ix86_expand_clear (high[0]);
25658
25659 if (count > half_width)
25660 emit_insn (gen_lshr3 (low[0], low[0],
25661 GEN_INT (count - half_width)));
25662 }
25663 else
25664 {
25665 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25666
25667 if (!rtx_equal_p (operands[0], operands[1]))
25668 emit_move_insn (operands[0], operands[1]);
25669
25670 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25671 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
25672 }
25673 }
25674 else
25675 {
25676 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25677
25678 if (!rtx_equal_p (operands[0], operands[1]))
25679 emit_move_insn (operands[0], operands[1]);
25680
25681 split_double_mode (mode, operands, 1, low, high);
25682
25683 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25684 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
25685
25686 if (TARGET_CMOVE && scratch)
25687 {
25688 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25689 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25690
25691 ix86_expand_clear (scratch);
25692 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25693 scratch));
25694 }
25695 else
25696 {
25697 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25698 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25699
25700 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
25701 }
25702 }
25703 }
25704
25705 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
25706 static void
25707 predict_jump (int prob)
25708 {
25709 rtx insn = get_last_insn ();
25710 gcc_assert (JUMP_P (insn));
25711 add_int_reg_note (insn, REG_BR_PROB, prob);
25712 }
25713
25714 /* Helper function for the string operations below.  Test VARIABLE whether
25715    it is aligned to VALUE bytes.  If true, jump to the label.  */
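/* Illustrative sketch: for VALUE == 4 the emitted code is roughly

     tmp = VARIABLE & 4;
     if (tmp == 0) goto label;

   plus a REG_BR_PROB note, so the caller places the code handling the
   "bit set" case after this call and then emits the returned label.  */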
25716 static rtx_code_label *
25717 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
25718 {
25719 rtx_code_label *label = gen_label_rtx ();
25720 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
25721 if (GET_MODE (variable) == DImode)
25722 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
25723 else
25724 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
25725 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
25726 1, label);
25727 if (epilogue)
25728 predict_jump (REG_BR_PROB_BASE * 50 / 100);
25729 else
25730 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25731 return label;
25732 }
25733
25734 /* Adjust COUNTREG by subtracting VALUE.  */
25735 static void
25736 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
25737 {
25738 rtx (*gen_add)(rtx, rtx, rtx)
25739 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
25740
25741 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
25742 }
25743
25744 /* Zero extend the possibly SImode EXP to a Pmode register.  */
25745 rtx
25746 ix86_zero_extend_to_Pmode (rtx exp)
25747 {
25748 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
25749 }
25750
25751 /* Divide COUNTREG by SCALE. */
25752 static rtx
25753 scale_counter (rtx countreg, int scale)
25754 {
25755 rtx sc;
25756
25757 if (scale == 1)
25758 return countreg;
25759 if (CONST_INT_P (countreg))
25760 return GEN_INT (INTVAL (countreg) / scale);
25761 gcc_assert (REG_P (countreg));
25762
25763 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
25764 GEN_INT (exact_log2 (scale)),
25765 NULL, 1, OPTAB_DIRECT);
25766 return sc;
25767 }
25768
25769 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
25770 DImode for constant loop counts. */
25771
25772 static machine_mode
25773 counter_mode (rtx count_exp)
25774 {
25775 if (GET_MODE (count_exp) != VOIDmode)
25776 return GET_MODE (count_exp);
25777 if (!CONST_INT_P (count_exp))
25778 return Pmode;
25779 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
25780 return DImode;
25781 return SImode;
25782 }
25783
25784 /* Copy the address to a Pmode register. This is used for x32 to
25785 truncate DImode TLS address to a SImode register. */
25786
25787 static rtx
25788 ix86_copy_addr_to_reg (rtx addr)
25789 {
25790 rtx reg;
25791 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25792 {
25793 reg = copy_addr_to_reg (addr);
25794 REG_POINTER (reg) = 1;
25795 return reg;
25796 }
25797 else
25798 {
25799 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25800 reg = copy_to_mode_reg (DImode, addr);
25801 REG_POINTER (reg) = 1;
25802 return gen_rtx_SUBREG (SImode, reg, 0);
25803 }
25804 }
25805
25806 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
25807    by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
25808    size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
25809    equivalent loop to set memory by VALUE (supposed to be in MODE).
25810
25811    The size is rounded down to a whole number of chunks moved at once.
25812    SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
25813
25814
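/* A rough sketch of the structure emitted below (illustrative only):

     size = COUNT & ~(piece_size - 1);
     if (piece_size == 1 && size == 0) goto out;
     iter = 0;
   top:
     copy (or store VALUE into) UNROLL chunks of MODE at offset ITER;
     iter += piece_size;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;   (SRCPTR only when !ISSETMEM)
   out:  */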
25815 static void
25816 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25817 rtx destptr, rtx srcptr, rtx value,
25818 rtx count, machine_mode mode, int unroll,
25819 int expected_size, bool issetmem)
25820 {
25821 rtx_code_label *out_label, *top_label;
25822 rtx iter, tmp;
25823 machine_mode iter_mode = counter_mode (count);
25824 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25825 rtx piece_size = GEN_INT (piece_size_n);
25826 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25827 rtx size;
25828 int i;
25829
25830 top_label = gen_label_rtx ();
25831 out_label = gen_label_rtx ();
25832 iter = gen_reg_rtx (iter_mode);
25833
25834 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25835 NULL, 1, OPTAB_DIRECT);
25836 /* Those two should combine. */
25837 if (piece_size == const1_rtx)
25838 {
25839 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25840 true, out_label);
25841 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25842 }
25843 emit_move_insn (iter, const0_rtx);
25844
25845 emit_label (top_label);
25846
25847 tmp = convert_modes (Pmode, iter_mode, iter, true);
25848
25849   /* This assert could be relaxed - in that case we'd need to compute
25850      the smallest power of two containing PIECE_SIZE_N and pass it to
25851      offset_address.  */
25852 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25853 destmem = offset_address (destmem, tmp, piece_size_n);
25854 destmem = adjust_address (destmem, mode, 0);
25855
25856 if (!issetmem)
25857 {
25858 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25859 srcmem = adjust_address (srcmem, mode, 0);
25860
25861       /* When unrolling for chips that reorder memory reads and writes,
25862          we can save registers by using a single temporary.
25863          Also, using 4 temporaries is overkill in 32-bit mode.  */
25864 if (!TARGET_64BIT && 0)
25865 {
25866 for (i = 0; i < unroll; i++)
25867 {
25868 if (i)
25869 {
25870 destmem =
25871 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25872 srcmem =
25873 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25874 }
25875 emit_move_insn (destmem, srcmem);
25876 }
25877 }
25878 else
25879 {
25880 rtx tmpreg[4];
25881 gcc_assert (unroll <= 4);
25882 for (i = 0; i < unroll; i++)
25883 {
25884 tmpreg[i] = gen_reg_rtx (mode);
25885 if (i)
25886 {
25887 srcmem =
25888 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25889 }
25890 emit_move_insn (tmpreg[i], srcmem);
25891 }
25892 for (i = 0; i < unroll; i++)
25893 {
25894 if (i)
25895 {
25896 destmem =
25897 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25898 }
25899 emit_move_insn (destmem, tmpreg[i]);
25900 }
25901 }
25902 }
25903 else
25904 for (i = 0; i < unroll; i++)
25905 {
25906 if (i)
25907 destmem =
25908 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25909 emit_move_insn (destmem, value);
25910 }
25911
25912 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25913 true, OPTAB_LIB_WIDEN);
25914 if (tmp != iter)
25915 emit_move_insn (iter, tmp);
25916
25917 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25918 true, top_label);
25919 if (expected_size != -1)
25920 {
25921 expected_size /= GET_MODE_SIZE (mode) * unroll;
25922 if (expected_size == 0)
25923 predict_jump (0);
25924 else if (expected_size > REG_BR_PROB_BASE)
25925 predict_jump (REG_BR_PROB_BASE - 1);
25926 else
25927 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25928 }
25929 else
25930 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25931 iter = ix86_zero_extend_to_Pmode (iter);
25932 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25933 true, OPTAB_LIB_WIDEN);
25934 if (tmp != destptr)
25935 emit_move_insn (destptr, tmp);
25936 if (!issetmem)
25937 {
25938 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25939 true, OPTAB_LIB_WIDEN);
25940 if (tmp != srcptr)
25941 emit_move_insn (srcptr, tmp);
25942 }
25943 emit_label (out_label);
25944 }
25945
25946 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25947 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25948 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25949    For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25950    ORIG_VALUE is the original value passed to memset to fill the memory with.
25951    Other arguments have the same meaning as for the previous function.  */
25952
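/* Illustrative example: a plain memcpy expanded here with MODE == SImode
   comes out roughly as

	movl	count, %ecx
	shrl	$2, %ecx
	rep movsl

   i.e. the count is scaled down by the chunk size and a single rep-prefixed
   string instruction does the bulk of the work (the string insns fix the
   register usage: %ecx/%edi/%esi, plus %eax for memset).  */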
25953 static void
25954 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25955 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25956 rtx count,
25957 machine_mode mode, bool issetmem)
25958 {
25959 rtx destexp;
25960 rtx srcexp;
25961 rtx countreg;
25962 HOST_WIDE_INT rounded_count;
25963
25964 /* If possible, it is shorter to use rep movs.
25965 TODO: Maybe it is better to move this logic to decide_alg. */
25966 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25967 && (!issetmem || orig_value == const0_rtx))
25968 mode = SImode;
25969
25970 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25971 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25972
25973 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25974 GET_MODE_SIZE (mode)));
25975 if (mode != QImode)
25976 {
25977 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25978 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25979 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25980 }
25981 else
25982 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25983 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25984 {
25985 rounded_count
25986 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25987 destmem = shallow_copy_rtx (destmem);
25988 set_mem_size (destmem, rounded_count);
25989 }
25990 else if (MEM_SIZE_KNOWN_P (destmem))
25991 clear_mem_size (destmem);
25992
25993 if (issetmem)
25994 {
25995 value = force_reg (mode, gen_lowpart (mode, value));
25996 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25997 }
25998 else
25999 {
26000 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
26001 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
26002 if (mode != QImode)
26003 {
26004 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
26005 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26006 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
26007 }
26008 else
26009 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
26010 if (CONST_INT_P (count))
26011 {
26012 rounded_count
26013 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26014 srcmem = shallow_copy_rtx (srcmem);
26015 set_mem_size (srcmem, rounded_count);
26016 }
26017 else
26018 {
26019 if (MEM_SIZE_KNOWN_P (srcmem))
26020 clear_mem_size (srcmem);
26021 }
26022 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
26023 destexp, srcexp));
26024 }
26025 }
26026
26027 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
26028    DESTMEM.
26029    SRCMEM is passed by pointer to be updated on return.
26030    Return value is the updated DST.  */
26031 static rtx
26032 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
26033 HOST_WIDE_INT size_to_move)
26034 {
26035 rtx dst = destmem, src = *srcmem, adjust, tempreg;
26036 enum insn_code code;
26037 machine_mode move_mode;
26038 int piece_size, i;
26039
26040   /* Find the widest mode in which we could perform moves.
26041      Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
26042      it until a move of such size is supported.  */
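  /* For example (illustrative): SIZE_TO_MOVE == 4 gives piece_size == 4 and
     move_mode == SImode, so the loop below emits one SImode load into a
     temporary register, one SImode store, and advances both pointers by 4.  */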
26043 piece_size = 1 << floor_log2 (size_to_move);
26044 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
26045 code = optab_handler (mov_optab, move_mode);
26046 while (code == CODE_FOR_nothing && piece_size > 1)
26047 {
26048 piece_size >>= 1;
26049 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
26050 code = optab_handler (mov_optab, move_mode);
26051 }
26052
26053 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26054 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26055 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26056 {
26057 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26058 move_mode = mode_for_vector (word_mode, nunits);
26059 code = optab_handler (mov_optab, move_mode);
26060 if (code == CODE_FOR_nothing)
26061 {
26062 move_mode = word_mode;
26063 piece_size = GET_MODE_SIZE (move_mode);
26064 code = optab_handler (mov_optab, move_mode);
26065 }
26066 }
26067 gcc_assert (code != CODE_FOR_nothing);
26068
26069 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26070 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
26071
26072   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
26073 gcc_assert (size_to_move % piece_size == 0);
26074 adjust = GEN_INT (piece_size);
26075 for (i = 0; i < size_to_move; i += piece_size)
26076 {
26077 /* We move from memory to memory, so we'll need to do it via
26078 a temporary register. */
26079 tempreg = gen_reg_rtx (move_mode);
26080 emit_insn (GEN_FCN (code) (tempreg, src));
26081 emit_insn (GEN_FCN (code) (dst, tempreg));
26082
26083 emit_move_insn (destptr,
26084 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26085 emit_move_insn (srcptr,
26086 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
26087
26088 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26089 piece_size);
26090 src = adjust_automodify_address_nv (src, move_mode, srcptr,
26091 piece_size);
26092 }
26093
26094 /* Update DST and SRC rtx. */
26095 *srcmem = src;
26096 return dst;
26097 }
26098
26099 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
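/* Illustrative example: with max_size == 8 and a constant count whose
   remainder modulo 8 is 7, the loop below walks the set bits of the tail
   and emits a 4-byte, a 2-byte and a 1-byte copy via emit_memmov.  */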
26100 static void
26101 expand_movmem_epilogue (rtx destmem, rtx srcmem,
26102 rtx destptr, rtx srcptr, rtx count, int max_size)
26103 {
26104 rtx src, dest;
26105 if (CONST_INT_P (count))
26106 {
26107 HOST_WIDE_INT countval = INTVAL (count);
26108 HOST_WIDE_INT epilogue_size = countval % max_size;
26109 int i;
26110
26111 /* For now MAX_SIZE should be a power of 2. This assert could be
26112 relaxed, but it'll require a bit more complicated epilogue
26113 expanding. */
26114 gcc_assert ((max_size & (max_size - 1)) == 0);
26115 for (i = max_size; i >= 1; i >>= 1)
26116 {
26117 if (epilogue_size & i)
26118 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26119 }
26120 return;
26121 }
26122 if (max_size > 8)
26123 {
26124 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
26125 count, 1, OPTAB_DIRECT);
26126 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
26127 count, QImode, 1, 4, false);
26128 return;
26129 }
26130
26131 /* When there are stringops, we can cheaply increase dest and src pointers.
26132 Otherwise we save code size by maintaining offset (zero is readily
26133 available from preceding rep operation) and using x86 addressing modes.
26134 */
26135 if (TARGET_SINGLE_STRINGOP)
26136 {
26137 if (max_size > 4)
26138 {
26139 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26140 src = change_address (srcmem, SImode, srcptr);
26141 dest = change_address (destmem, SImode, destptr);
26142 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26143 emit_label (label);
26144 LABEL_NUSES (label) = 1;
26145 }
26146 if (max_size > 2)
26147 {
26148 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26149 src = change_address (srcmem, HImode, srcptr);
26150 dest = change_address (destmem, HImode, destptr);
26151 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26152 emit_label (label);
26153 LABEL_NUSES (label) = 1;
26154 }
26155 if (max_size > 1)
26156 {
26157 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26158 src = change_address (srcmem, QImode, srcptr);
26159 dest = change_address (destmem, QImode, destptr);
26160 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26161 emit_label (label);
26162 LABEL_NUSES (label) = 1;
26163 }
26164 }
26165 else
26166 {
26167 rtx offset = force_reg (Pmode, const0_rtx);
26168 rtx tmp;
26169
26170 if (max_size > 4)
26171 {
26172 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26173 src = change_address (srcmem, SImode, srcptr);
26174 dest = change_address (destmem, SImode, destptr);
26175 emit_move_insn (dest, src);
26176 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26177 true, OPTAB_LIB_WIDEN);
26178 if (tmp != offset)
26179 emit_move_insn (offset, tmp);
26180 emit_label (label);
26181 LABEL_NUSES (label) = 1;
26182 }
26183 if (max_size > 2)
26184 {
26185 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26186 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26187 src = change_address (srcmem, HImode, tmp);
26188 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26189 dest = change_address (destmem, HImode, tmp);
26190 emit_move_insn (dest, src);
26191 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26192 true, OPTAB_LIB_WIDEN);
26193 if (tmp != offset)
26194 emit_move_insn (offset, tmp);
26195 emit_label (label);
26196 LABEL_NUSES (label) = 1;
26197 }
26198 if (max_size > 1)
26199 {
26200 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26201 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26202 src = change_address (srcmem, QImode, tmp);
26203 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26204 dest = change_address (destmem, QImode, tmp);
26205 emit_move_insn (dest, src);
26206 emit_label (label);
26207 LABEL_NUSES (label) = 1;
26208 }
26209 }
26210 }
26211
26212 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
26213 with value PROMOTED_VAL.
26214    Unlike emit_memmov, there is no source operand to update here.
26215    Return value is the updated DST.  */
26216 static rtx
26217 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26218 HOST_WIDE_INT size_to_move)
26219 {
26220 rtx dst = destmem, adjust;
26221 enum insn_code code;
26222 machine_mode move_mode;
26223 int piece_size, i;
26224
26225 /* Find the widest mode in which we could perform moves.
26226 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
26227 it until move of such size is supported. */
26228 move_mode = GET_MODE (promoted_val);
26229 if (move_mode == VOIDmode)
26230 move_mode = QImode;
26231 if (size_to_move < GET_MODE_SIZE (move_mode))
26232 {
26233 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
26234 promoted_val = gen_lowpart (move_mode, promoted_val);
26235 }
26236 piece_size = GET_MODE_SIZE (move_mode);
26237 code = optab_handler (mov_optab, move_mode);
26238 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26239
26240 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26241
26242   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
26243 gcc_assert (size_to_move % piece_size == 0);
26244 adjust = GEN_INT (piece_size);
26245 for (i = 0; i < size_to_move; i += piece_size)
26246 {
26247 if (piece_size <= GET_MODE_SIZE (word_mode))
26248 {
26249 emit_insn (gen_strset (destptr, dst, promoted_val));
26250 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26251 piece_size);
26252 continue;
26253 }
26254
26255 emit_insn (GEN_FCN (code) (dst, promoted_val));
26256
26257 emit_move_insn (destptr,
26258 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26259
26260 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26261 piece_size);
26262 }
26263
26264 /* Update DST rtx. */
26265 return dst;
26266 }
26267 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
26268 static void
26269 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26270 rtx count, int max_size)
26271 {
26272 count =
26273 expand_simple_binop (counter_mode (count), AND, count,
26274 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26275 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26276 gen_lowpart (QImode, value), count, QImode,
26277 1, max_size / 2, true);
26278 }
26279
26280 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
26281 static void
26282 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26283 rtx count, int max_size)
26284 {
26285 rtx dest;
26286
26287 if (CONST_INT_P (count))
26288 {
26289 HOST_WIDE_INT countval = INTVAL (count);
26290 HOST_WIDE_INT epilogue_size = countval % max_size;
26291 int i;
26292
26293 /* For now MAX_SIZE should be a power of 2. This assert could be
26294 relaxed, but it'll require a bit more complicated epilogue
26295 expanding. */
26296 gcc_assert ((max_size & (max_size - 1)) == 0);
26297 for (i = max_size; i >= 1; i >>= 1)
26298 {
26299 if (epilogue_size & i)
26300 {
26301 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26302 destmem = emit_memset (destmem, destptr, vec_value, i);
26303 else
26304 destmem = emit_memset (destmem, destptr, value, i);
26305 }
26306 }
26307 return;
26308 }
26309 if (max_size > 32)
26310 {
26311 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26312 return;
26313 }
26314 if (max_size > 16)
26315 {
26316 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26317 if (TARGET_64BIT)
26318 {
26319 dest = change_address (destmem, DImode, destptr);
26320 emit_insn (gen_strset (destptr, dest, value));
26321 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26322 emit_insn (gen_strset (destptr, dest, value));
26323 }
26324 else
26325 {
26326 dest = change_address (destmem, SImode, destptr);
26327 emit_insn (gen_strset (destptr, dest, value));
26328 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26329 emit_insn (gen_strset (destptr, dest, value));
26330 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26331 emit_insn (gen_strset (destptr, dest, value));
26332 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26333 emit_insn (gen_strset (destptr, dest, value));
26334 }
26335 emit_label (label);
26336 LABEL_NUSES (label) = 1;
26337 }
26338 if (max_size > 8)
26339 {
26340 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26341 if (TARGET_64BIT)
26342 {
26343 dest = change_address (destmem, DImode, destptr);
26344 emit_insn (gen_strset (destptr, dest, value));
26345 }
26346 else
26347 {
26348 dest = change_address (destmem, SImode, destptr);
26349 emit_insn (gen_strset (destptr, dest, value));
26350 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26351 emit_insn (gen_strset (destptr, dest, value));
26352 }
26353 emit_label (label);
26354 LABEL_NUSES (label) = 1;
26355 }
26356 if (max_size > 4)
26357 {
26358 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26359 dest = change_address (destmem, SImode, destptr);
26360 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26361 emit_label (label);
26362 LABEL_NUSES (label) = 1;
26363 }
26364 if (max_size > 2)
26365 {
26366 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26367 dest = change_address (destmem, HImode, destptr);
26368 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26369 emit_label (label);
26370 LABEL_NUSES (label) = 1;
26371 }
26372 if (max_size > 1)
26373 {
26374 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26375 dest = change_address (destmem, QImode, destptr);
26376 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26377 emit_label (label);
26378 LABEL_NUSES (label) = 1;
26379 }
26380 }
26381
26382 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
26383    enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
26384 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
26385 ignored.
26386 Return value is updated DESTMEM. */
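/* Illustrative example: with ALIGN == 1 and DESIRED_ALIGNMENT == 16 the loop
   below emits four guarded steps, one per low bit of DESTPTR (1, 2, 4, 8);
   each step copies (or sets) that many bytes when the bit is set, decreases
   COUNT accordingly and bumps the recorded alignment of DESTMEM.  */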
26387 static rtx
26388 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26389 rtx destptr, rtx srcptr, rtx value,
26390 rtx vec_value, rtx count, int align,
26391 int desired_alignment, bool issetmem)
26392 {
26393 int i;
26394 for (i = 1; i < desired_alignment; i <<= 1)
26395 {
26396 if (align <= i)
26397 {
26398 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26399 if (issetmem)
26400 {
26401 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26402 destmem = emit_memset (destmem, destptr, vec_value, i);
26403 else
26404 destmem = emit_memset (destmem, destptr, value, i);
26405 }
26406 else
26407 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26408 ix86_adjust_counter (count, i);
26409 emit_label (label);
26410 LABEL_NUSES (label) = 1;
26411 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26412 }
26413 }
26414 return destmem;
26415 }
26416
26417 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
26418    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
26419    and jump to DONE_LABEL.  */
26420 static void
26421 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
26422 rtx destptr, rtx srcptr,
26423 rtx value, rtx vec_value,
26424 rtx count, int size,
26425 rtx done_label, bool issetmem)
26426 {
26427 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
26428 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
26429 rtx modesize;
26430 int n;
26431
26432 /* If we do not have vector value to copy, we must reduce size. */
26433 if (issetmem)
26434 {
26435 if (!vec_value)
26436 {
26437 if (GET_MODE (value) == VOIDmode && size > 8)
26438 mode = Pmode;
26439 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26440 mode = GET_MODE (value);
26441 }
26442 else
26443 mode = GET_MODE (vec_value), value = vec_value;
26444 }
26445 else
26446 {
26447 /* Choose appropriate vector mode. */
26448 if (size >= 32)
26449 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26450 else if (size >= 16)
26451 mode = TARGET_SSE ? V16QImode : DImode;
26452 srcmem = change_address (srcmem, mode, srcptr);
26453 }
26454 destmem = change_address (destmem, mode, destptr);
26455 modesize = GEN_INT (GET_MODE_SIZE (mode));
26456 gcc_assert (GET_MODE_SIZE (mode) <= size);
26457 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26458 {
26459 if (issetmem)
26460 emit_move_insn (destmem, gen_lowpart (mode, value));
26461 else
26462 {
26463 emit_move_insn (destmem, srcmem);
26464 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26465 }
26466 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26467 }
26468
26469 destmem = offset_address (destmem, count, 1);
26470 destmem = offset_address (destmem, GEN_INT (-2 * size),
26471 GET_MODE_SIZE (mode));
26472 if (!issetmem)
26473 {
26474 srcmem = offset_address (srcmem, count, 1);
26475 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
26476 GET_MODE_SIZE (mode));
26477 }
26478 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26479 {
26480 if (issetmem)
26481 emit_move_insn (destmem, gen_lowpart (mode, value));
26482 else
26483 {
26484 emit_move_insn (destmem, srcmem);
26485 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26486 }
26487 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26488 }
26489 emit_jump_insn (gen_jump (done_label));
26490 emit_barrier ();
26491
26492 emit_label (label);
26493 LABEL_NUSES (label) = 1;
26494 }
26495
26496 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
26497    and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
26498    bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
26499    proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
26500    DONE_LABEL is a label after the whole copying sequence.  The label is created
26501    on demand if *DONE_LABEL is NULL.
26502    MIN_SIZE is the minimal size of the copied block.  This value gets adjusted
26503    for the new bounds after the initial copies.
26504
26505    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
26506    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
26507    we will dispatch to a library call for large blocks.
26508
26509 In pseudocode we do:
26510
26511 if (COUNT < SIZE)
26512 {
26513 Assume that SIZE is 4. Bigger sizes are handled analogously
26514 if (COUNT & 4)
26515 {
26516 copy 4 bytes from SRCPTR to DESTPTR
26517 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26518 goto done_label
26519 }
26520 if (!COUNT)
26521 goto done_label;
26522 copy 1 byte from SRCPTR to DESTPTR
26523 if (COUNT & 2)
26524 {
26525 copy 2 bytes from SRCPTR to DESTPTR
26526 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26527 }
26528 }
26529 else
26530 {
26531 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26532      copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
26533
26534      OLD_DESTPTR = DESTPTR;
26535      Align DESTPTR up to DESIRED_ALIGN
26536      SRCPTR += DESTPTR - OLD_DESTPTR
26537      COUNT -= DESTPTR - OLD_DESTPTR
26538 if (DYNAMIC_CHECK)
26539 Round COUNT down to multiple of SIZE
26540 << optional caller supplied zero size guard is here >>
26541 << optional caller supplied dynamic check is here >>
26542 << caller supplied main copy loop is here >>
26543 }
26544 done_label:
26545 */
26546 static void
26547 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26548 rtx *destptr, rtx *srcptr,
26549 machine_mode mode,
26550 rtx value, rtx vec_value,
26551 rtx *count,
26552 rtx_code_label **done_label,
26553 int size,
26554 int desired_align,
26555 int align,
26556 unsigned HOST_WIDE_INT *min_size,
26557 bool dynamic_check,
26558 bool issetmem)
26559 {
26560 rtx_code_label *loop_label = NULL, *label;
26561 int n;
26562 rtx modesize;
26563 int prolog_size = 0;
26564 rtx mode_value;
26565
26566   /* Choose the proper value to copy.  */
26567 if (issetmem && VECTOR_MODE_P (mode))
26568 mode_value = vec_value;
26569 else
26570 mode_value = value;
26571 gcc_assert (GET_MODE_SIZE (mode) <= size);
26572
26573 /* See if block is big or small, handle small blocks. */
26574 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26575 {
26576 int size2 = size;
26577 loop_label = gen_label_rtx ();
26578
26579 if (!*done_label)
26580 *done_label = gen_label_rtx ();
26581
26582 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26583 1, loop_label);
26584 size2 >>= 1;
26585
26586 /* Handle sizes > 3. */
26587 for (;size2 > 2; size2 >>= 1)
26588 expand_small_movmem_or_setmem (destmem, srcmem,
26589 *destptr, *srcptr,
26590 value, vec_value,
26591 *count,
26592 size2, *done_label, issetmem);
26593 /* Nothing to copy? Jump to DONE_LABEL if so */
26594 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26595 1, *done_label);
26596
26597 /* Do a byte copy. */
26598 destmem = change_address (destmem, QImode, *destptr);
26599 if (issetmem)
26600 emit_move_insn (destmem, gen_lowpart (QImode, value));
26601 else
26602 {
26603 srcmem = change_address (srcmem, QImode, *srcptr);
26604 emit_move_insn (destmem, srcmem);
26605 }
26606
26607 /* Handle sizes 2 and 3. */
26608 label = ix86_expand_aligntest (*count, 2, false);
26609 destmem = change_address (destmem, HImode, *destptr);
26610 destmem = offset_address (destmem, *count, 1);
26611 destmem = offset_address (destmem, GEN_INT (-2), 2);
26612 if (issetmem)
26613 emit_move_insn (destmem, gen_lowpart (HImode, value));
26614 else
26615 {
26616 srcmem = change_address (srcmem, HImode, *srcptr);
26617 srcmem = offset_address (srcmem, *count, 1);
26618 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26619 emit_move_insn (destmem, srcmem);
26620 }
26621
26622 emit_label (label);
26623 LABEL_NUSES (label) = 1;
26624 emit_jump_insn (gen_jump (*done_label));
26625 emit_barrier ();
26626 }
26627 else
26628 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26629 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26630
26631 /* Start memcpy for COUNT >= SIZE. */
26632 if (loop_label)
26633 {
26634 emit_label (loop_label);
26635 LABEL_NUSES (loop_label) = 1;
26636 }
26637
26638   /* Copy or set the first DESIRED_ALIGN - ALIGN bytes.  */
26639 if (!issetmem)
26640 srcmem = change_address (srcmem, mode, *srcptr);
26641 destmem = change_address (destmem, mode, *destptr);
26642 modesize = GEN_INT (GET_MODE_SIZE (mode));
26643 for (n = 0; prolog_size < desired_align - align; n++)
26644 {
26645 if (issetmem)
26646 emit_move_insn (destmem, mode_value);
26647 else
26648 {
26649 emit_move_insn (destmem, srcmem);
26650 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26651 }
26652 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26653 prolog_size += GET_MODE_SIZE (mode);
26654 }
26655
26656
26657 /* Copy last SIZE bytes. */
26658 destmem = offset_address (destmem, *count, 1);
26659 destmem = offset_address (destmem,
26660 GEN_INT (-size - prolog_size),
26661 1);
26662 if (issetmem)
26663 emit_move_insn (destmem, mode_value);
26664 else
26665 {
26666 srcmem = offset_address (srcmem, *count, 1);
26667 srcmem = offset_address (srcmem,
26668 GEN_INT (-size - prolog_size),
26669 1);
26670 emit_move_insn (destmem, srcmem);
26671 }
26672 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26673 {
26674 destmem = offset_address (destmem, modesize, 1);
26675 if (issetmem)
26676 emit_move_insn (destmem, mode_value);
26677 else
26678 {
26679 srcmem = offset_address (srcmem, modesize, 1);
26680 emit_move_insn (destmem, srcmem);
26681 }
26682 }
26683
26684 /* Align destination. */
26685 if (desired_align > 1 && desired_align > align)
26686 {
26687 rtx saveddest = *destptr;
26688
26689 gcc_assert (desired_align <= size);
26690 /* Align destptr up, place it to new register. */
26691 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26692 GEN_INT (prolog_size),
26693 NULL_RTX, 1, OPTAB_DIRECT);
26694 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26695 REG_POINTER (*destptr) = 1;
26696 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26697 GEN_INT (-desired_align),
26698 *destptr, 1, OPTAB_DIRECT);
26699 /* See how many bytes we skipped. */
26700 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26701 *destptr,
26702 saveddest, 1, OPTAB_DIRECT);
26703 /* Adjust srcptr and count. */
26704 if (!issetmem)
26705 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26706 saveddest, *srcptr, 1, OPTAB_DIRECT);
26707 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26708 saveddest, *count, 1, OPTAB_DIRECT);
26709 /* We copied at most size + prolog_size. */
26710 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26711 *min_size
26712 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26713 else
26714 *min_size = 0;
26715
26716 /* Our loops always round down the block size, but for dispatch to
26717 library we need precise value. */
26718 if (dynamic_check)
26719 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26720 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26721 }
26722 else
26723 {
26724 gcc_assert (prolog_size == 0);
26725 /* Decrease count, so we won't end up copying last word twice. */
26726 if (!CONST_INT_P (*count))
26727 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26728 constm1_rtx, *count, 1, OPTAB_DIRECT);
26729 else
26730 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26731 (unsigned HOST_WIDE_INT)size));
26732 if (*min_size)
26733 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
26734 }
26735 }
26736
26737
26738 /* This function is like the previous one, except here we know how many bytes
26739 need to be copied. That allows us to update alignment not only of DST, which
26740 is returned, but also of SRC, which is passed as a pointer for that
26741 reason. */
26742 static rtx
26743 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
26744 rtx srcreg, rtx value, rtx vec_value,
26745 int desired_align, int align_bytes,
26746 bool issetmem)
26747 {
26748 rtx src = NULL;
26749 rtx orig_dst = dst;
26750 rtx orig_src = NULL;
26751 int piece_size = 1;
26752 int copied_bytes = 0;
26753
26754 if (!issetmem)
26755 {
26756 gcc_assert (srcp != NULL);
26757 src = *srcp;
26758 orig_src = src;
26759 }
26760
26761 for (piece_size = 1;
26762 piece_size <= desired_align && copied_bytes < align_bytes;
26763 piece_size <<= 1)
26764 {
26765 if (align_bytes & piece_size)
26766 {
26767 if (issetmem)
26768 {
26769 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
26770 dst = emit_memset (dst, destreg, vec_value, piece_size);
26771 else
26772 dst = emit_memset (dst, destreg, value, piece_size);
26773 }
26774 else
26775 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
26776 copied_bytes += piece_size;
26777 }
26778 }
26779 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
26780 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26781 if (MEM_SIZE_KNOWN_P (orig_dst))
26782 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
26783
26784 if (!issetmem)
26785 {
26786 int src_align_bytes = get_mem_align_offset (src, desired_align
26787 * BITS_PER_UNIT);
26788 if (src_align_bytes >= 0)
26789 src_align_bytes = desired_align - src_align_bytes;
26790 if (src_align_bytes >= 0)
26791 {
26792 unsigned int src_align;
26793 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26794 {
26795 if ((src_align_bytes & (src_align - 1))
26796 == (align_bytes & (src_align - 1)))
26797 break;
26798 }
26799 if (src_align > (unsigned int) desired_align)
26800 src_align = desired_align;
26801 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26802 set_mem_align (src, src_align * BITS_PER_UNIT);
26803 }
26804 if (MEM_SIZE_KNOWN_P (orig_src))
26805 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26806 *srcp = src;
26807 }
26808
26809 return dst;
26810 }
26811
26812 /* Return true if ALG can be used in current context.
26813 Assume we expand memset if MEMSET is true. */
26814 static bool
26815 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26816 {
26817 if (alg == no_stringop)
26818 return false;
26819 if (alg == vector_loop)
26820 return TARGET_SSE || TARGET_AVX;
26821 /* Algorithms using the rep prefix want at least edi and ecx;
26822 additionally, memset wants eax and memcpy wants esi. Don't
26823 consider such algorithms if the user has appropriated those
26824 registers for their own purposes, or if we have a non-default
26825 address space, since some string insns cannot override the segment. */
26826 if (alg == rep_prefix_1_byte
26827 || alg == rep_prefix_4_byte
26828 || alg == rep_prefix_8_byte)
26829 {
26830 if (have_as)
26831 return false;
26832 if (fixed_regs[CX_REG]
26833 || fixed_regs[DI_REG]
26834 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26835 return false;
26836 }
26837 return true;
26838 }
26839
26840 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
26841 static enum stringop_alg
26842 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26843 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26844 bool memset, bool zero_memset, bool have_as,
26845 int *dynamic_check, bool *noalign, bool recur)
26846 {
26847 const struct stringop_algs *algs;
26848 bool optimize_for_speed;
26849 int max = 0;
26850 const struct processor_costs *cost;
26851 int i;
26852 bool any_alg_usable_p = false;
26853
26854 *noalign = false;
26855 *dynamic_check = -1;
26856
26857 /* Even if the string operation call is cold, we still might spend a lot
26858 of time processing large blocks. */
26859 if (optimize_function_for_size_p (cfun)
26860 || (optimize_insn_for_size_p ()
26861 && (max_size < 256
26862 || (expected_size != -1 && expected_size < 256))))
26863 optimize_for_speed = false;
26864 else
26865 optimize_for_speed = true;
26866
26867 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26868 if (memset)
26869 algs = &cost->memset[TARGET_64BIT != 0];
26870 else
26871 algs = &cost->memcpy[TARGET_64BIT != 0];
26872
26873 /* See maximal size for user defined algorithm. */
26874 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26875 {
26876 enum stringop_alg candidate = algs->size[i].alg;
26877 bool usable = alg_usable_p (candidate, memset, have_as);
26878 any_alg_usable_p |= usable;
26879
26880 if (candidate != libcall && candidate && usable)
26881 max = algs->size[i].max;
26882 }
26883
26884   /* If the expected size is not known but the max size is small enough
26885      that the inline version is a win, set the expected size into
26886      the range.  */
26887 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26888 && expected_size == -1)
26889 expected_size = min_size / 2 + max_size / 2;
26890
26891 /* If user specified the algorithm, honor it if possible. */
26892 if (ix86_stringop_alg != no_stringop
26893 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26894 return ix86_stringop_alg;
26895 /* rep; movq or rep; movl is the smallest variant. */
26896 else if (!optimize_for_speed)
26897 {
26898 *noalign = true;
26899 if (!count || (count & 3) || (memset && !zero_memset))
26900 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26901 ? rep_prefix_1_byte : loop_1_byte;
26902 else
26903 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26904 ? rep_prefix_4_byte : loop;
26905 }
26906   /* Very tiny blocks are best handled via the loop; REP is expensive to
26907      set up.  */
26908 else if (expected_size != -1 && expected_size < 4)
26909 return loop_1_byte;
26910 else if (expected_size != -1)
26911 {
26912 enum stringop_alg alg = libcall;
26913 bool alg_noalign = false;
26914 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26915 {
26916 /* We get here if the algorithms that were not libcall-based
26917 were rep-prefix based and we are unable to use rep prefixes
26918 based on global register usage. Break out of the loop and
26919 use the heuristic below. */
26920 if (algs->size[i].max == 0)
26921 break;
26922 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26923 {
26924 enum stringop_alg candidate = algs->size[i].alg;
26925
26926 if (candidate != libcall
26927 && alg_usable_p (candidate, memset, have_as))
26928 {
26929 alg = candidate;
26930 alg_noalign = algs->size[i].noalign;
26931 }
26932 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26933 last non-libcall inline algorithm. */
26934 if (TARGET_INLINE_ALL_STRINGOPS)
26935 {
26936 /* When the current size is best to be copied by a libcall,
26937 but we are still forced to inline, run the heuristic below
26938 that will pick code for medium sized blocks. */
26939 if (alg != libcall)
26940 {
26941 *noalign = alg_noalign;
26942 return alg;
26943 }
26944 else if (!any_alg_usable_p)
26945 break;
26946 }
26947 else if (alg_usable_p (candidate, memset, have_as))
26948 {
26949 *noalign = algs->size[i].noalign;
26950 return candidate;
26951 }
26952 }
26953 }
26954 }
26955   /* When asked to inline the call anyway, try to pick a meaningful choice.
26956      We look for the maximal size of a block that is faster to copy by hand
26957      and take blocks of at most that size, guessing that the average size will
26958      be roughly half of the block.
26959
26960 If this turns out to be bad, we might simply specify the preferred
26961 choice in ix86_costs. */
26962 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26963 && (algs->unknown_size == libcall
26964 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26965 {
26966 enum stringop_alg alg;
26967 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26968
26969 /* If there aren't any usable algorithms or if we are recursing already,
26970 then recursing on smaller sizes or the same size isn't going to
26971 find anything. Just return the simple byte-at-a-time copy loop. */
26972 if (!any_alg_usable_p || recur)
26973 {
26974 /* Pick something reasonable. */
26975 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26976 *dynamic_check = 128;
26977 return loop_1_byte;
26978 }
26979 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26980 zero_memset, have_as, dynamic_check, noalign, true);
26981 gcc_assert (*dynamic_check == -1);
26982 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26983 *dynamic_check = max;
26984 else
26985 gcc_assert (alg != libcall);
26986 return alg;
26987 }
26988 return (alg_usable_p (algs->unknown_size, memset, have_as)
26989 ? algs->unknown_size : libcall);
26990 }
26991
26992 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26993 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26994 static int
26995 decide_alignment (int align,
26996 enum stringop_alg alg,
26997 int expected_size,
26998 machine_mode move_mode)
26999 {
27000 int desired_align = 0;
27001
27002 gcc_assert (alg != no_stringop);
27003
27004 if (alg == libcall)
27005 return 0;
27006 if (move_mode == VOIDmode)
27007 return 0;
27008
27009 desired_align = GET_MODE_SIZE (move_mode);
27010 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
27011 copying a whole cache line at once. */
27012 if (TARGET_PENTIUMPRO
27013 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
27014 desired_align = 8;
27015
27016 if (optimize_size)
27017 desired_align = 1;
27018 if (desired_align < align)
27019 desired_align = align;
27020 if (expected_size != -1 && expected_size < 4)
27021 desired_align = align;
27022
27023 return desired_align;
27024 }
27025
27026
27027 /* Helper function for memset. For the QImode value 0xXY produce
27028 0xXYXYXYXY of the width specified by MODE. This is essentially
27029 a * 0x01010101, but we can do slightly better than
27030 synth_mult by unwinding the sequence by hand on CPUs with
27031 slow multiply. */
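/* For example, VAL == 0x5A yields 0x5A5A5A5A in SImode and
0x5A5A5A5A5A5A5A5A in DImode. */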
27032 static rtx
27033 promote_duplicated_reg (machine_mode mode, rtx val)
27034 {
27035 machine_mode valmode = GET_MODE (val);
27036 rtx tmp;
27037 int nops = mode == DImode ? 3 : 2;
27038
27039 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
27040 if (val == const0_rtx)
27041 return copy_to_mode_reg (mode, CONST0_RTX (mode));
27042 if (CONST_INT_P (val))
27043 {
27044 HOST_WIDE_INT v = INTVAL (val) & 255;
27045
27046 v |= v << 8;
27047 v |= v << 16;
27048 if (mode == DImode)
27049 v |= (v << 16) << 16;
27050 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
27051 }
27052
27053 if (valmode == VOIDmode)
27054 valmode = QImode;
27055 if (valmode != QImode)
27056 val = gen_lowpart (QImode, val);
27057 if (mode == QImode)
27058 return val;
27059 if (!TARGET_PARTIAL_REG_STALL)
27060 nops--;
27061 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27062 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27063 <= (ix86_cost->shift_const + ix86_cost->add) * nops
27064 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27065 {
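/* Multiplying by a register holding 0x01...01 (built by recursing
with the constant 1) replicates the byte across the whole word. */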
27066 rtx reg = convert_modes (mode, QImode, val, true);
27067 tmp = promote_duplicated_reg (mode, const1_rtx);
27068 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27069 OPTAB_DIRECT);
27070 }
27071 else
27072 {
27073 rtx reg = convert_modes (mode, QImode, val, true);
27074
27075 if (!TARGET_PARTIAL_REG_STALL)
27076 if (mode == SImode)
27077 emit_insn (gen_insvsi_1 (reg, reg));
27078 else
27079 emit_insn (gen_insvdi_1 (reg, reg));
27080 else
27081 {
27082 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27083 NULL, 1, OPTAB_DIRECT);
27084 reg =
27085 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27086 }
27087 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27088 NULL, 1, OPTAB_DIRECT);
27089 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27090 if (mode == SImode)
27091 return reg;
27092 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27093 NULL, 1, OPTAB_DIRECT);
27094 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27095 return reg;
27096 }
27097 }
27098
27099 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
27100 will be needed by the main loop copying SIZE_NEEDED chunks and by the
27101 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
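/* E.g. SIZE_NEEDED == 8 on a 64-bit target yields a DImode broadcast,
while SIZE_NEEDED == 2 with no extra alignment need only requires HImode. */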
27102 static rtx
27103 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27104 int align)
27105 {
27106 rtx promoted_val;
27107
27108 if (TARGET_64BIT
27109 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27110 promoted_val = promote_duplicated_reg (DImode, val);
27111 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27112 promoted_val = promote_duplicated_reg (SImode, val);
27113 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27114 promoted_val = promote_duplicated_reg (HImode, val);
27115 else
27116 promoted_val = val;
27117
27118 return promoted_val;
27119 }
27120
27121 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
27122 operations when profitable. The code depends upon architecture, block size
27123 and alignment, but always has one of the following overall structures:
27124
27125 Aligned move sequence:
27126
27127 1) Prologue guard: Conditional that jumps up to epilogues for small
27128 blocks that can be handled by the epilogue alone. This is faster
27129 but also needed for correctness, since the prologue assumes the block
27130 is larger than the desired alignment.
27131
27132 Optional dynamic check for size and libcall for large
27133 blocks is emitted here too, with -minline-stringops-dynamically.
27134
27135 2) Prologue: copy first few bytes in order to get destination
27136 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
27137 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27138 copied. We emit either a jump tree on power of two sized
27139 blocks, or a byte loop.
27140
27141 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27142 with specified algorithm.
27143
27144 4) Epilogue: code copying tail of the block that is too small to be
27145 handled by main body (or up to size guarded by prologue guard).
27146
27147 Misaligned move sequence
27148
27149 1) misaligned move prologue/epilogue containing:
27150 a) Prologue handling small memory blocks and jumping to done_label
27151 (skipped if blocks are known to be large enough)
27152 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
27153 needed, done by a single possibly misaligned move
27154 (skipped if alignment is not needed)
27155 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27156
27157 2) Zero size guard dispatching to done_label, if needed
27158
27159 3) dispatch to library call, if needed,
27160
27161 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27162 with specified algorithm. */
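/* In outline, the aligned sequence expands to something like:
if (count < epilogue_size_needed) goto epilogue;
copy a few bytes to reach the desired destination alignment;
main_loop: copy SIZE_NEEDED bytes per iteration;
epilogue: copy the remaining count % epilogue_size_needed bytes. */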
27163 bool
27164 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27165 rtx align_exp, rtx expected_align_exp,
27166 rtx expected_size_exp, rtx min_size_exp,
27167 rtx max_size_exp, rtx probable_max_size_exp,
27168 bool issetmem)
27169 {
27170 rtx destreg;
27171 rtx srcreg = NULL;
27172 rtx_code_label *label = NULL;
27173 rtx tmp;
27174 rtx_code_label *jump_around_label = NULL;
27175 HOST_WIDE_INT align = 1;
27176 unsigned HOST_WIDE_INT count = 0;
27177 HOST_WIDE_INT expected_size = -1;
27178 int size_needed = 0, epilogue_size_needed;
27179 int desired_align = 0, align_bytes = 0;
27180 enum stringop_alg alg;
27181 rtx promoted_val = NULL;
27182 rtx vec_promoted_val = NULL;
27183 bool force_loopy_epilogue = false;
27184 int dynamic_check;
27185 bool need_zero_guard = false;
27186 bool noalign;
27187 machine_mode move_mode = VOIDmode;
27188 int unroll_factor = 1;
27189 /* TODO: Once value ranges are available, fill in proper data. */
27190 unsigned HOST_WIDE_INT min_size = 0;
27191 unsigned HOST_WIDE_INT max_size = -1;
27192 unsigned HOST_WIDE_INT probable_max_size = -1;
27193 bool misaligned_prologue_used = false;
27194 bool have_as;
27195
27196 if (CONST_INT_P (align_exp))
27197 align = INTVAL (align_exp);
27198 /* i386 can do misaligned access at a reasonably increased cost. */
27199 if (CONST_INT_P (expected_align_exp)
27200 && INTVAL (expected_align_exp) > align)
27201 align = INTVAL (expected_align_exp);
27202 /* ALIGN is the minimum of destination and source alignment, but we care here
27203 just about destination alignment. */
27204 else if (!issetmem
27205 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27206 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27207
27208 if (CONST_INT_P (count_exp))
27209 {
27210 min_size = max_size = probable_max_size = count = expected_size
27211 = INTVAL (count_exp);
27212 /* When COUNT is 0, there is nothing to do. */
27213 if (!count)
27214 return true;
27215 }
27216 else
27217 {
27218 if (min_size_exp)
27219 min_size = INTVAL (min_size_exp);
27220 if (max_size_exp)
27221 max_size = INTVAL (max_size_exp);
27222 if (probable_max_size_exp)
27223 probable_max_size = INTVAL (probable_max_size_exp);
27224 if (CONST_INT_P (expected_size_exp))
27225 expected_size = INTVAL (expected_size_exp);
27226 }
27227
27228 /* Make sure we don't need to care about overflow later on. */
27229 if (count > (HOST_WIDE_INT_1U << 30))
27230 return false;
27231
27232 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27233 if (!issetmem)
27234 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27235
27236 /* Step 0: Decide on preferred algorithm, desired alignment and
27237 size of chunks to be copied by main loop. */
27238 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27239 issetmem,
27240 issetmem && val_exp == const0_rtx, have_as,
27241 &dynamic_check, &noalign, false);
27242 if (alg == libcall)
27243 return false;
27244 gcc_assert (alg != no_stringop);
27245
27246 /* For now the vector version of memset is generated only for memory zeroing,
27247 as creating a promoted vector value is very cheap in this case. */
27248 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27249 alg = unrolled_loop;
27250
27251 if (!count)
27252 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27253 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27254 if (!issetmem)
27255 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27256
27257 unroll_factor = 1;
27258 move_mode = word_mode;
27259 switch (alg)
27260 {
27261 case libcall:
27262 case no_stringop:
27263 case last_alg:
27264 gcc_unreachable ();
27265 case loop_1_byte:
27266 need_zero_guard = true;
27267 move_mode = QImode;
27268 break;
27269 case loop:
27270 need_zero_guard = true;
27271 break;
27272 case unrolled_loop:
27273 need_zero_guard = true;
27274 unroll_factor = (TARGET_64BIT ? 4 : 2);
27275 break;
27276 case vector_loop:
27277 need_zero_guard = true;
27278 unroll_factor = 4;
27279 /* Find the widest supported mode. */
27280 move_mode = word_mode;
27281 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
27282 != CODE_FOR_nothing)
27283 move_mode = GET_MODE_WIDER_MODE (move_mode);
27284
27285 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27286 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27287 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27288 {
27289 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27290 move_mode = mode_for_vector (word_mode, nunits);
27291 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27292 move_mode = word_mode;
27293 }
27294 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27295 break;
27296 case rep_prefix_8_byte:
27297 move_mode = DImode;
27298 break;
27299 case rep_prefix_4_byte:
27300 move_mode = SImode;
27301 break;
27302 case rep_prefix_1_byte:
27303 move_mode = QImode;
27304 break;
27305 }
27306 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
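/* For instance, the unrolled_loop algorithm on a 64-bit target moves
DImode chunks with unroll factor 4, i.e. 32 bytes per iteration. */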
27307 epilogue_size_needed = size_needed;
27308
27309 /* If we are going to make any library calls conditionally, make sure any
27310 pending stack adjustments happen before the first conditional branch;
27311 otherwise they will be emitted only before the library call and won't
27312 happen on the other branches. */
27313 if (dynamic_check != -1)
27314 do_pending_stack_adjust ();
27315
27316 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27317 if (!TARGET_ALIGN_STRINGOPS || noalign)
27318 align = desired_align;
27319
27320 /* Step 1: Prologue guard. */
27321
27322 /* Alignment code needs count to be in register. */
27323 if (CONST_INT_P (count_exp) && desired_align > align)
27324 {
27325 if (INTVAL (count_exp) > desired_align
27326 && INTVAL (count_exp) > size_needed)
27327 {
27328 align_bytes
27329 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27330 if (align_bytes <= 0)
27331 align_bytes = 0;
27332 else
27333 align_bytes = desired_align - align_bytes;
27334 }
27335 if (align_bytes == 0)
27336 count_exp = force_reg (counter_mode (count_exp), count_exp);
27337 }
27338 gcc_assert (desired_align >= 1 && align >= 1);
27339
27340 /* Misaligned move sequences handle both prologue and epilogue at once.
27341 Default code generation results in smaller code for large alignments
27342 and also avoids redundant work when sizes are known precisely. */
27343 misaligned_prologue_used
27344 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27345 && MAX (desired_align, epilogue_size_needed) <= 32
27346 && desired_align <= epilogue_size_needed
27347 && ((desired_align > align && !align_bytes)
27348 || (!count && epilogue_size_needed > 1)));
27349
27350 /* Do the cheap promotion to allow better CSE across the
27351 main loop and epilogue (i.e. one load of the big constant in
27352 front of all the code).
27353 For now the misaligned move sequences do not have a fast path
27354 without broadcasting. */
27355 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27356 {
27357 if (alg == vector_loop)
27358 {
27359 gcc_assert (val_exp == const0_rtx);
27360 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27361 promoted_val = promote_duplicated_reg_to_size (val_exp,
27362 GET_MODE_SIZE (word_mode),
27363 desired_align, align);
27364 }
27365 else
27366 {
27367 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27368 desired_align, align);
27369 }
27370 }
27371 /* Misaligned move sequences handle both prologues and epilogues at once.
27372 Default code generation results in smaller code for large alignments and
27373 also avoids redundant work when sizes are known precisely. */
27374 if (misaligned_prologue_used)
27375 {
27376 /* The misaligned move prologue handles small blocks by itself. */
27377 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27378 (dst, src, &destreg, &srcreg,
27379 move_mode, promoted_val, vec_promoted_val,
27380 &count_exp,
27381 &jump_around_label,
27382 desired_align < align
27383 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27384 desired_align, align, &min_size, dynamic_check, issetmem);
27385 if (!issetmem)
27386 src = change_address (src, BLKmode, srcreg);
27387 dst = change_address (dst, BLKmode, destreg);
27388 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27389 epilogue_size_needed = 0;
27390 if (need_zero_guard
27391 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27392 {
27393 /* It is possible that we copied enough so the main loop will not
27394 execute. */
27395 gcc_assert (size_needed > 1);
27396 if (jump_around_label == NULL_RTX)
27397 jump_around_label = gen_label_rtx ();
27398 emit_cmp_and_jump_insns (count_exp,
27399 GEN_INT (size_needed),
27400 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27401 if (expected_size == -1
27402 || expected_size < (desired_align - align) / 2 + size_needed)
27403 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27404 else
27405 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27406 }
27407 }
27408 /* Ensure that alignment prologue won't copy past end of block. */
27409 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27410 {
27411 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27412 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
27413 Make sure it is a power of 2. */
27414 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
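/* E.g. a value of 7 rounds up to 8 and 8 rounds up to 16, i.e. the next
power of two strictly above the old value. */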
27415
27416 /* To improve performance of small blocks, we jump around the VAL
27417 promotion. This means that if the promoted VAL is not constant,
27418 we might not use it in the epilogue and have to use the byte
27419 loop variant. */
27420 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27421 force_loopy_epilogue = true;
27422 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27423 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27424 {
27425 /* If main algorithm works on QImode, no epilogue is needed.
27426 For small sizes just don't align anything. */
27427 if (size_needed == 1)
27428 desired_align = align;
27429 else
27430 goto epilogue;
27431 }
27432 else if (!count
27433 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27434 {
27435 label = gen_label_rtx ();
27436 emit_cmp_and_jump_insns (count_exp,
27437 GEN_INT (epilogue_size_needed),
27438 LTU, 0, counter_mode (count_exp), 1, label);
27439 if (expected_size == -1 || expected_size < epilogue_size_needed)
27440 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27441 else
27442 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27443 }
27444 }
27445
27446 /* Emit code to decide at runtime whether a library call or inline code
27447 should be used. */
27448 if (dynamic_check != -1)
27449 {
27450 if (!issetmem && CONST_INT_P (count_exp))
27451 {
27452 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27453 {
27454 emit_block_copy_via_libcall (dst, src, count_exp);
27455 count_exp = const0_rtx;
27456 goto epilogue;
27457 }
27458 }
27459 else
27460 {
27461 rtx_code_label *hot_label = gen_label_rtx ();
27462 if (jump_around_label == NULL_RTX)
27463 jump_around_label = gen_label_rtx ();
27464 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27465 LEU, 0, counter_mode (count_exp),
27466 1, hot_label);
27467 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27468 if (issetmem)
27469 set_storage_via_libcall (dst, count_exp, val_exp);
27470 else
27471 emit_block_copy_via_libcall (dst, src, count_exp);
27472 emit_jump (jump_around_label);
27473 emit_label (hot_label);
27474 }
27475 }
27476
27477 /* Step 2: Alignment prologue. */
27478 /* Do the expensive promotion once we have branched off the small blocks. */
27479 if (issetmem && !promoted_val)
27480 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27481 desired_align, align);
27482
27483 if (desired_align > align && !misaligned_prologue_used)
27484 {
27485 if (align_bytes == 0)
27486 {
27487 /* Except for the first move in the prologue, we no longer know
27488 the constant offset in the aliasing info. It doesn't seem worth
27489 the pain to maintain it for the first move, so throw away
27490 the info early. */
27491 dst = change_address (dst, BLKmode, destreg);
27492 if (!issetmem)
27493 src = change_address (src, BLKmode, srcreg);
27494 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27495 promoted_val, vec_promoted_val,
27496 count_exp, align, desired_align,
27497 issetmem);
27498 /* At most desired_align - align bytes are copied. */
27499 if (min_size < (unsigned)(desired_align - align))
27500 min_size = 0;
27501 else
27502 min_size -= desired_align - align;
27503 }
27504 else
27505 {
27506 /* If we know how many bytes need to be stored before dst is
27507 sufficiently aligned, maintain aliasing info accurately. */
27508 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27509 srcreg,
27510 promoted_val,
27511 vec_promoted_val,
27512 desired_align,
27513 align_bytes,
27514 issetmem);
27515
27516 count_exp = plus_constant (counter_mode (count_exp),
27517 count_exp, -align_bytes);
27518 count -= align_bytes;
27519 min_size -= align_bytes;
27520 max_size -= align_bytes;
27521 }
27522 if (need_zero_guard
27523 && min_size < (unsigned HOST_WIDE_INT) size_needed
27524 && (count < (unsigned HOST_WIDE_INT) size_needed
27525 || (align_bytes == 0
27526 && count < ((unsigned HOST_WIDE_INT) size_needed
27527 + desired_align - align))))
27528 {
27529 /* It is possible that we copied enough so the main loop will not
27530 execute. */
27531 gcc_assert (size_needed > 1);
27532 if (label == NULL_RTX)
27533 label = gen_label_rtx ();
27534 emit_cmp_and_jump_insns (count_exp,
27535 GEN_INT (size_needed),
27536 LTU, 0, counter_mode (count_exp), 1, label);
27537 if (expected_size == -1
27538 || expected_size < (desired_align - align) / 2 + size_needed)
27539 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27540 else
27541 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27542 }
27543 }
27544 if (label && size_needed == 1)
27545 {
27546 emit_label (label);
27547 LABEL_NUSES (label) = 1;
27548 label = NULL;
27549 epilogue_size_needed = 1;
27550 if (issetmem)
27551 promoted_val = val_exp;
27552 }
27553 else if (label == NULL_RTX && !misaligned_prologue_used)
27554 epilogue_size_needed = size_needed;
27555
27556 /* Step 3: Main loop. */
27557
27558 switch (alg)
27559 {
27560 case libcall:
27561 case no_stringop:
27562 case last_alg:
27563 gcc_unreachable ();
27564 case loop_1_byte:
27565 case loop:
27566 case unrolled_loop:
27567 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27568 count_exp, move_mode, unroll_factor,
27569 expected_size, issetmem);
27570 break;
27571 case vector_loop:
27572 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27573 vec_promoted_val, count_exp, move_mode,
27574 unroll_factor, expected_size, issetmem);
27575 break;
27576 case rep_prefix_8_byte:
27577 case rep_prefix_4_byte:
27578 case rep_prefix_1_byte:
27579 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27580 val_exp, count_exp, move_mode, issetmem);
27581 break;
27582 }
27583 /* Properly adjust the offset of src and dest memory for aliasing. */
27584 if (CONST_INT_P (count_exp))
27585 {
27586 if (!issetmem)
27587 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27588 (count / size_needed) * size_needed);
27589 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27590 (count / size_needed) * size_needed);
27591 }
27592 else
27593 {
27594 if (!issetmem)
27595 src = change_address (src, BLKmode, srcreg);
27596 dst = change_address (dst, BLKmode, destreg);
27597 }
27598
27599 /* Step 4: Epilogue to copy the remaining bytes. */
27600 epilogue:
27601 if (label)
27602 {
27603 /* When the main loop is done, COUNT_EXP might hold the original count,
27604 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
27605 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
27606 bytes. Compensate if needed. */
27607
27608 if (size_needed < epilogue_size_needed)
27609 {
27610 tmp =
27611 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27612 GEN_INT (size_needed - 1), count_exp, 1,
27613 OPTAB_DIRECT);
27614 if (tmp != count_exp)
27615 emit_move_insn (count_exp, tmp);
27616 }
27617 emit_label (label);
27618 LABEL_NUSES (label) = 1;
27619 }
27620
27621 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27622 {
27623 if (force_loopy_epilogue)
27624 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27625 epilogue_size_needed);
27626 else
27627 {
27628 if (issetmem)
27629 expand_setmem_epilogue (dst, destreg, promoted_val,
27630 vec_promoted_val, count_exp,
27631 epilogue_size_needed);
27632 else
27633 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27634 epilogue_size_needed);
27635 }
27636 }
27637 if (jump_around_label)
27638 emit_label (jump_around_label);
27639 return true;
27640 }
27641
27642
27643 /* Expand the appropriate insns for doing strlen if not just doing
27644 repnz; scasb
27645
27646 out = result, initialized with the start address
27647 align_rtx = alignment of the address.
27648 scratch = scratch register, initialized with the start address when
27649 not aligned, otherwise undefined
27650
27651 This is just the body. It needs the initializations mentioned above and
27652 some address computation at the end. These things are done in i386.md. */
27653
27654 static void
27655 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27656 {
27657 int align;
27658 rtx tmp;
27659 rtx_code_label *align_2_label = NULL;
27660 rtx_code_label *align_3_label = NULL;
27661 rtx_code_label *align_4_label = gen_label_rtx ();
27662 rtx_code_label *end_0_label = gen_label_rtx ();
27663 rtx mem;
27664 rtx tmpreg = gen_reg_rtx (SImode);
27665 rtx scratch = gen_reg_rtx (SImode);
27666 rtx cmp;
27667
27668 align = 0;
27669 if (CONST_INT_P (align_rtx))
27670 align = INTVAL (align_rtx);
27671
27672 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27673
27674 /* Is there a known alignment and is it less than 4? */
27675 if (align < 4)
27676 {
27677 rtx scratch1 = gen_reg_rtx (Pmode);
27678 emit_move_insn (scratch1, out);
27679 /* Is there a known alignment and is it not 2? */
27680 if (align != 2)
27681 {
27682 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27683 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27684
27685 /* Leave just the 3 lower bits. */
27686 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27687 NULL_RTX, 0, OPTAB_WIDEN);
27688
27689 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27690 Pmode, 1, align_4_label);
27691 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27692 Pmode, 1, align_2_label);
27693 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27694 Pmode, 1, align_3_label);
27695 }
27696 else
27697 {
27698 /* Since the alignment is 2, we have to check 2 or 0 bytes;
27699 check whether it is aligned to a 4-byte boundary. */
27700
27701 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27702 NULL_RTX, 0, OPTAB_WIDEN);
27703
27704 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27705 Pmode, 1, align_4_label);
27706 }
27707
27708 mem = change_address (src, QImode, out);
27709
27710 /* Now compare the bytes. */
27711
27712 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
27713 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27714 QImode, 1, end_0_label);
27715
27716 /* Increment the address. */
27717 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27718
27719 /* Not needed with an alignment of 2. */
27720 if (align != 2)
27721 {
27722 emit_label (align_2_label);
27723
27724 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27725 end_0_label);
27726
27727 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27728
27729 emit_label (align_3_label);
27730 }
27731
27732 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27733 end_0_label);
27734
27735 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27736 }
27737
27738 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
27739 align this loop; it only makes the program larger and does not help
27740 to speed it up. */
27741 emit_label (align_4_label);
27742
27743 mem = change_address (src, SImode, out);
27744 emit_move_insn (scratch, mem);
27745 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
27746
27747 /* This formula yields a nonzero result iff one of the bytes is zero.
27748 This saves three branches inside the loop and many cycles. */
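/* Concretely, the sequence below computes
(scratch - 0x01010101) & ~scratch & 0x80808080;
the lowest 0x80 bit that ends up set marks the first zero byte. */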
27749
27750 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
27751 emit_insn (gen_one_cmplsi2 (scratch, scratch));
27752 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
27753 emit_insn (gen_andsi3 (tmpreg, tmpreg,
27754 gen_int_mode (0x80808080, SImode)));
27755 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
27756 align_4_label);
27757
27758 if (TARGET_CMOVE)
27759 {
27760 rtx reg = gen_reg_rtx (SImode);
27761 rtx reg2 = gen_reg_rtx (Pmode);
27762 emit_move_insn (reg, tmpreg);
27763 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
27764
27765 /* If zero is not in the first two bytes, move two bytes forward. */
27766 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27767 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27768 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27769 emit_insn (gen_rtx_SET (tmpreg,
27770 gen_rtx_IF_THEN_ELSE (SImode, tmp,
27771 reg,
27772 tmpreg)));
27773 /* Emit lea manually to avoid clobbering of flags. */
27774 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
27775
27776 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27777 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27778 emit_insn (gen_rtx_SET (out,
27779 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
27780 reg2,
27781 out)));
27782 }
27783 else
27784 {
27785 rtx_code_label *end_2_label = gen_label_rtx ();
27786 /* Is zero in the first two bytes? */
27787
27788 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27789 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27790 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27791 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27792 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27793 pc_rtx);
27794 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27795 JUMP_LABEL (tmp) = end_2_label;
27796
27797 /* Not in the first two. Move two bytes forward. */
27798 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27799 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27800
27801 emit_label (end_2_label);
27802
27803 }
27804
27805 /* Avoid branch in fixing the byte. */
27806 tmpreg = gen_lowpart (QImode, tmpreg);
27807 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27808 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27809 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27810 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27811
27812 emit_label (end_0_label);
27813 }
27814
27815 /* Expand strlen. */
27816
27817 bool
27818 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27819 {
27820 rtx addr, scratch1, scratch2, scratch3, scratch4;
27821
27822 /* The generic case of the strlen expander is long. Avoid expanding
27823 it unless TARGET_INLINE_ALL_STRINGOPS. */
27824
27825 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27826 && !TARGET_INLINE_ALL_STRINGOPS
27827 && !optimize_insn_for_size_p ()
27828 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27829 return false;
27830
27831 addr = force_reg (Pmode, XEXP (src, 0));
27832 scratch1 = gen_reg_rtx (Pmode);
27833
27834 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27835 && !optimize_insn_for_size_p ())
27836 {
27837 /* Well it seems that some optimizer does not combine a call like
27838 foo(strlen(bar), strlen(bar));
27839 when the move and the subtraction are done here. It does calculate
27840 the length just once when these instructions are done inside of
27841 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
27842 often used and I use one fewer register for the lifetime of
27843 output_strlen_unroll() this is better. */
27844
27845 emit_move_insn (out, addr);
27846
27847 ix86_expand_strlensi_unroll_1 (out, src, align);
27848
27849 /* strlensi_unroll_1 returns the address of the zero at the end of
27850 the string, like memchr(), so compute the length by subtracting
27851 the start address. */
27852 emit_insn (ix86_gen_sub3 (out, out, addr));
27853 }
27854 else
27855 {
27856 rtx unspec;
27857
27858 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27859 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27860 return false;
27861 /* Can't use this for non-default address spaces. */
27862 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27863 return false;
27864
27865 scratch2 = gen_reg_rtx (Pmode);
27866 scratch3 = gen_reg_rtx (Pmode);
27867 scratch4 = force_reg (Pmode, constm1_rtx);
27868
27869 emit_move_insn (scratch3, addr);
27870 eoschar = force_reg (QImode, eoschar);
27871
27872 src = replace_equiv_address_nv (src, scratch3);
27873
27874 /* If .md starts supporting :P, this can be done in .md. */
27875 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27876 scratch4), UNSPEC_SCAS);
27877 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
27878 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27879 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27880 }
27881 return true;
27882 }
27883
27884 /* For a given symbol (function), construct code to compute the address of its
27885 PLT entry in the large x86-64 PIC model. */
27886 static rtx
27887 construct_plt_address (rtx symbol)
27888 {
27889 rtx tmp, unspec;
27890
27891 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27892 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27893 gcc_assert (Pmode == DImode);
27894
27895 tmp = gen_reg_rtx (Pmode);
27896 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27897
27898 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27899 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27900 return tmp;
27901 }
27902
27903 rtx
27904 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27905 rtx callarg2,
27906 rtx pop, bool sibcall)
27907 {
27908 rtx vec[3];
27909 rtx use = NULL, call;
27910 unsigned int vec_len = 0;
27911 tree fndecl;
27912
27913 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27914 {
27915 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
27916 if (fndecl
27917 && (lookup_attribute ("interrupt",
27918 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
27919 error ("interrupt service routine can't be called directly");
27920 }
27921 else
27922 fndecl = NULL_TREE;
27923
27924 if (pop == const0_rtx)
27925 pop = NULL;
27926 gcc_assert (!TARGET_64BIT || !pop);
27927
27928 if (TARGET_MACHO && !TARGET_64BIT)
27929 {
27930 #if TARGET_MACHO
27931 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27932 fnaddr = machopic_indirect_call_target (fnaddr);
27933 #endif
27934 }
27935 else
27936 {
27937 /* Static functions and indirect calls don't need the pic register. Also,
27938 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27939 it an indirect call. */
27940 rtx addr = XEXP (fnaddr, 0);
27941 if (flag_pic
27942 && GET_CODE (addr) == SYMBOL_REF
27943 && !SYMBOL_REF_LOCAL_P (addr))
27944 {
27945 if (flag_plt
27946 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27947 || !lookup_attribute ("noplt",
27948 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27949 {
27950 if (!TARGET_64BIT
27951 || (ix86_cmodel == CM_LARGE_PIC
27952 && DEFAULT_ABI != MS_ABI))
27953 {
27954 use_reg (&use, gen_rtx_REG (Pmode,
27955 REAL_PIC_OFFSET_TABLE_REGNUM));
27956 if (ix86_use_pseudo_pic_reg ())
27957 emit_move_insn (gen_rtx_REG (Pmode,
27958 REAL_PIC_OFFSET_TABLE_REGNUM),
27959 pic_offset_table_rtx);
27960 }
27961 }
27962 else if (!TARGET_PECOFF && !TARGET_MACHO)
27963 {
27964 if (TARGET_64BIT)
27965 {
27966 fnaddr = gen_rtx_UNSPEC (Pmode,
27967 gen_rtvec (1, addr),
27968 UNSPEC_GOTPCREL);
27969 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27970 }
27971 else
27972 {
27973 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27974 UNSPEC_GOT);
27975 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27976 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27977 fnaddr);
27978 }
27979 fnaddr = gen_const_mem (Pmode, fnaddr);
27980 /* Pmode may not be the same as word_mode for x32, which
27981 doesn't support indirect branch via 32-bit memory slot.
27982 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27983 indirect branch via x32 GOT slot is OK. */
27984 if (GET_MODE (fnaddr) != word_mode)
27985 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27986 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27987 }
27988 }
27989 }
27990
27991 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27992 parameters passed in vector registers. */
27993 if (TARGET_64BIT
27994 && (INTVAL (callarg2) > 0
27995 || (INTVAL (callarg2) == 0
27996 && (TARGET_SSE || !flag_skip_rax_setup))))
27997 {
27998 rtx al = gen_rtx_REG (QImode, AX_REG);
27999 emit_move_insn (al, callarg2);
28000 use_reg (&use, al);
28001 }
28002
28003 if (ix86_cmodel == CM_LARGE_PIC
28004 && !TARGET_PECOFF
28005 && MEM_P (fnaddr)
28006 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
28007 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
28008 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
28009 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
28010 branch via x32 GOT slot is OK. */
28011 else if (!(TARGET_X32
28012 && MEM_P (fnaddr)
28013 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
28014 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
28015 && (sibcall
28016 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
28017 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
28018 {
28019 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
28020 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
28021 }
28022
28023 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
28024
28025 if (retval)
28026 {
28027 /* We should add bounds as a destination register in case
28028 a pointer with bounds may be returned. */
28029 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
28030 {
28031 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
28032 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
28033 if (GET_CODE (retval) == PARALLEL)
28034 {
28035 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
28036 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
28037 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
28038 retval = chkp_join_splitted_slot (retval, par);
28039 }
28040 else
28041 {
28042 retval = gen_rtx_PARALLEL (VOIDmode,
28043 gen_rtvec (3, retval, b0, b1));
28044 chkp_put_regs_to_expr_list (retval);
28045 }
28046 }
28047
28048 call = gen_rtx_SET (retval, call);
28049 }
28050 vec[vec_len++] = call;
28051
28052 if (pop)
28053 {
28054 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28055 pop = gen_rtx_SET (stack_pointer_rtx, pop);
28056 vec[vec_len++] = pop;
28057 }
28058
28059 if (cfun->machine->no_caller_saved_registers
28060 && (!fndecl
28061 || (!TREE_THIS_VOLATILE (fndecl)
28062 && !lookup_attribute ("no_caller_saved_registers",
28063 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28064 {
28065 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28066 bool is_64bit_ms_abi = (TARGET_64BIT
28067 && ix86_function_abi (fndecl) == MS_ABI);
28068 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28069
28070 /* If there are no caller-saved registers, add all registers
28071 that are clobbered by the call which returns. */
28072 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28073 if (!fixed_regs[i]
28074 && (ix86_call_used_regs[i] == 1
28075 || (ix86_call_used_regs[i] & c_mask))
28076 && !STACK_REGNO_P (i)
28077 && !MMX_REGNO_P (i))
28078 clobber_reg (&use,
28079 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28080 }
28081 else if (TARGET_64BIT_MS_ABI
28082 && (!callarg2 || INTVAL (callarg2) != -2))
28083 {
28084 int const cregs_size
28085 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
28086 int i;
28087
28088 for (i = 0; i < cregs_size; i++)
28089 {
28090 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28091 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28092
28093 clobber_reg (&use, gen_rtx_REG (mode, regno));
28094 }
28095 }
28096
28097 if (vec_len > 1)
28098 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28099 call = emit_call_insn (call);
28100 if (use)
28101 CALL_INSN_FUNCTION_USAGE (call) = use;
28102
28103 return call;
28104 }
28105
28106 /* Return true if the function being called was marked with attribute
28107 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
28108 to handle the non-PIC case in the backend because there is no easy
28109 interface for the front-end to force non-PLT calls to use the GOT.
28110 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28111 to call the function marked "noplt" indirectly. */
28112
28113 static bool
28114 ix86_nopic_noplt_attribute_p (rtx call_op)
28115 {
28116 if (flag_pic || ix86_cmodel == CM_LARGE
28117 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28118 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28119 || SYMBOL_REF_LOCAL_P (call_op))
28120 return false;
28121
28122 tree symbol_decl = SYMBOL_REF_DECL (call_op);
28123
28124 if (!flag_plt
28125 || (symbol_decl != NULL_TREE
28126 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28127 return true;
28128
28129 return false;
28130 }
28131
28132 /* Output the assembly for a call instruction. */
28133
28134 const char *
28135 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
28136 {
28137 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
28138 bool seh_nop_p = false;
28139 const char *xasm;
28140
28141 if (SIBLING_CALL_P (insn))
28142 {
28143 if (direct_p)
28144 {
28145 if (ix86_nopic_noplt_attribute_p (call_op))
28146 {
28147 if (TARGET_64BIT)
28148 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28149 else
28150 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28151 }
28152 else
28153 xasm = "%!jmp\t%P0";
28154 }
28155 /* SEH epilogue detection requires the indirect branch case
28156 to include REX.W. */
28157 else if (TARGET_SEH)
28158 xasm = "%!rex.W jmp\t%A0";
28159 else
28160 xasm = "%!jmp\t%A0";
28161
28162 output_asm_insn (xasm, &call_op);
28163 return "";
28164 }
28165
28166 /* SEH unwinding can require an extra nop to be emitted in several
28167 circumstances. Determine if we have one of those. */
28168 if (TARGET_SEH)
28169 {
28170 rtx_insn *i;
28171
28172 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
28173 {
28174 /* If we get to another real insn, we don't need the nop. */
28175 if (INSN_P (i))
28176 break;
28177
28178 /* If we get to the epilogue note, prevent a catch region from
28179 being adjacent to the standard epilogue sequence. With non-call
28180 exceptions, we'll have done this during epilogue emission. */
28181 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
28182 && !flag_non_call_exceptions
28183 && !can_throw_internal (insn))
28184 {
28185 seh_nop_p = true;
28186 break;
28187 }
28188 }
28189
28190 /* If we didn't find a real insn following the call, prevent the
28191 unwinder from looking into the next function. */
28192 if (i == NULL)
28193 seh_nop_p = true;
28194 }
28195
28196 if (direct_p)
28197 {
28198 if (ix86_nopic_noplt_attribute_p (call_op))
28199 {
28200 if (TARGET_64BIT)
28201 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28202 else
28203 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28204 }
28205 else
28206 xasm = "%!call\t%P0";
28207 }
28208 else
28209 xasm = "%!call\t%A0";
28210
28211 output_asm_insn (xasm, &call_op);
28212
28213 if (seh_nop_p)
28214 return "nop";
28215
28216 return "";
28217 }
28218 \f
28219 /* Clear stack slot assignments remembered from previous functions.
28220 This is called from INIT_EXPANDERS once before RTL is emitted for each
28221 function. */
28222
28223 static struct machine_function *
28224 ix86_init_machine_status (void)
28225 {
28226 struct machine_function *f;
28227
28228 f = ggc_cleared_alloc<machine_function> ();
28229 f->use_fast_prologue_epilogue_nregs = -1;
28230 f->call_abi = ix86_abi;
28231
28232 return f;
28233 }
28234
28235 /* Return a MEM corresponding to a stack slot with mode MODE.
28236 Allocate a new slot if necessary.
28237
28238 The RTL for a function can have several slots available: N is
28239 which slot to use. */
28240
28241 rtx
28242 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
28243 {
28244 struct stack_local_entry *s;
28245
28246 gcc_assert (n < MAX_386_STACK_LOCALS);
28247
28248 for (s = ix86_stack_locals; s; s = s->next)
28249 if (s->mode == mode && s->n == n)
28250 return validize_mem (copy_rtx (s->rtl));
28251
28252 s = ggc_alloc<stack_local_entry> ();
28253 s->n = n;
28254 s->mode = mode;
28255 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
28256
28257 s->next = ix86_stack_locals;
28258 ix86_stack_locals = s;
28259 return validize_mem (copy_rtx (s->rtl));
28260 }
28261
28262 static void
28263 ix86_instantiate_decls (void)
28264 {
28265 struct stack_local_entry *s;
28266
28267 for (s = ix86_stack_locals; s; s = s->next)
28268 if (s->rtl != NULL_RTX)
28269 instantiate_decl_rtl (s->rtl);
28270 }
28271 \f
28272 /* Return the number used for encoding REG, in the range 0..7. */
28273
28274 static int
28275 reg_encoded_number (rtx reg)
28276 {
28277 unsigned regno = REGNO (reg);
28278 switch (regno)
28279 {
28280 case AX_REG:
28281 return 0;
28282 case CX_REG:
28283 return 1;
28284 case DX_REG:
28285 return 2;
28286 case BX_REG:
28287 return 3;
28288 case SP_REG:
28289 return 4;
28290 case BP_REG:
28291 return 5;
28292 case SI_REG:
28293 return 6;
28294 case DI_REG:
28295 return 7;
28296 default:
28297 break;
28298 }
28299 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
28300 return regno - FIRST_STACK_REG;
28301 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
28302 return regno - FIRST_SSE_REG;
28303 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
28304 return regno - FIRST_MMX_REG;
28305 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
28306 return regno - FIRST_REX_SSE_REG;
28307 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
28308 return regno - FIRST_REX_INT_REG;
28309 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
28310 return regno - FIRST_MASK_REG;
28311 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
28312 return regno - FIRST_BND_REG;
28313 return -1;
28314 }
28315
28316 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
28317 in its encoding if it could be relevant for ROP mitigation, otherwise
28318 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
28319 used for calculating it into them. */
28320
28321 static int
28322 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
28323 int *popno0 = 0, int *popno1 = 0)
28324 {
28325 if (asm_noperands (PATTERN (insn)) >= 0)
28326 return -1;
28327 int has_modrm = get_attr_modrm (insn);
28328 if (!has_modrm)
28329 return -1;
28330 enum attr_modrm_class cls = get_attr_modrm_class (insn);
28331 rtx op0, op1;
28332 switch (cls)
28333 {
28334 case MODRM_CLASS_OP02:
28335 gcc_assert (noperands >= 3);
28336 if (popno0)
28337 {
28338 *popno0 = 0;
28339 *popno1 = 2;
28340 }
28341 op0 = operands[0];
28342 op1 = operands[2];
28343 break;
28344 case MODRM_CLASS_OP01:
28345 gcc_assert (noperands >= 2);
28346 if (popno0)
28347 {
28348 *popno0 = 0;
28349 *popno1 = 1;
28350 }
28351 op0 = operands[0];
28352 op1 = operands[1];
28353 break;
28354 default:
28355 return -1;
28356 }
28357 if (REG_P (op0) && REG_P (op1))
28358 {
28359 int enc0 = reg_encoded_number (op0);
28360 int enc1 = reg_encoded_number (op1);
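/* Assemble the ModRM byte: mod = 11 (register-direct operand), the reg
field holds enc1 and the r/m field holds enc0. */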
28361 return 0xc0 + (enc1 << 3) + enc0;
28362 }
28363 return -1;
28364 }
28365
28366 /* Check whether x86 address PARTS is a pc-relative address. */
28367
28368 static bool
28369 rip_relative_addr_p (struct ix86_address *parts)
28370 {
28371 rtx base, index, disp;
28372
28373 base = parts->base;
28374 index = parts->index;
28375 disp = parts->disp;
28376
28377 if (disp && !base && !index)
28378 {
28379 if (TARGET_64BIT)
28380 {
28381 rtx symbol = disp;
28382
28383 if (GET_CODE (disp) == CONST)
28384 symbol = XEXP (disp, 0);
28385 if (GET_CODE (symbol) == PLUS
28386 && CONST_INT_P (XEXP (symbol, 1)))
28387 symbol = XEXP (symbol, 0);
28388
28389 if (GET_CODE (symbol) == LABEL_REF
28390 || (GET_CODE (symbol) == SYMBOL_REF
28391 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
28392 || (GET_CODE (symbol) == UNSPEC
28393 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
28394 || XINT (symbol, 1) == UNSPEC_PCREL
28395 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
28396 return true;
28397 }
28398 }
28399 return false;
28400 }
28401
28402 /* Calculate the length of the memory address in the instruction encoding.
28403 Includes addr32 prefix, does not include the one-byte modrm, opcode,
28404 or other prefixes. We never generate addr32 prefix for LEA insn. */
28405
28406 int
28407 memory_address_length (rtx addr, bool lea)
28408 {
28409 struct ix86_address parts;
28410 rtx base, index, disp;
28411 int len;
28412 int ok;
28413
28414 if (GET_CODE (addr) == PRE_DEC
28415 || GET_CODE (addr) == POST_INC
28416 || GET_CODE (addr) == PRE_MODIFY
28417 || GET_CODE (addr) == POST_MODIFY)
28418 return 0;
28419
28420 ok = ix86_decompose_address (addr, &parts);
28421 gcc_assert (ok);
28422
28423 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
28424
28425 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
28426 if (TARGET_64BIT && !lea
28427 && (SImode_address_operand (addr, VOIDmode)
28428 || (parts.base && GET_MODE (parts.base) == SImode)
28429 || (parts.index && GET_MODE (parts.index) == SImode)))
28430 len++;
28431
28432 base = parts.base;
28433 index = parts.index;
28434 disp = parts.disp;
28435
28436 if (base && SUBREG_P (base))
28437 base = SUBREG_REG (base);
28438 if (index && SUBREG_P (index))
28439 index = SUBREG_REG (index);
28440
28441 gcc_assert (base == NULL_RTX || REG_P (base));
28442 gcc_assert (index == NULL_RTX || REG_P (index));
28443
28444 /* Rule of thumb:
28445 - esp as the base always wants an index,
28446 - ebp as the base always wants a displacement,
28447 - r12 as the base always wants an index,
28448 - r13 as the base always wants a displacement. */
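/* For example, (%esp) needs a SIB byte even with no index, and (%ebp)
is only encodable as 0(%ebp), i.e. with a one-byte displacement. */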
28449
28450 /* Register Indirect. */
28451 if (base && !index && !disp)
28452 {
28453 /* esp (for its index) and ebp (for its displacement) need
28454 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
28455 code. */
28456 if (base == arg_pointer_rtx
28457 || base == frame_pointer_rtx
28458 || REGNO (base) == SP_REG
28459 || REGNO (base) == BP_REG
28460 || REGNO (base) == R12_REG
28461 || REGNO (base) == R13_REG)
28462 len++;
28463 }
28464
28465 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
28466 is not disp32, but disp32(%rip), so for disp32
28467 SIB byte is needed, unless print_operand_address
28468 optimizes it into disp32(%rip) or (%rip) is implied
28469 by UNSPEC. */
28470 else if (disp && !base && !index)
28471 {
28472 len += 4;
28473 if (rip_relative_addr_p (&parts))
28474 len++;
28475 }
28476 else
28477 {
28478 /* Find the length of the displacement constant. */
28479 if (disp)
28480 {
28481 if (base && satisfies_constraint_K (disp))
28482 len += 1;
28483 else
28484 len += 4;
28485 }
28486 /* ebp always wants a displacement. Similarly r13. */
28487 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
28488 len++;
28489
28490 /* An index requires the two-byte modrm form.... */
28491 if (index
28492 /* ...like esp (or r12), which always wants an index. */
28493 || base == arg_pointer_rtx
28494 || base == frame_pointer_rtx
28495 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
28496 len++;
28497 }
28498
28499 return len;
28500 }
28501
28502 /* Compute default value for "length_immediate" attribute. When SHORTFORM
28503 is set, expect that the insn has an 8-bit immediate alternative. */
28504 int
28505 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
28506 {
28507 int len = 0;
28508 int i;
28509 extract_insn_cached (insn);
28510 for (i = recog_data.n_operands - 1; i >= 0; --i)
28511 if (CONSTANT_P (recog_data.operand[i]))
28512 {
28513 enum attr_mode mode = get_attr_mode (insn);
28514
28515 gcc_assert (!len);
28516 if (shortform && CONST_INT_P (recog_data.operand[i]))
28517 {
28518 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
28519 switch (mode)
28520 {
28521 case MODE_QI:
28522 len = 1;
28523 continue;
28524 case MODE_HI:
28525 ival = trunc_int_for_mode (ival, HImode);
28526 break;
28527 case MODE_SI:
28528 ival = trunc_int_for_mode (ival, SImode);
28529 break;
28530 default:
28531 break;
28532 }
28533 if (IN_RANGE (ival, -128, 127))
28534 {
28535 len = 1;
28536 continue;
28537 }
28538 }
28539 switch (mode)
28540 {
28541 case MODE_QI:
28542 len = 1;
28543 break;
28544 case MODE_HI:
28545 len = 2;
28546 break;
28547 case MODE_SI:
28548 len = 4;
28549 break;
28550 /* Immediates for DImode instructions are encoded
28551 as 32-bit sign-extended values. */
28552 case MODE_DI:
28553 len = 4;
28554 break;
28555 default:
28556 fatal_insn ("unknown insn mode", insn);
28557 }
28558 }
28559 return len;
28560 }
28561
28562 /* Compute default value for "length_address" attribute. */
28563 int
28564 ix86_attr_length_address_default (rtx_insn *insn)
28565 {
28566 int i;
28567
28568 if (get_attr_type (insn) == TYPE_LEA)
28569 {
28570 rtx set = PATTERN (insn), addr;
28571
28572 if (GET_CODE (set) == PARALLEL)
28573 set = XVECEXP (set, 0, 0);
28574
28575 gcc_assert (GET_CODE (set) == SET);
28576
28577 addr = SET_SRC (set);
28578
28579 return memory_address_length (addr, true);
28580 }
28581
28582 extract_insn_cached (insn);
28583 for (i = recog_data.n_operands - 1; i >= 0; --i)
28584 {
28585 rtx op = recog_data.operand[i];
28586 if (MEM_P (op))
28587 {
28588 constrain_operands_cached (insn, reload_completed);
28589 if (which_alternative != -1)
28590 {
28591 const char *constraints = recog_data.constraints[i];
28592 int alt = which_alternative;
28593
28594 while (*constraints == '=' || *constraints == '+')
28595 constraints++;
28596 while (alt-- > 0)
28597 while (*constraints++ != ',')
28598 ;
28599 /* Skip ignored operands. */
28600 if (*constraints == 'X')
28601 continue;
28602 }
28603
28604 int len = memory_address_length (XEXP (op, 0), false);
28605
28606 /* Account for segment prefix for non-default addr spaces. */
28607 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
28608 len++;
28609
28610 return len;
28611 }
28612 }
28613 return 0;
28614 }
28615
28616 /* Compute default value for "length_vex" attribute. It includes
28617 2 or 3 byte VEX prefix and 1 opcode byte. */
28618
28619 int
28620 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
28621 bool has_vex_w)
28622 {
28623 int i;
28624
28625 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
28626 requires the 3-byte VEX prefix. */
28627 if (!has_0f_opcode || has_vex_w)
28628 return 3 + 1;
28629
28630 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
28631 if (!TARGET_64BIT)
28632 return 2 + 1;
28633
28634 extract_insn_cached (insn);
28635
28636 for (i = recog_data.n_operands - 1; i >= 0; --i)
28637 if (REG_P (recog_data.operand[i]))
28638 {
28639 /* REX.W bit uses 3 byte VEX prefix. */
28640 if (GET_MODE (recog_data.operand[i]) == DImode
28641 && GENERAL_REG_P (recog_data.operand[i]))
28642 return 3 + 1;
28643 }
28644 else
28645 {
28646 /* REX.X or REX.B bits use 3 byte VEX prefix. */
28647 if (MEM_P (recog_data.operand[i])
28648 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
28649 return 3 + 1;
28650 }
28651
28652 return 2 + 1;
28653 }
28654 \f
28655 /* Return the maximum number of instructions a cpu can issue. */
28656
28657 static int
28658 ix86_issue_rate (void)
28659 {
28660 switch (ix86_tune)
28661 {
28662 case PROCESSOR_PENTIUM:
28663 case PROCESSOR_LAKEMONT:
28664 case PROCESSOR_BONNELL:
28665 case PROCESSOR_SILVERMONT:
28666 case PROCESSOR_KNL:
28667 case PROCESSOR_INTEL:
28668 case PROCESSOR_K6:
28669 case PROCESSOR_BTVER2:
28670 case PROCESSOR_PENTIUM4:
28671 case PROCESSOR_NOCONA:
28672 return 2;
28673
28674 case PROCESSOR_PENTIUMPRO:
28675 case PROCESSOR_ATHLON:
28676 case PROCESSOR_K8:
28677 case PROCESSOR_AMDFAM10:
28678 case PROCESSOR_GENERIC:
28679 case PROCESSOR_BTVER1:
28680 return 3;
28681
28682 case PROCESSOR_BDVER1:
28683 case PROCESSOR_BDVER2:
28684 case PROCESSOR_BDVER3:
28685 case PROCESSOR_BDVER4:
28686 case PROCESSOR_ZNVER1:
28687 case PROCESSOR_CORE2:
28688 case PROCESSOR_NEHALEM:
28689 case PROCESSOR_SANDYBRIDGE:
28690 case PROCESSOR_HASWELL:
28691 return 4;
28692
28693 default:
28694 return 1;
28695 }
28696 }
28697
28698 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
28699 by DEP_INSN and nothing else set by DEP_INSN. */
28700
28701 static bool
28702 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
28703 {
28704 rtx set, set2;
28705
28706 /* Simplify the test for uninteresting insns. */
28707 if (insn_type != TYPE_SETCC
28708 && insn_type != TYPE_ICMOV
28709 && insn_type != TYPE_FCMOV
28710 && insn_type != TYPE_IBR)
28711 return false;
28712
28713 if ((set = single_set (dep_insn)) != 0)
28714 {
28715 set = SET_DEST (set);
28716 set2 = NULL_RTX;
28717 }
28718 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
28719 && XVECLEN (PATTERN (dep_insn), 0) == 2
28720 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
28721 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
28722 {
28723 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
28724 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
28725 }
28726 else
28727 return false;
28728
28729 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
28730 return false;
28731
28732 /* This test is true if the dependent insn reads the flags but
28733 not any other potentially set register. */
28734 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
28735 return false;
28736
28737 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
28738 return false;
28739
28740 return true;
28741 }
28742
28743 /* Return true iff USE_INSN has a memory address with operands set by
28744 SET_INSN. */
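/* Illustrative example (for exposition only, not from the original sources):
   given

       addl $4, %ebx         <- SET_INSN
       movl (%ebx), %eax     <- USE_INSN

   the load's address uses %ebx, which SET_INSN modifies, so this predicate
   returns true; the Pentium/Lakemont case in ix86_adjust_cost below then
   charges an extra cycle for the address generation interlock.  */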
28745
28746 bool
28747 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
28748 {
28749 int i;
28750 extract_insn_cached (use_insn);
28751 for (i = recog_data.n_operands - 1; i >= 0; --i)
28752 if (MEM_P (recog_data.operand[i]))
28753 {
28754 rtx addr = XEXP (recog_data.operand[i], 0);
28755 return modified_in_p (addr, set_insn) != 0;
28756 }
28757 return false;
28758 }
28759
28760 /* Helper function for exact_store_load_dependency.
28761 Return true if addr is found in insn. */
28762 static bool
28763 exact_dependency_1 (rtx addr, rtx insn)
28764 {
28765 enum rtx_code code;
28766 const char *format_ptr;
28767 int i, j;
28768
28769 code = GET_CODE (insn);
28770 switch (code)
28771 {
28772 case MEM:
28773 if (rtx_equal_p (addr, insn))
28774 return true;
28775 break;
28776 case REG:
28777 CASE_CONST_ANY:
28778 case SYMBOL_REF:
28779 case CODE_LABEL:
28780 case PC:
28781 case CC0:
28782 case EXPR_LIST:
28783 return false;
28784 default:
28785 break;
28786 }
28787
28788 format_ptr = GET_RTX_FORMAT (code);
28789 for (i = 0; i < GET_RTX_LENGTH (code); i++)
28790 {
28791 switch (*format_ptr++)
28792 {
28793 case 'e':
28794 if (exact_dependency_1 (addr, XEXP (insn, i)))
28795 return true;
28796 break;
28797 case 'E':
28798 for (j = 0; j < XVECLEN (insn, i); j++)
28799 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
28800 return true;
28801 break;
28802 }
28803 }
28804 return false;
28805 }
28806
28807 /* Return true if there is an exact dependency between a store and a load,
28808 i.e. the same memory address is used in both. */
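/* Illustrative example (for exposition only): given

       movw %dx, 6(%esp)     <- STORE
       movw 6(%esp), %ax     <- LOAD

   both insns use the identical address 6(%esp), so the dependency is exact;
   the Silvermont/KNL case in ix86_adjust_cost below uses this to raise the
   cost of short-integer store/load pairs affected by store forwarding.  */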
28809 static bool
28810 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
28811 {
28812 rtx set1, set2;
28813
28814 set1 = single_set (store);
28815 if (!set1)
28816 return false;
28817 if (!MEM_P (SET_DEST (set1)))
28818 return false;
28819 set2 = single_set (load);
28820 if (!set2)
28821 return false;
28822 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
28823 return true;
28824 return false;
28825 }
28826
28827 static int
28828 ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
28829 unsigned int)
28830 {
28831 enum attr_type insn_type, dep_insn_type;
28832 enum attr_memory memory;
28833 rtx set, set2;
28834 int dep_insn_code_number;
28835
28836 /* Anti and output dependencies have zero cost on all CPUs. */
28837 if (dep_type != 0)
28838 return 0;
28839
28840 dep_insn_code_number = recog_memoized (dep_insn);
28841
28842 /* If we can't recognize the insns, we can't really do anything. */
28843 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
28844 return cost;
28845
28846 insn_type = get_attr_type (insn);
28847 dep_insn_type = get_attr_type (dep_insn);
28848
28849 switch (ix86_tune)
28850 {
28851 case PROCESSOR_PENTIUM:
28852 case PROCESSOR_LAKEMONT:
28853 /* Address Generation Interlock adds a cycle of latency. */
28854 if (insn_type == TYPE_LEA)
28855 {
28856 rtx addr = PATTERN (insn);
28857
28858 if (GET_CODE (addr) == PARALLEL)
28859 addr = XVECEXP (addr, 0, 0);
28860
28861 gcc_assert (GET_CODE (addr) == SET);
28862
28863 addr = SET_SRC (addr);
28864 if (modified_in_p (addr, dep_insn))
28865 cost += 1;
28866 }
28867 else if (ix86_agi_dependent (dep_insn, insn))
28868 cost += 1;
28869
28870 /* ??? Compares pair with jump/setcc. */
28871 if (ix86_flags_dependent (insn, dep_insn, insn_type))
28872 cost = 0;
28873
28874 /* Floating point stores require value to be ready one cycle earlier. */
28875 if (insn_type == TYPE_FMOV
28876 && get_attr_memory (insn) == MEMORY_STORE
28877 && !ix86_agi_dependent (dep_insn, insn))
28878 cost += 1;
28879 break;
28880
28881 case PROCESSOR_PENTIUMPRO:
28882 /* INT->FP conversion is expensive. */
28883 if (get_attr_fp_int_src (dep_insn))
28884 cost += 5;
28885
28886 /* There is one cycle extra latency between an FP op and a store. */
28887 if (insn_type == TYPE_FMOV
28888 && (set = single_set (dep_insn)) != NULL_RTX
28889 && (set2 = single_set (insn)) != NULL_RTX
28890 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
28891 && MEM_P (SET_DEST (set2)))
28892 cost += 1;
28893
28894 memory = get_attr_memory (insn);
28895
28896 /* Show the ability of the reorder buffer to hide the latency of a load
28897 by executing it in parallel with the previous instruction, provided the
28898 previous instruction is not needed to compute the address. */
28899 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28900 && !ix86_agi_dependent (dep_insn, insn))
28901 {
28902 /* Claim moves to take one cycle, as the core can issue one load
28903 at a time and the next load can start a cycle later. */
28904 if (dep_insn_type == TYPE_IMOV
28905 || dep_insn_type == TYPE_FMOV)
28906 cost = 1;
28907 else if (cost > 1)
28908 cost--;
28909 }
28910 break;
28911
28912 case PROCESSOR_K6:
28913 /* The esp dependency is resolved before
28914 the instruction is really finished. */
28915 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28916 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28917 return 1;
28918
28919 /* INT->FP conversion is expensive. */
28920 if (get_attr_fp_int_src (dep_insn))
28921 cost += 5;
28922
28923 memory = get_attr_memory (insn);
28924
28925 /* Show the ability of the reorder buffer to hide the latency of a load
28926 by executing it in parallel with the previous instruction, provided the
28927 previous instruction is not needed to compute the address. */
28928 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28929 && !ix86_agi_dependent (dep_insn, insn))
28930 {
28931 /* Claim moves to take one cycle, as the core can issue one load
28932 at a time and the next load can start a cycle later. */
28933 if (dep_insn_type == TYPE_IMOV
28934 || dep_insn_type == TYPE_FMOV)
28935 cost = 1;
28936 else if (cost > 2)
28937 cost -= 2;
28938 else
28939 cost = 1;
28940 }
28941 break;
28942
28943 case PROCESSOR_AMDFAM10:
28944 case PROCESSOR_BDVER1:
28945 case PROCESSOR_BDVER2:
28946 case PROCESSOR_BDVER3:
28947 case PROCESSOR_BDVER4:
28948 case PROCESSOR_ZNVER1:
28949 case PROCESSOR_BTVER1:
28950 case PROCESSOR_BTVER2:
28951 case PROCESSOR_GENERIC:
28952 /* The stack engine allows push&pop instructions to execute in parallel. */
28953 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28954 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28955 return 0;
28956 /* FALLTHRU */
28957
28958 case PROCESSOR_ATHLON:
28959 case PROCESSOR_K8:
28960 memory = get_attr_memory (insn);
28961
28962 /* Show the ability of the reorder buffer to hide the latency of a load
28963 by executing it in parallel with the previous instruction, provided the
28964 previous instruction is not needed to compute the address. */
28965 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28966 && !ix86_agi_dependent (dep_insn, insn))
28967 {
28968 enum attr_unit unit = get_attr_unit (insn);
28969 int loadcost = 3;
28970
28971 /* Because of the difference between the length of integer and
28972 floating unit pipeline preparation stages, the memory operands
28973 for floating point are cheaper.
28974
28975 ??? For Athlon the difference is most probably 2. */
28976 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28977 loadcost = 3;
28978 else
28979 loadcost = TARGET_ATHLON ? 2 : 0;
28980
28981 if (cost >= loadcost)
28982 cost -= loadcost;
28983 else
28984 cost = 0;
28985 }
28986 break;
28987
28988 case PROCESSOR_CORE2:
28989 case PROCESSOR_NEHALEM:
28990 case PROCESSOR_SANDYBRIDGE:
28991 case PROCESSOR_HASWELL:
28992 /* The stack engine allows push&pop instructions to execute in parallel. */
28993 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28994 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28995 return 0;
28996
28997 memory = get_attr_memory (insn);
28998
28999 /* Show the ability of the reorder buffer to hide the latency of a load
29000 by executing it in parallel with the previous instruction, provided the
29001 previous instruction is not needed to compute the address. */
29002 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
29003 && !ix86_agi_dependent (dep_insn, insn))
29004 {
29005 if (cost >= 4)
29006 cost -= 4;
29007 else
29008 cost = 0;
29009 }
29010 break;
29011
29012 case PROCESSOR_SILVERMONT:
29013 case PROCESSOR_KNL:
29014 case PROCESSOR_INTEL:
29015 if (!reload_completed)
29016 return cost;
29017
29018 /* Increase cost of integer loads. */
29019 memory = get_attr_memory (dep_insn);
29020 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
29021 {
29022 enum attr_unit unit = get_attr_unit (dep_insn);
29023 if (unit == UNIT_INTEGER && cost == 1)
29024 {
29025 if (memory == MEMORY_LOAD)
29026 cost = 3;
29027 else
29028 {
29029 /* Increase cost of ld/st for short int types only
29030 because of store forwarding issue. */
29031 rtx set = single_set (dep_insn);
29032 if (set && (GET_MODE (SET_DEST (set)) == QImode
29033 || GET_MODE (SET_DEST (set)) == HImode))
29034 {
29035 /* Increase cost of store/load insn if exact
29036 dependence exists and it is load insn. */
29037 enum attr_memory insn_memory = get_attr_memory (insn);
29038 if (insn_memory == MEMORY_LOAD
29039 && exact_store_load_dependency (dep_insn, insn))
29040 cost = 3;
29041 }
29042 }
29043 }
29044 }
29045
29046 default:
29047 break;
29048 }
29049
29050 return cost;
29051 }
29052
29053 /* How many alternative schedules to try. This should be as wide as the
29054 scheduling freedom in the DFA, but no wider. Making this value too
29055 large results in extra work for the scheduler. */
29056
29057 static int
29058 ia32_multipass_dfa_lookahead (void)
29059 {
29060 switch (ix86_tune)
29061 {
29062 case PROCESSOR_PENTIUM:
29063 case PROCESSOR_LAKEMONT:
29064 return 2;
29065
29066 case PROCESSOR_PENTIUMPRO:
29067 case PROCESSOR_K6:
29068 return 1;
29069
29070 case PROCESSOR_BDVER1:
29071 case PROCESSOR_BDVER2:
29072 case PROCESSOR_BDVER3:
29073 case PROCESSOR_BDVER4:
29074 /* We use lookahead value 4 for BD both before and after reload
29075 schedules. The plan is to use value 8 for -O3. */
29076 return 4;
29077
29078 case PROCESSOR_CORE2:
29079 case PROCESSOR_NEHALEM:
29080 case PROCESSOR_SANDYBRIDGE:
29081 case PROCESSOR_HASWELL:
29082 case PROCESSOR_BONNELL:
29083 case PROCESSOR_SILVERMONT:
29084 case PROCESSOR_KNL:
29085 case PROCESSOR_INTEL:
29086 /* Generally, we want haifa-sched:max_issue() to look ahead as far
29087 as the number of instructions that can be executed in a cycle, i.e.,
29088 issue_rate. I wonder why tuning for many CPUs does not do this. */
29089 if (reload_completed)
29090 return ix86_issue_rate ();
29091 /* Don't use lookahead for pre-reload schedule to save compile time. */
29092 return 0;
29093
29094 default:
29095 return 0;
29096 }
29097 }
29098
29099 /* Return true if target platform supports macro-fusion. */
29100
29101 static bool
29102 ix86_macro_fusion_p ()
29103 {
29104 return TARGET_FUSE_CMP_AND_BRANCH;
29105 }
29106
29107 /* Check whether the current microarchitecture supports macro fusion
29108 for insn pair "CONDGEN + CONDJMP". Refer to
29109 "Intel Architectures Optimization Reference Manual". */
29110
29111 static bool
29112 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
29113 {
29114 rtx src, dest;
29115 enum rtx_code ccode;
29116 rtx compare_set = NULL_RTX, test_if, cond;
29117 rtx alu_set = NULL_RTX, addr = NULL_RTX;
29118
29119 if (!any_condjump_p (condjmp))
29120 return false;
29121
29122 if (get_attr_type (condgen) != TYPE_TEST
29123 && get_attr_type (condgen) != TYPE_ICMP
29124 && get_attr_type (condgen) != TYPE_INCDEC
29125 && get_attr_type (condgen) != TYPE_ALU)
29126 return false;
29127
29128 compare_set = single_set (condgen);
29129 if (compare_set == NULL_RTX
29130 && !TARGET_FUSE_ALU_AND_BRANCH)
29131 return false;
29132
29133 if (compare_set == NULL_RTX)
29134 {
29135 int i;
29136 rtx pat = PATTERN (condgen);
29137 for (i = 0; i < XVECLEN (pat, 0); i++)
29138 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
29139 {
29140 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
29141 if (GET_CODE (set_src) == COMPARE)
29142 compare_set = XVECEXP (pat, 0, i);
29143 else
29144 alu_set = XVECEXP (pat, 0, i);
29145 }
29146 }
29147 if (compare_set == NULL_RTX)
29148 return false;
29149 src = SET_SRC (compare_set);
29150 if (GET_CODE (src) != COMPARE)
29151 return false;
29152
29153 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
29154 supported. */
29155 if ((MEM_P (XEXP (src, 0))
29156 && CONST_INT_P (XEXP (src, 1)))
29157 || (MEM_P (XEXP (src, 1))
29158 && CONST_INT_P (XEXP (src, 0))))
29159 return false;
29160
29161 /* No fusion for RIP-relative address. */
29162 if (MEM_P (XEXP (src, 0)))
29163 addr = XEXP (XEXP (src, 0), 0);
29164 else if (MEM_P (XEXP (src, 1)))
29165 addr = XEXP (XEXP (src, 1), 0);
29166
29167 if (addr) {
29168 ix86_address parts;
29169 int ok = ix86_decompose_address (addr, &parts);
29170 gcc_assert (ok);
29171
29172 if (rip_relative_addr_p (&parts))
29173 return false;
29174 }
29175
29176 test_if = SET_SRC (pc_set (condjmp));
29177 cond = XEXP (test_if, 0);
29178 ccode = GET_CODE (cond);
29179 /* Check whether the conditional jump uses the Sign or Overflow flags. */
29180 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
29181 && (ccode == GE
29182 || ccode == GT
29183 || ccode == LE
29184 || ccode == LT))
29185 return false;
29186
29187 /* Return true for TYPE_TEST and TYPE_ICMP. */
29188 if (get_attr_type (condgen) == TYPE_TEST
29189 || get_attr_type (condgen) == TYPE_ICMP)
29190 return true;
29191
29192 /* The following handles the case of macro-fusion for alu + jmp. */
29193 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
29194 return false;
29195
29196 /* No fusion for alu op with memory destination operand. */
29197 dest = SET_DEST (alu_set);
29198 if (MEM_P (dest))
29199 return false;
29200
29201 /* Macro-fusion for inc/dec + unsigned conditional jump is not
29202 supported. */
29203 if (get_attr_type (condgen) == TYPE_INCDEC
29204 && (ccode == GEU
29205 || ccode == GTU
29206 || ccode == LEU
29207 || ccode == LTU))
29208 return false;
29209
29210 return true;
29211 }
29212
29213 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
29214 execution. It is applied if
29215 (1) an IMUL instruction is on the top of the list;
29216 (2) there is exactly one producer of an independent IMUL instruction
29217 in the ready list.
29218 Return index of IMUL producer if it was found and -1 otherwise. */
29219 static int
29220 do_reorder_for_imul (rtx_insn **ready, int n_ready)
29221 {
29222 rtx_insn *insn;
29223 rtx set, insn1, insn2;
29224 sd_iterator_def sd_it;
29225 dep_t dep;
29226 int index = -1;
29227 int i;
29228
29229 if (!TARGET_BONNELL)
29230 return index;
29231
29232 /* Check that IMUL instruction is on the top of ready list. */
29233 insn = ready[n_ready - 1];
29234 set = single_set (insn);
29235 if (!set)
29236 return index;
29237 if (!(GET_CODE (SET_SRC (set)) == MULT
29238 && GET_MODE (SET_SRC (set)) == SImode))
29239 return index;
29240
29241 /* Search for producer of independent IMUL instruction. */
29242 for (i = n_ready - 2; i >= 0; i--)
29243 {
29244 insn = ready[i];
29245 if (!NONDEBUG_INSN_P (insn))
29246 continue;
29247 /* Skip IMUL instruction. */
29248 insn2 = PATTERN (insn);
29249 if (GET_CODE (insn2) == PARALLEL)
29250 insn2 = XVECEXP (insn2, 0, 0);
29251 if (GET_CODE (insn2) == SET
29252 && GET_CODE (SET_SRC (insn2)) == MULT
29253 && GET_MODE (SET_SRC (insn2)) == SImode)
29254 continue;
29255
29256 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
29257 {
29258 rtx con;
29259 con = DEP_CON (dep);
29260 if (!NONDEBUG_INSN_P (con))
29261 continue;
29262 insn1 = PATTERN (con);
29263 if (GET_CODE (insn1) == PARALLEL)
29264 insn1 = XVECEXP (insn1, 0, 0);
29265
29266 if (GET_CODE (insn1) == SET
29267 && GET_CODE (SET_SRC (insn1)) == MULT
29268 && GET_MODE (SET_SRC (insn1)) == SImode)
29269 {
29270 sd_iterator_def sd_it1;
29271 dep_t dep1;
29272 /* Check if there is no other dependee for IMUL. */
29273 index = i;
29274 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
29275 {
29276 rtx pro;
29277 pro = DEP_PRO (dep1);
29278 if (!NONDEBUG_INSN_P (pro))
29279 continue;
29280 if (pro != insn)
29281 index = -1;
29282 }
29283 if (index >= 0)
29284 break;
29285 }
29286 }
29287 if (index >= 0)
29288 break;
29289 }
29290 return index;
29291 }
29292
29293 /* Try to find the best candidate on the top of ready list if two insns
29294 have the same priority - candidate is best if its dependees were
29295 scheduled earlier. Applied for Silvermont only.
29296 Return true if top 2 insns must be interchanged. */
29297 static bool
29298 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
29299 {
29300 rtx_insn *top = ready[n_ready - 1];
29301 rtx_insn *next = ready[n_ready - 2];
29302 rtx set;
29303 sd_iterator_def sd_it;
29304 dep_t dep;
29305 int clock1 = -1;
29306 int clock2 = -1;
29307 #define INSN_TICK(INSN) (HID (INSN)->tick)
29308
29309 if (!TARGET_SILVERMONT && !TARGET_INTEL)
29310 return false;
29311
29312 if (!NONDEBUG_INSN_P (top))
29313 return false;
29314 if (!NONJUMP_INSN_P (top))
29315 return false;
29316 if (!NONDEBUG_INSN_P (next))
29317 return false;
29318 if (!NONJUMP_INSN_P (next))
29319 return false;
29320 set = single_set (top);
29321 if (!set)
29322 return false;
29323 set = single_set (next);
29324 if (!set)
29325 return false;
29326
29327 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
29328 {
29329 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
29330 return false;
29331 /* Determine the winner more precisely. */
29332 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
29333 {
29334 rtx pro;
29335 pro = DEP_PRO (dep);
29336 if (!NONDEBUG_INSN_P (pro))
29337 continue;
29338 if (INSN_TICK (pro) > clock1)
29339 clock1 = INSN_TICK (pro);
29340 }
29341 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
29342 {
29343 rtx pro;
29344 pro = DEP_PRO (dep);
29345 if (!NONDEBUG_INSN_P (pro))
29346 continue;
29347 if (INSN_TICK (pro) > clock2)
29348 clock2 = INSN_TICK (pro);
29349 }
29350
29351 if (clock1 == clock2)
29352 {
29353 /* Determine winner - load must win. */
29354 enum attr_memory memory1, memory2;
29355 memory1 = get_attr_memory (top);
29356 memory2 = get_attr_memory (next);
29357 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
29358 return true;
29359 }
29360 return (bool) (clock2 < clock1);
29361 }
29362 return false;
29363 #undef INSN_TICK
29364 }
29365
29366 /* Perform possible reordering of the ready list for Atom/Silvermont only.
29367 Return issue rate. */
29368 static int
29369 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
29370 int *pn_ready, int clock_var)
29371 {
29372 int issue_rate = -1;
29373 int n_ready = *pn_ready;
29374 int i;
29375 rtx_insn *insn;
29376 int index = -1;
29377
29378 /* Set up issue rate. */
29379 issue_rate = ix86_issue_rate ();
29380
29381 /* Do reordering for BONNELL/SILVERMONT only. */
29382 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
29383 return issue_rate;
29384
29385 /* Nothing to do if ready list contains only 1 instruction. */
29386 if (n_ready <= 1)
29387 return issue_rate;
29388
29389 /* Do reordering for the post-reload scheduler only. */
29390 if (!reload_completed)
29391 return issue_rate;
29392
29393 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
29394 {
29395 if (sched_verbose > 1)
29396 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
29397 INSN_UID (ready[index]));
29398
29399 /* Put IMUL producer (ready[index]) at the top of ready list. */
29400 insn = ready[index];
29401 for (i = index; i < n_ready - 1; i++)
29402 ready[i] = ready[i + 1];
29403 ready[n_ready - 1] = insn;
29404 return issue_rate;
29405 }
29406
29407 /* Skip selective scheduling since HID is not populated in it. */
29408 if (clock_var != 0
29409 && !sel_sched_p ()
29410 && swap_top_of_ready_list (ready, n_ready))
29411 {
29412 if (sched_verbose > 1)
29413 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
29414 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
29415 /* Swap 2 top elements of ready list. */
29416 insn = ready[n_ready - 1];
29417 ready[n_ready - 1] = ready[n_ready - 2];
29418 ready[n_ready - 2] = insn;
29419 }
29420 return issue_rate;
29421 }
29422
29423 static bool
29424 ix86_class_likely_spilled_p (reg_class_t);
29425
29426 /* Return true if the lhs of INSN is a HW function argument register and
29427 set IS_SPILLED to true if it is a likely-spilled HW register. */
29428 static bool
29429 insn_is_function_arg (rtx insn, bool* is_spilled)
29430 {
29431 rtx dst;
29432
29433 if (!NONDEBUG_INSN_P (insn))
29434 return false;
29435 /* Call instructions are not movable; ignore them. */
29436 if (CALL_P (insn))
29437 return false;
29438 insn = PATTERN (insn);
29439 if (GET_CODE (insn) == PARALLEL)
29440 insn = XVECEXP (insn, 0, 0);
29441 if (GET_CODE (insn) != SET)
29442 return false;
29443 dst = SET_DEST (insn);
29444 if (REG_P (dst) && HARD_REGISTER_P (dst)
29445 && ix86_function_arg_regno_p (REGNO (dst)))
29446 {
29447 /* Is it likely spilled HW register? */
29448 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
29449 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
29450 *is_spilled = true;
29451 return true;
29452 }
29453 return false;
29454 }
29455
29456 /* Add output dependencies for a chain of adjacent function arguments, but
29457 only if there is a move to a likely-spilled HW register. Return the first
29458 argument if at least one dependence was added, or NULL otherwise. */
29459 static rtx_insn *
29460 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
29461 {
29462 rtx_insn *insn;
29463 rtx_insn *last = call;
29464 rtx_insn *first_arg = NULL;
29465 bool is_spilled = false;
29466
29467 head = PREV_INSN (head);
29468
29469 /* Find the argument-passing instruction nearest to the call. */
29470 while (true)
29471 {
29472 last = PREV_INSN (last);
29473 if (last == head)
29474 return NULL;
29475 if (!NONDEBUG_INSN_P (last))
29476 continue;
29477 if (insn_is_function_arg (last, &is_spilled))
29478 break;
29479 return NULL;
29480 }
29481
29482 first_arg = last;
29483 while (true)
29484 {
29485 insn = PREV_INSN (last);
29486 if (!INSN_P (insn))
29487 break;
29488 if (insn == head)
29489 break;
29490 if (!NONDEBUG_INSN_P (insn))
29491 {
29492 last = insn;
29493 continue;
29494 }
29495 if (insn_is_function_arg (insn, &is_spilled))
29496 {
29497 /* Add an output dependence between two function arguments if the chain
29498 of output arguments contains likely-spilled HW registers. */
29499 if (is_spilled)
29500 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29501 first_arg = last = insn;
29502 }
29503 else
29504 break;
29505 }
29506 if (!is_spilled)
29507 return NULL;
29508 return first_arg;
29509 }
29510
29511 /* Add output or anti dependency from insn to first_arg to restrict its code
29512 motion. */
29513 static void
29514 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
29515 {
29516 rtx set;
29517 rtx tmp;
29518
29519 /* Add anti dependencies for bounds stores. */
29520 if (INSN_P (insn)
29521 && GET_CODE (PATTERN (insn)) == PARALLEL
29522 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
29523 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
29524 {
29525 add_dependence (first_arg, insn, REG_DEP_ANTI);
29526 return;
29527 }
29528
29529 set = single_set (insn);
29530 if (!set)
29531 return;
29532 tmp = SET_DEST (set);
29533 if (REG_P (tmp))
29534 {
29535 /* Add output dependency to the first function argument. */
29536 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29537 return;
29538 }
29539 /* Add anti dependency. */
29540 add_dependence (first_arg, insn, REG_DEP_ANTI);
29541 }
29542
29543 /* Avoid cross-block motion of a function argument by adding a dependency
29544 from the first non-jump instruction in BB. */
29545 static void
29546 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
29547 {
29548 rtx_insn *insn = BB_END (bb);
29549
29550 while (insn)
29551 {
29552 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
29553 {
29554 rtx set = single_set (insn);
29555 if (set)
29556 {
29557 avoid_func_arg_motion (arg, insn);
29558 return;
29559 }
29560 }
29561 if (insn == BB_HEAD (bb))
29562 return;
29563 insn = PREV_INSN (insn);
29564 }
29565 }
29566
29567 /* Hook for pre-reload schedule - avoid motion of function arguments
29568 passed in likely spilled HW registers. */
29569 static void
29570 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
29571 {
29572 rtx_insn *insn;
29573 rtx_insn *first_arg = NULL;
29574 if (reload_completed)
29575 return;
29576 while (head != tail && DEBUG_INSN_P (head))
29577 head = NEXT_INSN (head);
29578 for (insn = tail; insn != head; insn = PREV_INSN (insn))
29579 if (INSN_P (insn) && CALL_P (insn))
29580 {
29581 first_arg = add_parameter_dependencies (insn, head);
29582 if (first_arg)
29583 {
29584 /* Add a dependee for the first argument to predecessors, but only
29585 if the region contains more than one block. */
29586 basic_block bb = BLOCK_FOR_INSN (insn);
29587 int rgn = CONTAINING_RGN (bb->index);
29588 int nr_blks = RGN_NR_BLOCKS (rgn);
29589 /* Skip trivial regions and region head blocks that can have
29590 predecessors outside of region. */
29591 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
29592 {
29593 edge e;
29594 edge_iterator ei;
29595
29596 /* Regions are SCCs with the exception of selective
29597 scheduling with pipelining of outer blocks enabled.
29598 So also check that immediate predecessors of a non-head
29599 block are in the same region. */
29600 FOR_EACH_EDGE (e, ei, bb->preds)
29601 {
29602 /* Avoid creating loop-carried dependencies by using the
29603 topological ordering in the region. */
29604 if (rgn == CONTAINING_RGN (e->src->index)
29605 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
29606 add_dependee_for_func_arg (first_arg, e->src);
29607 }
29608 }
29609 insn = first_arg;
29610 if (insn == head)
29611 break;
29612 }
29613 }
29614 else if (first_arg)
29615 avoid_func_arg_motion (first_arg, insn);
29616 }
29617
29618 /* Hook for pre-reload schedule - set priority of moves from likely spilled
29619 HW registers to maximum, to schedule them as soon as possible. These are
29620 moves from function argument registers at the top of the function entry
29621 and moves from function return value registers after call. */
29622 static int
29623 ix86_adjust_priority (rtx_insn *insn, int priority)
29624 {
29625 rtx set;
29626
29627 if (reload_completed)
29628 return priority;
29629
29630 if (!NONDEBUG_INSN_P (insn))
29631 return priority;
29632
29633 set = single_set (insn);
29634 if (set)
29635 {
29636 rtx tmp = SET_SRC (set);
29637 if (REG_P (tmp)
29638 && HARD_REGISTER_P (tmp)
29639 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
29640 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
29641 return current_sched_info->sched_max_insns_priority;
29642 }
29643
29644 return priority;
29645 }
29646
29647 /* Model decoder of Core 2/i7.
29648 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
29649 track the instruction fetch block boundaries and make sure that long
29650 (9+ byte) instructions are assigned to D0. */
29651
29652 /* Maximum length of an insn that can be handled by
29653 a secondary decoder unit. '8' for Core 2/i7. */
29654 static int core2i7_secondary_decoder_max_insn_size;
29655
29656 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
29657 '16' for Core 2/i7. */
29658 static int core2i7_ifetch_block_size;
29659
29660 /* Maximum number of instructions decoder can handle per cycle.
29661 '6' for Core 2/i7. */
29662 static int core2i7_ifetch_block_max_insns;
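/* A rough worked example (for exposition only), using the Core 2/i7 values
   set in ix86_sched_init_global below (8 / 16 / 6): once 14 bytes of the
   current 16-byte ifetch block are consumed, only insns of at most 2 bytes
   can still issue this cycle; an insn longer than 8 bytes can only issue as
   the first insn of the group (decoder D0); and at most 6 insns issue per
   ifetch block regardless of size.  */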
29663
29664 typedef struct ix86_first_cycle_multipass_data_ *
29665 ix86_first_cycle_multipass_data_t;
29666 typedef const struct ix86_first_cycle_multipass_data_ *
29667 const_ix86_first_cycle_multipass_data_t;
29668
29669 /* A variable to store target state across calls to max_issue within
29670 one cycle. */
29671 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
29672 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
29673
29674 /* Initialize DATA. */
29675 static void
29676 core2i7_first_cycle_multipass_init (void *_data)
29677 {
29678 ix86_first_cycle_multipass_data_t data
29679 = (ix86_first_cycle_multipass_data_t) _data;
29680
29681 data->ifetch_block_len = 0;
29682 data->ifetch_block_n_insns = 0;
29683 data->ready_try_change = NULL;
29684 data->ready_try_change_size = 0;
29685 }
29686
29687 /* Advancing the cycle; reset ifetch block counts. */
29688 static void
29689 core2i7_dfa_post_advance_cycle (void)
29690 {
29691 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
29692
29693 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
29694
29695 data->ifetch_block_len = 0;
29696 data->ifetch_block_n_insns = 0;
29697 }
29698
29699 static int min_insn_size (rtx_insn *);
29700
29701 /* Filter out insns from ready_try that the core will not be able to issue
29702 on current cycle due to decoder. */
29703 static void
29704 core2i7_first_cycle_multipass_filter_ready_try
29705 (const_ix86_first_cycle_multipass_data_t data,
29706 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
29707 {
29708 while (n_ready--)
29709 {
29710 rtx_insn *insn;
29711 int insn_size;
29712
29713 if (ready_try[n_ready])
29714 continue;
29715
29716 insn = get_ready_element (n_ready);
29717 insn_size = min_insn_size (insn);
29718
29719 if (/* If this insn is too long for a secondary decoder ... */
29720 (!first_cycle_insn_p
29721 && insn_size > core2i7_secondary_decoder_max_insn_size)
29722 /* ... or it would not fit into the ifetch block ... */
29723 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
29724 /* ... or the decoder is full already ... */
29725 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
29726 /* ... mask the insn out. */
29727 {
29728 ready_try[n_ready] = 1;
29729
29730 if (data->ready_try_change)
29731 bitmap_set_bit (data->ready_try_change, n_ready);
29732 }
29733 }
29734 }
29735
29736 /* Prepare for a new round of multipass lookahead scheduling. */
29737 static void
29738 core2i7_first_cycle_multipass_begin (void *_data,
29739 signed char *ready_try, int n_ready,
29740 bool first_cycle_insn_p)
29741 {
29742 ix86_first_cycle_multipass_data_t data
29743 = (ix86_first_cycle_multipass_data_t) _data;
29744 const_ix86_first_cycle_multipass_data_t prev_data
29745 = ix86_first_cycle_multipass_data;
29746
29747 /* Restore the state from the end of the previous round. */
29748 data->ifetch_block_len = prev_data->ifetch_block_len;
29749 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
29750
29751 /* Filter instructions that cannot be issued on current cycle due to
29752 decoder restrictions. */
29753 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
29754 first_cycle_insn_p);
29755 }
29756
29757 /* INSN is being issued in current solution. Account for its impact on
29758 the decoder model. */
29759 static void
29760 core2i7_first_cycle_multipass_issue (void *_data,
29761 signed char *ready_try, int n_ready,
29762 rtx_insn *insn, const void *_prev_data)
29763 {
29764 ix86_first_cycle_multipass_data_t data
29765 = (ix86_first_cycle_multipass_data_t) _data;
29766 const_ix86_first_cycle_multipass_data_t prev_data
29767 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
29768
29769 int insn_size = min_insn_size (insn);
29770
29771 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
29772 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
29773 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
29774 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
29775
29776 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
29777 if (!data->ready_try_change)
29778 {
29779 data->ready_try_change = sbitmap_alloc (n_ready);
29780 data->ready_try_change_size = n_ready;
29781 }
29782 else if (data->ready_try_change_size < n_ready)
29783 {
29784 data->ready_try_change = sbitmap_resize (data->ready_try_change,
29785 n_ready, 0);
29786 data->ready_try_change_size = n_ready;
29787 }
29788 bitmap_clear (data->ready_try_change);
29789
29790 /* Filter out insns from ready_try that the core will not be able to issue
29791 on current cycle due to decoder. */
29792 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
29793 false);
29794 }
29795
29796 /* Revert the effect on ready_try. */
29797 static void
29798 core2i7_first_cycle_multipass_backtrack (const void *_data,
29799 signed char *ready_try,
29800 int n_ready ATTRIBUTE_UNUSED)
29801 {
29802 const_ix86_first_cycle_multipass_data_t data
29803 = (const_ix86_first_cycle_multipass_data_t) _data;
29804 unsigned int i = 0;
29805 sbitmap_iterator sbi;
29806
29807 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
29808 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
29809 {
29810 ready_try[i] = 0;
29811 }
29812 }
29813
29814 /* Save the result of multipass lookahead scheduling for the next round. */
29815 static void
29816 core2i7_first_cycle_multipass_end (const void *_data)
29817 {
29818 const_ix86_first_cycle_multipass_data_t data
29819 = (const_ix86_first_cycle_multipass_data_t) _data;
29820 ix86_first_cycle_multipass_data_t next_data
29821 = ix86_first_cycle_multipass_data;
29822
29823 if (data != NULL)
29824 {
29825 next_data->ifetch_block_len = data->ifetch_block_len;
29826 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
29827 }
29828 }
29829
29830 /* Deallocate target data. */
29831 static void
29832 core2i7_first_cycle_multipass_fini (void *_data)
29833 {
29834 ix86_first_cycle_multipass_data_t data
29835 = (ix86_first_cycle_multipass_data_t) _data;
29836
29837 if (data->ready_try_change)
29838 {
29839 sbitmap_free (data->ready_try_change);
29840 data->ready_try_change = NULL;
29841 data->ready_try_change_size = 0;
29842 }
29843 }
29844
29845 /* Prepare for scheduling pass. */
29846 static void
29847 ix86_sched_init_global (FILE *, int, int)
29848 {
29849 /* Install scheduling hooks for current CPU. Some of these hooks are used
29850 in time-critical parts of the scheduler, so we only set them up when
29851 they are actually used. */
29852 switch (ix86_tune)
29853 {
29854 case PROCESSOR_CORE2:
29855 case PROCESSOR_NEHALEM:
29856 case PROCESSOR_SANDYBRIDGE:
29857 case PROCESSOR_HASWELL:
29858 /* Do not perform multipass scheduling for pre-reload schedule
29859 to save compile time. */
29860 if (reload_completed)
29861 {
29862 targetm.sched.dfa_post_advance_cycle
29863 = core2i7_dfa_post_advance_cycle;
29864 targetm.sched.first_cycle_multipass_init
29865 = core2i7_first_cycle_multipass_init;
29866 targetm.sched.first_cycle_multipass_begin
29867 = core2i7_first_cycle_multipass_begin;
29868 targetm.sched.first_cycle_multipass_issue
29869 = core2i7_first_cycle_multipass_issue;
29870 targetm.sched.first_cycle_multipass_backtrack
29871 = core2i7_first_cycle_multipass_backtrack;
29872 targetm.sched.first_cycle_multipass_end
29873 = core2i7_first_cycle_multipass_end;
29874 targetm.sched.first_cycle_multipass_fini
29875 = core2i7_first_cycle_multipass_fini;
29876
29877 /* Set decoder parameters. */
29878 core2i7_secondary_decoder_max_insn_size = 8;
29879 core2i7_ifetch_block_size = 16;
29880 core2i7_ifetch_block_max_insns = 6;
29881 break;
29882 }
29883 /* Fall through. */
29884 default:
29885 targetm.sched.dfa_post_advance_cycle = NULL;
29886 targetm.sched.first_cycle_multipass_init = NULL;
29887 targetm.sched.first_cycle_multipass_begin = NULL;
29888 targetm.sched.first_cycle_multipass_issue = NULL;
29889 targetm.sched.first_cycle_multipass_backtrack = NULL;
29890 targetm.sched.first_cycle_multipass_end = NULL;
29891 targetm.sched.first_cycle_multipass_fini = NULL;
29892 break;
29893 }
29894 }
29895
29896 \f
29897 /* Compute the alignment given to a constant that is being placed in memory.
29898 EXP is the constant and ALIGN is the alignment that the object would
29899 ordinarily have.
29900 The value of this function is used instead of that alignment to align
29901 the object. */
29902
29903 int
29904 ix86_constant_alignment (tree exp, int align)
29905 {
29906 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29907 || TREE_CODE (exp) == INTEGER_CST)
29908 {
29909 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
29910 return 64;
29911 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
29912 return 128;
29913 }
29914 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29915 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29916 return BITS_PER_WORD;
29917
29918 return align;
29919 }
29920
29921 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29922 the data type, and ALIGN is the alignment that the object would
29923 ordinarily have. */
29924
29925 static int
29926 iamcu_alignment (tree type, int align)
29927 {
29928 enum machine_mode mode;
29929
29930 if (align < 32 || TYPE_USER_ALIGN (type))
29931 return align;
29932
29933 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
29934 bytes. */
29935 mode = TYPE_MODE (strip_array_types (type));
29936 switch (GET_MODE_CLASS (mode))
29937 {
29938 case MODE_INT:
29939 case MODE_COMPLEX_INT:
29940 case MODE_COMPLEX_FLOAT:
29941 case MODE_FLOAT:
29942 case MODE_DECIMAL_FLOAT:
29943 return 32;
29944 default:
29945 return align;
29946 }
29947 }
29948
29949 /* Compute the alignment for a static variable.
29950 TYPE is the data type, and ALIGN is the alignment that
29951 the object would ordinarily have. The value of this function is used
29952 instead of that alignment to align the object. */
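/* Worked example (illustrative only): with the default -malign-data=compat,
   a 40-byte struct is raised to 256-bit alignment (the GCC 4.8
   compatibility bound) and, on x86-64, a 20-byte array to 128-bit alignment
   per the psABI rule handled below; with -malign-data=cacheline and a
   64-byte prefetch block, aggregates of a cache line or larger are raised
   further to 512-bit (cache-line) alignment.  */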
29953
29954 int
29955 ix86_data_alignment (tree type, int align, bool opt)
29956 {
29957 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29958 for symbols from other compilation units or symbols that don't need
29959 to bind locally. In order to preserve some ABI compatibility with
29960 those compilers, ensure we don't decrease alignment from what we
29961 used to assume. */
29962
29963 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29964
29965 /* A data structure equal to or greater than the size of a cache line
29966 (64 bytes in the Pentium 4 and other recent Intel processors, including
29967 processors based on Intel Core microarchitecture) should be aligned
29968 so that its base address is a multiple of the cache line size. */
29969
29970 int max_align
29971 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29972
29973 if (max_align < BITS_PER_WORD)
29974 max_align = BITS_PER_WORD;
29975
29976 switch (ix86_align_data_type)
29977 {
29978 case ix86_align_data_type_abi: opt = false; break;
29979 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29980 case ix86_align_data_type_cacheline: break;
29981 }
29982
29983 if (TARGET_IAMCU)
29984 align = iamcu_alignment (type, align);
29985
29986 if (opt
29987 && AGGREGATE_TYPE_P (type)
29988 && TYPE_SIZE (type)
29989 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29990 {
29991 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29992 && align < max_align_compat)
29993 align = max_align_compat;
29994 if (wi::geu_p (TYPE_SIZE (type), max_align)
29995 && align < max_align)
29996 align = max_align;
29997 }
29998
29999 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
30000 to a 16-byte boundary. */
30001 if (TARGET_64BIT)
30002 {
30003 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
30004 && TYPE_SIZE (type)
30005 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30006 && wi::geu_p (TYPE_SIZE (type), 128)
30007 && align < 128)
30008 return 128;
30009 }
30010
30011 if (!opt)
30012 return align;
30013
30014 if (TREE_CODE (type) == ARRAY_TYPE)
30015 {
30016 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30017 return 64;
30018 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30019 return 128;
30020 }
30021 else if (TREE_CODE (type) == COMPLEX_TYPE)
30022 {
30023
30024 if (TYPE_MODE (type) == DCmode && align < 64)
30025 return 64;
30026 if ((TYPE_MODE (type) == XCmode
30027 || TYPE_MODE (type) == TCmode) && align < 128)
30028 return 128;
30029 }
30030 else if ((TREE_CODE (type) == RECORD_TYPE
30031 || TREE_CODE (type) == UNION_TYPE
30032 || TREE_CODE (type) == QUAL_UNION_TYPE)
30033 && TYPE_FIELDS (type))
30034 {
30035 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30036 return 64;
30037 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30038 return 128;
30039 }
30040 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30041 || TREE_CODE (type) == INTEGER_TYPE)
30042 {
30043 if (TYPE_MODE (type) == DFmode && align < 64)
30044 return 64;
30045 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30046 return 128;
30047 }
30048
30049 return align;
30050 }
30051
30052 /* Compute the alignment for a local variable or a stack slot. EXP is
30053 the data type or decl itself, MODE is the widest mode available and
30054 ALIGN is the alignment that the object would ordinarily have. The
30055 value of this macro is used instead of that alignment to align the
30056 object. */
30057
30058 unsigned int
30059 ix86_local_alignment (tree exp, machine_mode mode,
30060 unsigned int align)
30061 {
30062 tree type, decl;
30063
30064 if (exp && DECL_P (exp))
30065 {
30066 type = TREE_TYPE (exp);
30067 decl = exp;
30068 }
30069 else
30070 {
30071 type = exp;
30072 decl = NULL;
30073 }
30074
30075 /* Don't do dynamic stack realignment for long long objects with
30076 -mpreferred-stack-boundary=2. */
30077 if (!TARGET_64BIT
30078 && align == 64
30079 && ix86_preferred_stack_boundary < 64
30080 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
30081 && (!type || !TYPE_USER_ALIGN (type))
30082 && (!decl || !DECL_USER_ALIGN (decl)))
30083 align = 32;
30084
30085 /* If TYPE is NULL, we are allocating a stack slot for caller-save
30086 register in MODE. We will return the largest alignment of XF
30087 and DF. */
30088 if (!type)
30089 {
30090 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
30091 align = GET_MODE_ALIGNMENT (DFmode);
30092 return align;
30093 }
30094
30095 /* Don't increase alignment for Intel MCU psABI. */
30096 if (TARGET_IAMCU)
30097 return align;
30098
30099 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
30100 to a 16-byte boundary. The exact wording is:
30101
30102 An array uses the same alignment as its elements, except that a local or
30103 global array variable of length at least 16 bytes or
30104 a C99 variable-length array variable always has alignment of at least 16 bytes.
30105
30106 This was added to allow use of aligned SSE instructions on arrays. The
30107 rule is meant for static storage (where the compiler cannot do the
30108 analysis by itself). We follow it for automatic variables only when
30109 convenient: we fully control everything in the function being compiled,
30110 and functions from other units cannot rely on the alignment.
30111
30112 Exclude the va_list type. It is the common case of a local array where
30113 we cannot benefit from the alignment.
30114
30115 TODO: Probably one should optimize for size only when var is not escaping. */
30116 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
30117 && TARGET_SSE)
30118 {
30119 if (AGGREGATE_TYPE_P (type)
30120 && (va_list_type_node == NULL_TREE
30121 || (TYPE_MAIN_VARIANT (type)
30122 != TYPE_MAIN_VARIANT (va_list_type_node)))
30123 && TYPE_SIZE (type)
30124 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30125 && wi::geu_p (TYPE_SIZE (type), 16)
30126 && align < 128)
30127 return 128;
30128 }
30129 if (TREE_CODE (type) == ARRAY_TYPE)
30130 {
30131 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30132 return 64;
30133 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30134 return 128;
30135 }
30136 else if (TREE_CODE (type) == COMPLEX_TYPE)
30137 {
30138 if (TYPE_MODE (type) == DCmode && align < 64)
30139 return 64;
30140 if ((TYPE_MODE (type) == XCmode
30141 || TYPE_MODE (type) == TCmode) && align < 128)
30142 return 128;
30143 }
30144 else if ((TREE_CODE (type) == RECORD_TYPE
30145 || TREE_CODE (type) == UNION_TYPE
30146 || TREE_CODE (type) == QUAL_UNION_TYPE)
30147 && TYPE_FIELDS (type))
30148 {
30149 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30150 return 64;
30151 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30152 return 128;
30153 }
30154 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30155 || TREE_CODE (type) == INTEGER_TYPE)
30156 {
30157
30158 if (TYPE_MODE (type) == DFmode && align < 64)
30159 return 64;
30160 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30161 return 128;
30162 }
30163 return align;
30164 }
30165
30166 /* Compute the minimum required alignment for dynamic stack realignment
30167 purposes for a local variable, parameter or a stack slot. EXP is
30168 the data type or decl itself, MODE is its mode and ALIGN is the
30169 alignment that the object would ordinarily have. */
30170
30171 unsigned int
30172 ix86_minimum_alignment (tree exp, machine_mode mode,
30173 unsigned int align)
30174 {
30175 tree type, decl;
30176
30177 if (exp && DECL_P (exp))
30178 {
30179 type = TREE_TYPE (exp);
30180 decl = exp;
30181 }
30182 else
30183 {
30184 type = exp;
30185 decl = NULL;
30186 }
30187
30188 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
30189 return align;
30190
30191 /* Don't do dynamic stack realignment for long long objects with
30192 -mpreferred-stack-boundary=2. */
30193 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
30194 && (!type || !TYPE_USER_ALIGN (type))
30195 && (!decl || !DECL_USER_ALIGN (decl)))
30196 {
30197 gcc_checking_assert (!TARGET_STV);
30198 return 32;
30199 }
30200
30201 return align;
30202 }
30203 \f
30204 /* Find a location for the static chain incoming to a nested function.
30205 This is a register, unless all free registers are used by arguments. */
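/* For illustration: 64-bit code always uses %r10; in 32-bit code the
   default choice below is %ecx, fastcall and thiscall functions use %eax,
   and regparm(3) functions have no free register, so the chain goes through
   the stack / %esi as described in the comment inside the function.  */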
30206
30207 static rtx
30208 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
30209 {
30210 unsigned regno;
30211
30212 /* While this function won't be called by the middle-end when a static
30213 chain isn't needed, it's also used throughout the backend so it's
30214 easiest to keep this check centralized. */
30215 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
30216 return NULL;
30217
30218 if (TARGET_64BIT)
30219 {
30220 /* We always use R10 in 64-bit mode. */
30221 regno = R10_REG;
30222 }
30223 else
30224 {
30225 const_tree fntype, fndecl;
30226 unsigned int ccvt;
30227
30228 /* By default in 32-bit mode we use ECX to pass the static chain. */
30229 regno = CX_REG;
30230
30231 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
30232 {
30233 fntype = TREE_TYPE (fndecl_or_type);
30234 fndecl = fndecl_or_type;
30235 }
30236 else
30237 {
30238 fntype = fndecl_or_type;
30239 fndecl = NULL;
30240 }
30241
30242 ccvt = ix86_get_callcvt (fntype);
30243 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30244 {
30245 /* Fastcall functions use ecx/edx for arguments, which leaves
30246 us with EAX for the static chain.
30247 Thiscall functions use ecx for arguments, which also
30248 leaves us with EAX for the static chain. */
30249 regno = AX_REG;
30250 }
30251 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30252 {
30253 /* Thiscall functions use ecx for arguments, which leaves
30254 us with EAX and EDX for the static chain.
30255 For ABI compatibility we use EAX. */
30256 regno = AX_REG;
30257 }
30258 else if (ix86_function_regparm (fntype, fndecl) == 3)
30259 {
30260 /* For regparm 3, we have no free call-clobbered registers in
30261 which to store the static chain. In order to implement this,
30262 we have the trampoline push the static chain to the stack.
30263 However, we can't push a value below the return address when
30264 we call the nested function directly, so we have to use an
30265 alternate entry point. For this we use ESI, and have the
30266 alternate entry point push ESI, so that things appear the
30267 same once we're executing the nested function. */
30268 if (incoming_p)
30269 {
30270 if (fndecl == current_function_decl)
30271 ix86_static_chain_on_stack = true;
30272 return gen_frame_mem (SImode,
30273 plus_constant (Pmode,
30274 arg_pointer_rtx, -8));
30275 }
30276 regno = SI_REG;
30277 }
30278 }
30279
30280 return gen_rtx_REG (Pmode, regno);
30281 }
30282
30283 /* Emit RTL insns to initialize the variable parts of a trampoline.
30284 FNDECL is the decl of the target address; M_TRAMP is a MEM for
30285 the trampoline, and CHAIN_VALUE is an RTX for the static chain
30286 to be passed to the target function. */
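/* For illustration, the full-size 64-bit trampoline emitted below
   (ptr_mode == DImode, 64-bit FNADDR) is the 24-byte sequence

     49 BB <fnaddr64>   movabs $FNADDR, %r11
     49 BA <chain64>    movabs $CHAIN_VALUE, %r10
     49 FF E3 90        jmp *%r11 ; nop

   and the 32-bit variant is the 10-byte sequence

     B9 <chain32>       movl $CHAIN_VALUE, %ecx   (or B8 / 68 per convention)
     E9 <rel32>         jmp <target>

   The byte values correspond to the HImode/SImode immediates stored by the
   code below.  */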
30287
30288 static void
30289 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
30290 {
30291 rtx mem, fnaddr;
30292 int opcode;
30293 int offset = 0;
30294
30295 fnaddr = XEXP (DECL_RTL (fndecl), 0);
30296
30297 if (TARGET_64BIT)
30298 {
30299 int size;
30300
30301 /* Load the function address to r11. Try to load address using
30302 the shorter movl instead of movabs. We may want to support
30303 movq for kernel mode, but kernel does not use trampolines at
30304 the moment. FNADDR is a 32-bit address and may not be in
30305 DImode when ptr_mode == SImode. Always use movl in this
30306 case. */
30307 if (ptr_mode == SImode
30308 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
30309 {
30310 fnaddr = copy_addr_to_reg (fnaddr);
30311
30312 mem = adjust_address (m_tramp, HImode, offset);
30313 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
30314
30315 mem = adjust_address (m_tramp, SImode, offset + 2);
30316 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
30317 offset += 6;
30318 }
30319 else
30320 {
30321 mem = adjust_address (m_tramp, HImode, offset);
30322 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
30323
30324 mem = adjust_address (m_tramp, DImode, offset + 2);
30325 emit_move_insn (mem, fnaddr);
30326 offset += 10;
30327 }
30328
30329 /* Load static chain using movabs to r10. Use the shorter movl
30330 instead of movabs when ptr_mode == SImode. */
30331 if (ptr_mode == SImode)
30332 {
30333 opcode = 0xba41;
30334 size = 6;
30335 }
30336 else
30337 {
30338 opcode = 0xba49;
30339 size = 10;
30340 }
30341
30342 mem = adjust_address (m_tramp, HImode, offset);
30343 emit_move_insn (mem, gen_int_mode (opcode, HImode));
30344
30345 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
30346 emit_move_insn (mem, chain_value);
30347 offset += size;
30348
30349 /* Jump to r11; the last (unused) byte is a nop, only there to
30350 pad the write out to a single 32-bit store. */
30351 mem = adjust_address (m_tramp, SImode, offset);
30352 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
30353 offset += 4;
30354 }
30355 else
30356 {
30357 rtx disp, chain;
30358
30359 /* Depending on the static chain location, either load a register
30360 with a constant, or push the constant to the stack. All of the
30361 instructions are the same size. */
30362 chain = ix86_static_chain (fndecl, true);
30363 if (REG_P (chain))
30364 {
30365 switch (REGNO (chain))
30366 {
30367 case AX_REG:
30368 opcode = 0xb8; break;
30369 case CX_REG:
30370 opcode = 0xb9; break;
30371 default:
30372 gcc_unreachable ();
30373 }
30374 }
30375 else
30376 opcode = 0x68;
30377
30378 mem = adjust_address (m_tramp, QImode, offset);
30379 emit_move_insn (mem, gen_int_mode (opcode, QImode));
30380
30381 mem = adjust_address (m_tramp, SImode, offset + 1);
30382 emit_move_insn (mem, chain_value);
30383 offset += 5;
30384
30385 mem = adjust_address (m_tramp, QImode, offset);
30386 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
30387
30388 mem = adjust_address (m_tramp, SImode, offset + 1);
30389
30390 /* Compute offset from the end of the jmp to the target function.
30391 In the case in which the trampoline stores the static chain on
30392 the stack, we need to skip the first insn which pushes the
30393 (call-saved) register static chain; this push is 1 byte. */
30394 offset += 5;
30395 disp = expand_binop (SImode, sub_optab, fnaddr,
30396 plus_constant (Pmode, XEXP (m_tramp, 0),
30397 offset - (MEM_P (chain) ? 1 : 0)),
30398 NULL_RTX, 1, OPTAB_DIRECT);
30399 emit_move_insn (mem, disp);
30400 }
30401
30402 gcc_assert (offset <= TRAMPOLINE_SIZE);
30403
30404 #ifdef HAVE_ENABLE_EXECUTE_STACK
30405 #ifdef CHECK_EXECUTE_STACK_ENABLED
30406 if (CHECK_EXECUTE_STACK_ENABLED)
30407 #endif
30408 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
30409 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
30410 #endif
30411 }
30412 \f
30413 /* The following file contains several enumerations and data structures
30414 built from the definitions in i386-builtin-types.def. */
30415
30416 #include "i386-builtin-types.inc"
30417
30418 /* Table for the ix86 builtin non-function types. */
30419 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
30420
30421 /* Retrieve an element from the above table, building some of
30422 the types lazily. */
30423
30424 static tree
30425 ix86_get_builtin_type (enum ix86_builtin_type tcode)
30426 {
30427 unsigned int index;
30428 tree type, itype;
30429
30430 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
30431
30432 type = ix86_builtin_type_tab[(int) tcode];
30433 if (type != NULL)
30434 return type;
30435
30436 gcc_assert (tcode > IX86_BT_LAST_PRIM);
30437 if (tcode <= IX86_BT_LAST_VECT)
30438 {
30439 machine_mode mode;
30440
30441 index = tcode - IX86_BT_LAST_PRIM - 1;
30442 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
30443 mode = ix86_builtin_type_vect_mode[index];
30444
30445 type = build_vector_type_for_mode (itype, mode);
30446 }
30447 else
30448 {
30449 int quals;
30450
30451 index = tcode - IX86_BT_LAST_VECT - 1;
30452 if (tcode <= IX86_BT_LAST_PTR)
30453 quals = TYPE_UNQUALIFIED;
30454 else
30455 quals = TYPE_QUAL_CONST;
30456
30457 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
30458 if (quals != TYPE_UNQUALIFIED)
30459 itype = build_qualified_type (itype, quals);
30460
30461 type = build_pointer_type (itype);
30462 }
30463
30464 ix86_builtin_type_tab[(int) tcode] = type;
30465 return type;
30466 }
30467
30468 /* Table for the ix86 builtin function types. */
30469 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
30470
30471 /* Retrieve an element from the above table, building some of
30472 the types lazily. */
30473
30474 static tree
30475 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
30476 {
30477 tree type;
30478
30479 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
30480
30481 type = ix86_builtin_func_type_tab[(int) tcode];
30482 if (type != NULL)
30483 return type;
30484
30485 if (tcode <= IX86_BT_LAST_FUNC)
30486 {
30487 unsigned start = ix86_builtin_func_start[(int) tcode];
30488 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
30489 tree rtype, atype, args = void_list_node;
30490 unsigned i;
30491
30492 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
30493 for (i = after - 1; i > start; --i)
30494 {
30495 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
30496 args = tree_cons (NULL, atype, args);
30497 }
30498
30499 type = build_function_type (rtype, args);
30500 }
30501 else
30502 {
30503 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
30504 enum ix86_builtin_func_type icode;
30505
30506 icode = ix86_builtin_func_alias_base[index];
30507 type = ix86_get_builtin_func_type (icode);
30508 }
30509
30510 ix86_builtin_func_type_tab[(int) tcode] = type;
30511 return type;
30512 }
30513
30514
30515 /* Codes for all the SSE/MMX builtins. */
30516 enum ix86_builtins
30517 {
30518 IX86_BUILTIN_ADDPS,
30519 IX86_BUILTIN_ADDSS,
30520 IX86_BUILTIN_DIVPS,
30521 IX86_BUILTIN_DIVSS,
30522 IX86_BUILTIN_MULPS,
30523 IX86_BUILTIN_MULSS,
30524 IX86_BUILTIN_SUBPS,
30525 IX86_BUILTIN_SUBSS,
30526
30527 IX86_BUILTIN_CMPEQPS,
30528 IX86_BUILTIN_CMPLTPS,
30529 IX86_BUILTIN_CMPLEPS,
30530 IX86_BUILTIN_CMPGTPS,
30531 IX86_BUILTIN_CMPGEPS,
30532 IX86_BUILTIN_CMPNEQPS,
30533 IX86_BUILTIN_CMPNLTPS,
30534 IX86_BUILTIN_CMPNLEPS,
30535 IX86_BUILTIN_CMPNGTPS,
30536 IX86_BUILTIN_CMPNGEPS,
30537 IX86_BUILTIN_CMPORDPS,
30538 IX86_BUILTIN_CMPUNORDPS,
30539 IX86_BUILTIN_CMPEQSS,
30540 IX86_BUILTIN_CMPLTSS,
30541 IX86_BUILTIN_CMPLESS,
30542 IX86_BUILTIN_CMPNEQSS,
30543 IX86_BUILTIN_CMPNLTSS,
30544 IX86_BUILTIN_CMPNLESS,
30545 IX86_BUILTIN_CMPORDSS,
30546 IX86_BUILTIN_CMPUNORDSS,
30547
30548 IX86_BUILTIN_COMIEQSS,
30549 IX86_BUILTIN_COMILTSS,
30550 IX86_BUILTIN_COMILESS,
30551 IX86_BUILTIN_COMIGTSS,
30552 IX86_BUILTIN_COMIGESS,
30553 IX86_BUILTIN_COMINEQSS,
30554 IX86_BUILTIN_UCOMIEQSS,
30555 IX86_BUILTIN_UCOMILTSS,
30556 IX86_BUILTIN_UCOMILESS,
30557 IX86_BUILTIN_UCOMIGTSS,
30558 IX86_BUILTIN_UCOMIGESS,
30559 IX86_BUILTIN_UCOMINEQSS,
30560
30561 IX86_BUILTIN_CVTPI2PS,
30562 IX86_BUILTIN_CVTPS2PI,
30563 IX86_BUILTIN_CVTSI2SS,
30564 IX86_BUILTIN_CVTSI642SS,
30565 IX86_BUILTIN_CVTSS2SI,
30566 IX86_BUILTIN_CVTSS2SI64,
30567 IX86_BUILTIN_CVTTPS2PI,
30568 IX86_BUILTIN_CVTTSS2SI,
30569 IX86_BUILTIN_CVTTSS2SI64,
30570
30571 IX86_BUILTIN_MAXPS,
30572 IX86_BUILTIN_MAXSS,
30573 IX86_BUILTIN_MINPS,
30574 IX86_BUILTIN_MINSS,
30575
30576 IX86_BUILTIN_LOADUPS,
30577 IX86_BUILTIN_STOREUPS,
30578 IX86_BUILTIN_MOVSS,
30579
30580 IX86_BUILTIN_MOVHLPS,
30581 IX86_BUILTIN_MOVLHPS,
30582 IX86_BUILTIN_LOADHPS,
30583 IX86_BUILTIN_LOADLPS,
30584 IX86_BUILTIN_STOREHPS,
30585 IX86_BUILTIN_STORELPS,
30586
30587 IX86_BUILTIN_MASKMOVQ,
30588 IX86_BUILTIN_MOVMSKPS,
30589 IX86_BUILTIN_PMOVMSKB,
30590
30591 IX86_BUILTIN_MOVNTPS,
30592 IX86_BUILTIN_MOVNTQ,
30593
30594 IX86_BUILTIN_LOADDQU,
30595 IX86_BUILTIN_STOREDQU,
30596
30597 IX86_BUILTIN_PACKSSWB,
30598 IX86_BUILTIN_PACKSSDW,
30599 IX86_BUILTIN_PACKUSWB,
30600
30601 IX86_BUILTIN_PADDB,
30602 IX86_BUILTIN_PADDW,
30603 IX86_BUILTIN_PADDD,
30604 IX86_BUILTIN_PADDQ,
30605 IX86_BUILTIN_PADDSB,
30606 IX86_BUILTIN_PADDSW,
30607 IX86_BUILTIN_PADDUSB,
30608 IX86_BUILTIN_PADDUSW,
30609 IX86_BUILTIN_PSUBB,
30610 IX86_BUILTIN_PSUBW,
30611 IX86_BUILTIN_PSUBD,
30612 IX86_BUILTIN_PSUBQ,
30613 IX86_BUILTIN_PSUBSB,
30614 IX86_BUILTIN_PSUBSW,
30615 IX86_BUILTIN_PSUBUSB,
30616 IX86_BUILTIN_PSUBUSW,
30617
30618 IX86_BUILTIN_PAND,
30619 IX86_BUILTIN_PANDN,
30620 IX86_BUILTIN_POR,
30621 IX86_BUILTIN_PXOR,
30622
30623 IX86_BUILTIN_PAVGB,
30624 IX86_BUILTIN_PAVGW,
30625
30626 IX86_BUILTIN_PCMPEQB,
30627 IX86_BUILTIN_PCMPEQW,
30628 IX86_BUILTIN_PCMPEQD,
30629 IX86_BUILTIN_PCMPGTB,
30630 IX86_BUILTIN_PCMPGTW,
30631 IX86_BUILTIN_PCMPGTD,
30632
30633 IX86_BUILTIN_PMADDWD,
30634
30635 IX86_BUILTIN_PMAXSW,
30636 IX86_BUILTIN_PMAXUB,
30637 IX86_BUILTIN_PMINSW,
30638 IX86_BUILTIN_PMINUB,
30639
30640 IX86_BUILTIN_PMULHUW,
30641 IX86_BUILTIN_PMULHW,
30642 IX86_BUILTIN_PMULLW,
30643
30644 IX86_BUILTIN_PSADBW,
30645 IX86_BUILTIN_PSHUFW,
30646
30647 IX86_BUILTIN_PSLLW,
30648 IX86_BUILTIN_PSLLD,
30649 IX86_BUILTIN_PSLLQ,
30650 IX86_BUILTIN_PSRAW,
30651 IX86_BUILTIN_PSRAD,
30652 IX86_BUILTIN_PSRLW,
30653 IX86_BUILTIN_PSRLD,
30654 IX86_BUILTIN_PSRLQ,
30655 IX86_BUILTIN_PSLLWI,
30656 IX86_BUILTIN_PSLLDI,
30657 IX86_BUILTIN_PSLLQI,
30658 IX86_BUILTIN_PSRAWI,
30659 IX86_BUILTIN_PSRADI,
30660 IX86_BUILTIN_PSRLWI,
30661 IX86_BUILTIN_PSRLDI,
30662 IX86_BUILTIN_PSRLQI,
30663
30664 IX86_BUILTIN_PUNPCKHBW,
30665 IX86_BUILTIN_PUNPCKHWD,
30666 IX86_BUILTIN_PUNPCKHDQ,
30667 IX86_BUILTIN_PUNPCKLBW,
30668 IX86_BUILTIN_PUNPCKLWD,
30669 IX86_BUILTIN_PUNPCKLDQ,
30670
30671 IX86_BUILTIN_SHUFPS,
30672
30673 IX86_BUILTIN_RCPPS,
30674 IX86_BUILTIN_RCPSS,
30675 IX86_BUILTIN_RSQRTPS,
30676 IX86_BUILTIN_RSQRTPS_NR,
30677 IX86_BUILTIN_RSQRTSS,
30678 IX86_BUILTIN_RSQRTF,
30679 IX86_BUILTIN_SQRTPS,
30680 IX86_BUILTIN_SQRTPS_NR,
30681 IX86_BUILTIN_SQRTSS,
30682
30683 IX86_BUILTIN_UNPCKHPS,
30684 IX86_BUILTIN_UNPCKLPS,
30685
30686 IX86_BUILTIN_ANDPS,
30687 IX86_BUILTIN_ANDNPS,
30688 IX86_BUILTIN_ORPS,
30689 IX86_BUILTIN_XORPS,
30690
30691 IX86_BUILTIN_EMMS,
30692 IX86_BUILTIN_LDMXCSR,
30693 IX86_BUILTIN_STMXCSR,
30694 IX86_BUILTIN_SFENCE,
30695
30696 IX86_BUILTIN_FXSAVE,
30697 IX86_BUILTIN_FXRSTOR,
30698 IX86_BUILTIN_FXSAVE64,
30699 IX86_BUILTIN_FXRSTOR64,
30700
30701 IX86_BUILTIN_XSAVE,
30702 IX86_BUILTIN_XRSTOR,
30703 IX86_BUILTIN_XSAVE64,
30704 IX86_BUILTIN_XRSTOR64,
30705
30706 IX86_BUILTIN_XSAVEOPT,
30707 IX86_BUILTIN_XSAVEOPT64,
30708
30709 IX86_BUILTIN_XSAVEC,
30710 IX86_BUILTIN_XSAVEC64,
30711
30712 IX86_BUILTIN_XSAVES,
30713 IX86_BUILTIN_XRSTORS,
30714 IX86_BUILTIN_XSAVES64,
30715 IX86_BUILTIN_XRSTORS64,
30716
30717 /* 3DNow! Original */
30718 IX86_BUILTIN_FEMMS,
30719 IX86_BUILTIN_PAVGUSB,
30720 IX86_BUILTIN_PF2ID,
30721 IX86_BUILTIN_PFACC,
30722 IX86_BUILTIN_PFADD,
30723 IX86_BUILTIN_PFCMPEQ,
30724 IX86_BUILTIN_PFCMPGE,
30725 IX86_BUILTIN_PFCMPGT,
30726 IX86_BUILTIN_PFMAX,
30727 IX86_BUILTIN_PFMIN,
30728 IX86_BUILTIN_PFMUL,
30729 IX86_BUILTIN_PFRCP,
30730 IX86_BUILTIN_PFRCPIT1,
30731 IX86_BUILTIN_PFRCPIT2,
30732 IX86_BUILTIN_PFRSQIT1,
30733 IX86_BUILTIN_PFRSQRT,
30734 IX86_BUILTIN_PFSUB,
30735 IX86_BUILTIN_PFSUBR,
30736 IX86_BUILTIN_PI2FD,
30737 IX86_BUILTIN_PMULHRW,
30738
30739 /* 3DNow! Athlon Extensions */
30740 IX86_BUILTIN_PF2IW,
30741 IX86_BUILTIN_PFNACC,
30742 IX86_BUILTIN_PFPNACC,
30743 IX86_BUILTIN_PI2FW,
30744 IX86_BUILTIN_PSWAPDSI,
30745 IX86_BUILTIN_PSWAPDSF,
30746
30747 /* SSE2 */
30748 IX86_BUILTIN_ADDPD,
30749 IX86_BUILTIN_ADDSD,
30750 IX86_BUILTIN_DIVPD,
30751 IX86_BUILTIN_DIVSD,
30752 IX86_BUILTIN_MULPD,
30753 IX86_BUILTIN_MULSD,
30754 IX86_BUILTIN_SUBPD,
30755 IX86_BUILTIN_SUBSD,
30756
30757 IX86_BUILTIN_CMPEQPD,
30758 IX86_BUILTIN_CMPLTPD,
30759 IX86_BUILTIN_CMPLEPD,
30760 IX86_BUILTIN_CMPGTPD,
30761 IX86_BUILTIN_CMPGEPD,
30762 IX86_BUILTIN_CMPNEQPD,
30763 IX86_BUILTIN_CMPNLTPD,
30764 IX86_BUILTIN_CMPNLEPD,
30765 IX86_BUILTIN_CMPNGTPD,
30766 IX86_BUILTIN_CMPNGEPD,
30767 IX86_BUILTIN_CMPORDPD,
30768 IX86_BUILTIN_CMPUNORDPD,
30769 IX86_BUILTIN_CMPEQSD,
30770 IX86_BUILTIN_CMPLTSD,
30771 IX86_BUILTIN_CMPLESD,
30772 IX86_BUILTIN_CMPNEQSD,
30773 IX86_BUILTIN_CMPNLTSD,
30774 IX86_BUILTIN_CMPNLESD,
30775 IX86_BUILTIN_CMPORDSD,
30776 IX86_BUILTIN_CMPUNORDSD,
30777
30778 IX86_BUILTIN_COMIEQSD,
30779 IX86_BUILTIN_COMILTSD,
30780 IX86_BUILTIN_COMILESD,
30781 IX86_BUILTIN_COMIGTSD,
30782 IX86_BUILTIN_COMIGESD,
30783 IX86_BUILTIN_COMINEQSD,
30784 IX86_BUILTIN_UCOMIEQSD,
30785 IX86_BUILTIN_UCOMILTSD,
30786 IX86_BUILTIN_UCOMILESD,
30787 IX86_BUILTIN_UCOMIGTSD,
30788 IX86_BUILTIN_UCOMIGESD,
30789 IX86_BUILTIN_UCOMINEQSD,
30790
30791 IX86_BUILTIN_MAXPD,
30792 IX86_BUILTIN_MAXSD,
30793 IX86_BUILTIN_MINPD,
30794 IX86_BUILTIN_MINSD,
30795
30796 IX86_BUILTIN_ANDPD,
30797 IX86_BUILTIN_ANDNPD,
30798 IX86_BUILTIN_ORPD,
30799 IX86_BUILTIN_XORPD,
30800
30801 IX86_BUILTIN_SQRTPD,
30802 IX86_BUILTIN_SQRTSD,
30803
30804 IX86_BUILTIN_UNPCKHPD,
30805 IX86_BUILTIN_UNPCKLPD,
30806
30807 IX86_BUILTIN_SHUFPD,
30808
30809 IX86_BUILTIN_LOADUPD,
30810 IX86_BUILTIN_STOREUPD,
30811 IX86_BUILTIN_MOVSD,
30812
30813 IX86_BUILTIN_LOADHPD,
30814 IX86_BUILTIN_LOADLPD,
30815
30816 IX86_BUILTIN_CVTDQ2PD,
30817 IX86_BUILTIN_CVTDQ2PS,
30818
30819 IX86_BUILTIN_CVTPD2DQ,
30820 IX86_BUILTIN_CVTPD2PI,
30821 IX86_BUILTIN_CVTPD2PS,
30822 IX86_BUILTIN_CVTTPD2DQ,
30823 IX86_BUILTIN_CVTTPD2PI,
30824
30825 IX86_BUILTIN_CVTPI2PD,
30826 IX86_BUILTIN_CVTSI2SD,
30827 IX86_BUILTIN_CVTSI642SD,
30828
30829 IX86_BUILTIN_CVTSD2SI,
30830 IX86_BUILTIN_CVTSD2SI64,
30831 IX86_BUILTIN_CVTSD2SS,
30832 IX86_BUILTIN_CVTSS2SD,
30833 IX86_BUILTIN_CVTTSD2SI,
30834 IX86_BUILTIN_CVTTSD2SI64,
30835
30836 IX86_BUILTIN_CVTPS2DQ,
30837 IX86_BUILTIN_CVTPS2PD,
30838 IX86_BUILTIN_CVTTPS2DQ,
30839
30840 IX86_BUILTIN_MOVNTI,
30841 IX86_BUILTIN_MOVNTI64,
30842 IX86_BUILTIN_MOVNTPD,
30843 IX86_BUILTIN_MOVNTDQ,
30844
30845 IX86_BUILTIN_MOVQ128,
30846
30847 /* SSE2 MMX */
30848 IX86_BUILTIN_MASKMOVDQU,
30849 IX86_BUILTIN_MOVMSKPD,
30850 IX86_BUILTIN_PMOVMSKB128,
30851
30852 IX86_BUILTIN_PACKSSWB128,
30853 IX86_BUILTIN_PACKSSDW128,
30854 IX86_BUILTIN_PACKUSWB128,
30855
30856 IX86_BUILTIN_PADDB128,
30857 IX86_BUILTIN_PADDW128,
30858 IX86_BUILTIN_PADDD128,
30859 IX86_BUILTIN_PADDQ128,
30860 IX86_BUILTIN_PADDSB128,
30861 IX86_BUILTIN_PADDSW128,
30862 IX86_BUILTIN_PADDUSB128,
30863 IX86_BUILTIN_PADDUSW128,
30864 IX86_BUILTIN_PSUBB128,
30865 IX86_BUILTIN_PSUBW128,
30866 IX86_BUILTIN_PSUBD128,
30867 IX86_BUILTIN_PSUBQ128,
30868 IX86_BUILTIN_PSUBSB128,
30869 IX86_BUILTIN_PSUBSW128,
30870 IX86_BUILTIN_PSUBUSB128,
30871 IX86_BUILTIN_PSUBUSW128,
30872
30873 IX86_BUILTIN_PAND128,
30874 IX86_BUILTIN_PANDN128,
30875 IX86_BUILTIN_POR128,
30876 IX86_BUILTIN_PXOR128,
30877
30878 IX86_BUILTIN_PAVGB128,
30879 IX86_BUILTIN_PAVGW128,
30880
30881 IX86_BUILTIN_PCMPEQB128,
30882 IX86_BUILTIN_PCMPEQW128,
30883 IX86_BUILTIN_PCMPEQD128,
30884 IX86_BUILTIN_PCMPGTB128,
30885 IX86_BUILTIN_PCMPGTW128,
30886 IX86_BUILTIN_PCMPGTD128,
30887
30888 IX86_BUILTIN_PMADDWD128,
30889
30890 IX86_BUILTIN_PMAXSW128,
30891 IX86_BUILTIN_PMAXUB128,
30892 IX86_BUILTIN_PMINSW128,
30893 IX86_BUILTIN_PMINUB128,
30894
30895 IX86_BUILTIN_PMULUDQ,
30896 IX86_BUILTIN_PMULUDQ128,
30897 IX86_BUILTIN_PMULHUW128,
30898 IX86_BUILTIN_PMULHW128,
30899 IX86_BUILTIN_PMULLW128,
30900
30901 IX86_BUILTIN_PSADBW128,
30902 IX86_BUILTIN_PSHUFHW,
30903 IX86_BUILTIN_PSHUFLW,
30904 IX86_BUILTIN_PSHUFD,
30905
30906 IX86_BUILTIN_PSLLDQI128,
30907 IX86_BUILTIN_PSLLWI128,
30908 IX86_BUILTIN_PSLLDI128,
30909 IX86_BUILTIN_PSLLQI128,
30910 IX86_BUILTIN_PSRAWI128,
30911 IX86_BUILTIN_PSRADI128,
30912 IX86_BUILTIN_PSRLDQI128,
30913 IX86_BUILTIN_PSRLWI128,
30914 IX86_BUILTIN_PSRLDI128,
30915 IX86_BUILTIN_PSRLQI128,
30916
30917 IX86_BUILTIN_PSLLDQ128,
30918 IX86_BUILTIN_PSLLW128,
30919 IX86_BUILTIN_PSLLD128,
30920 IX86_BUILTIN_PSLLQ128,
30921 IX86_BUILTIN_PSRAW128,
30922 IX86_BUILTIN_PSRAD128,
30923 IX86_BUILTIN_PSRLW128,
30924 IX86_BUILTIN_PSRLD128,
30925 IX86_BUILTIN_PSRLQ128,
30926
30927 IX86_BUILTIN_PUNPCKHBW128,
30928 IX86_BUILTIN_PUNPCKHWD128,
30929 IX86_BUILTIN_PUNPCKHDQ128,
30930 IX86_BUILTIN_PUNPCKHQDQ128,
30931 IX86_BUILTIN_PUNPCKLBW128,
30932 IX86_BUILTIN_PUNPCKLWD128,
30933 IX86_BUILTIN_PUNPCKLDQ128,
30934 IX86_BUILTIN_PUNPCKLQDQ128,
30935
30936 IX86_BUILTIN_CLFLUSH,
30937 IX86_BUILTIN_MFENCE,
30938 IX86_BUILTIN_LFENCE,
30939 IX86_BUILTIN_PAUSE,
30940
30941 IX86_BUILTIN_FNSTENV,
30942 IX86_BUILTIN_FLDENV,
30943 IX86_BUILTIN_FNSTSW,
30944 IX86_BUILTIN_FNCLEX,
30945
30946 IX86_BUILTIN_BSRSI,
30947 IX86_BUILTIN_BSRDI,
30948 IX86_BUILTIN_RDPMC,
30949 IX86_BUILTIN_RDTSC,
30950 IX86_BUILTIN_RDTSCP,
30951 IX86_BUILTIN_ROLQI,
30952 IX86_BUILTIN_ROLHI,
30953 IX86_BUILTIN_RORQI,
30954 IX86_BUILTIN_RORHI,
30955
30956 /* SSE3. */
30957 IX86_BUILTIN_ADDSUBPS,
30958 IX86_BUILTIN_HADDPS,
30959 IX86_BUILTIN_HSUBPS,
30960 IX86_BUILTIN_MOVSHDUP,
30961 IX86_BUILTIN_MOVSLDUP,
30962 IX86_BUILTIN_ADDSUBPD,
30963 IX86_BUILTIN_HADDPD,
30964 IX86_BUILTIN_HSUBPD,
30965 IX86_BUILTIN_LDDQU,
30966
30967 IX86_BUILTIN_MONITOR,
30968 IX86_BUILTIN_MWAIT,
30969 IX86_BUILTIN_CLZERO,
30970
30971 /* SSSE3. */
30972 IX86_BUILTIN_PHADDW,
30973 IX86_BUILTIN_PHADDD,
30974 IX86_BUILTIN_PHADDSW,
30975 IX86_BUILTIN_PHSUBW,
30976 IX86_BUILTIN_PHSUBD,
30977 IX86_BUILTIN_PHSUBSW,
30978 IX86_BUILTIN_PMADDUBSW,
30979 IX86_BUILTIN_PMULHRSW,
30980 IX86_BUILTIN_PSHUFB,
30981 IX86_BUILTIN_PSIGNB,
30982 IX86_BUILTIN_PSIGNW,
30983 IX86_BUILTIN_PSIGND,
30984 IX86_BUILTIN_PALIGNR,
30985 IX86_BUILTIN_PABSB,
30986 IX86_BUILTIN_PABSW,
30987 IX86_BUILTIN_PABSD,
30988
30989 IX86_BUILTIN_PHADDW128,
30990 IX86_BUILTIN_PHADDD128,
30991 IX86_BUILTIN_PHADDSW128,
30992 IX86_BUILTIN_PHSUBW128,
30993 IX86_BUILTIN_PHSUBD128,
30994 IX86_BUILTIN_PHSUBSW128,
30995 IX86_BUILTIN_PMADDUBSW128,
30996 IX86_BUILTIN_PMULHRSW128,
30997 IX86_BUILTIN_PSHUFB128,
30998 IX86_BUILTIN_PSIGNB128,
30999 IX86_BUILTIN_PSIGNW128,
31000 IX86_BUILTIN_PSIGND128,
31001 IX86_BUILTIN_PALIGNR128,
31002 IX86_BUILTIN_PABSB128,
31003 IX86_BUILTIN_PABSW128,
31004 IX86_BUILTIN_PABSD128,
31005
31006 /* AMDFAM10 - SSE4A New Instructions. */
31007 IX86_BUILTIN_MOVNTSD,
31008 IX86_BUILTIN_MOVNTSS,
31009 IX86_BUILTIN_EXTRQI,
31010 IX86_BUILTIN_EXTRQ,
31011 IX86_BUILTIN_INSERTQI,
31012 IX86_BUILTIN_INSERTQ,
31013
31014 /* SSE4.1. */
31015 IX86_BUILTIN_BLENDPD,
31016 IX86_BUILTIN_BLENDPS,
31017 IX86_BUILTIN_BLENDVPD,
31018 IX86_BUILTIN_BLENDVPS,
31019 IX86_BUILTIN_PBLENDVB128,
31020 IX86_BUILTIN_PBLENDW128,
31021
31022 IX86_BUILTIN_DPPD,
31023 IX86_BUILTIN_DPPS,
31024
31025 IX86_BUILTIN_INSERTPS128,
31026
31027 IX86_BUILTIN_MOVNTDQA,
31028 IX86_BUILTIN_MPSADBW128,
31029 IX86_BUILTIN_PACKUSDW128,
31030 IX86_BUILTIN_PCMPEQQ,
31031 IX86_BUILTIN_PHMINPOSUW128,
31032
31033 IX86_BUILTIN_PMAXSB128,
31034 IX86_BUILTIN_PMAXSD128,
31035 IX86_BUILTIN_PMAXUD128,
31036 IX86_BUILTIN_PMAXUW128,
31037
31038 IX86_BUILTIN_PMINSB128,
31039 IX86_BUILTIN_PMINSD128,
31040 IX86_BUILTIN_PMINUD128,
31041 IX86_BUILTIN_PMINUW128,
31042
31043 IX86_BUILTIN_PMOVSXBW128,
31044 IX86_BUILTIN_PMOVSXBD128,
31045 IX86_BUILTIN_PMOVSXBQ128,
31046 IX86_BUILTIN_PMOVSXWD128,
31047 IX86_BUILTIN_PMOVSXWQ128,
31048 IX86_BUILTIN_PMOVSXDQ128,
31049
31050 IX86_BUILTIN_PMOVZXBW128,
31051 IX86_BUILTIN_PMOVZXBD128,
31052 IX86_BUILTIN_PMOVZXBQ128,
31053 IX86_BUILTIN_PMOVZXWD128,
31054 IX86_BUILTIN_PMOVZXWQ128,
31055 IX86_BUILTIN_PMOVZXDQ128,
31056
31057 IX86_BUILTIN_PMULDQ128,
31058 IX86_BUILTIN_PMULLD128,
31059
31060 IX86_BUILTIN_ROUNDSD,
31061 IX86_BUILTIN_ROUNDSS,
31062
31063 IX86_BUILTIN_ROUNDPD,
31064 IX86_BUILTIN_ROUNDPS,
31065
31066 IX86_BUILTIN_FLOORPD,
31067 IX86_BUILTIN_CEILPD,
31068 IX86_BUILTIN_TRUNCPD,
31069 IX86_BUILTIN_RINTPD,
31070 IX86_BUILTIN_ROUNDPD_AZ,
31071
31072 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
31073 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
31074 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
31075
31076 IX86_BUILTIN_FLOORPS,
31077 IX86_BUILTIN_CEILPS,
31078 IX86_BUILTIN_TRUNCPS,
31079 IX86_BUILTIN_RINTPS,
31080 IX86_BUILTIN_ROUNDPS_AZ,
31081
31082 IX86_BUILTIN_FLOORPS_SFIX,
31083 IX86_BUILTIN_CEILPS_SFIX,
31084 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
31085
31086 IX86_BUILTIN_PTESTZ,
31087 IX86_BUILTIN_PTESTC,
31088 IX86_BUILTIN_PTESTNZC,
31089
31090 IX86_BUILTIN_VEC_INIT_V2SI,
31091 IX86_BUILTIN_VEC_INIT_V4HI,
31092 IX86_BUILTIN_VEC_INIT_V8QI,
31093 IX86_BUILTIN_VEC_EXT_V2DF,
31094 IX86_BUILTIN_VEC_EXT_V2DI,
31095 IX86_BUILTIN_VEC_EXT_V4SF,
31096 IX86_BUILTIN_VEC_EXT_V4SI,
31097 IX86_BUILTIN_VEC_EXT_V8HI,
31098 IX86_BUILTIN_VEC_EXT_V2SI,
31099 IX86_BUILTIN_VEC_EXT_V4HI,
31100 IX86_BUILTIN_VEC_EXT_V16QI,
31101 IX86_BUILTIN_VEC_SET_V2DI,
31102 IX86_BUILTIN_VEC_SET_V4SF,
31103 IX86_BUILTIN_VEC_SET_V4SI,
31104 IX86_BUILTIN_VEC_SET_V8HI,
31105 IX86_BUILTIN_VEC_SET_V4HI,
31106 IX86_BUILTIN_VEC_SET_V16QI,
31107
31108 IX86_BUILTIN_VEC_PACK_SFIX,
31109 IX86_BUILTIN_VEC_PACK_SFIX256,
31110
31111 /* SSE4.2. */
31112 IX86_BUILTIN_CRC32QI,
31113 IX86_BUILTIN_CRC32HI,
31114 IX86_BUILTIN_CRC32SI,
31115 IX86_BUILTIN_CRC32DI,
31116
31117 IX86_BUILTIN_PCMPESTRI128,
31118 IX86_BUILTIN_PCMPESTRM128,
31119 IX86_BUILTIN_PCMPESTRA128,
31120 IX86_BUILTIN_PCMPESTRC128,
31121 IX86_BUILTIN_PCMPESTRO128,
31122 IX86_BUILTIN_PCMPESTRS128,
31123 IX86_BUILTIN_PCMPESTRZ128,
31124 IX86_BUILTIN_PCMPISTRI128,
31125 IX86_BUILTIN_PCMPISTRM128,
31126 IX86_BUILTIN_PCMPISTRA128,
31127 IX86_BUILTIN_PCMPISTRC128,
31128 IX86_BUILTIN_PCMPISTRO128,
31129 IX86_BUILTIN_PCMPISTRS128,
31130 IX86_BUILTIN_PCMPISTRZ128,
31131
31132 IX86_BUILTIN_PCMPGTQ,
31133
31134 /* AES instructions */
31135 IX86_BUILTIN_AESENC128,
31136 IX86_BUILTIN_AESENCLAST128,
31137 IX86_BUILTIN_AESDEC128,
31138 IX86_BUILTIN_AESDECLAST128,
31139 IX86_BUILTIN_AESIMC128,
31140 IX86_BUILTIN_AESKEYGENASSIST128,
31141
31142 /* PCLMUL instruction */
31143 IX86_BUILTIN_PCLMULQDQ128,
31144
31145 /* AVX */
31146 IX86_BUILTIN_ADDPD256,
31147 IX86_BUILTIN_ADDPS256,
31148 IX86_BUILTIN_ADDSUBPD256,
31149 IX86_BUILTIN_ADDSUBPS256,
31150 IX86_BUILTIN_ANDPD256,
31151 IX86_BUILTIN_ANDPS256,
31152 IX86_BUILTIN_ANDNPD256,
31153 IX86_BUILTIN_ANDNPS256,
31154 IX86_BUILTIN_BLENDPD256,
31155 IX86_BUILTIN_BLENDPS256,
31156 IX86_BUILTIN_BLENDVPD256,
31157 IX86_BUILTIN_BLENDVPS256,
31158 IX86_BUILTIN_DIVPD256,
31159 IX86_BUILTIN_DIVPS256,
31160 IX86_BUILTIN_DPPS256,
31161 IX86_BUILTIN_HADDPD256,
31162 IX86_BUILTIN_HADDPS256,
31163 IX86_BUILTIN_HSUBPD256,
31164 IX86_BUILTIN_HSUBPS256,
31165 IX86_BUILTIN_MAXPD256,
31166 IX86_BUILTIN_MAXPS256,
31167 IX86_BUILTIN_MINPD256,
31168 IX86_BUILTIN_MINPS256,
31169 IX86_BUILTIN_MULPD256,
31170 IX86_BUILTIN_MULPS256,
31171 IX86_BUILTIN_ORPD256,
31172 IX86_BUILTIN_ORPS256,
31173 IX86_BUILTIN_SHUFPD256,
31174 IX86_BUILTIN_SHUFPS256,
31175 IX86_BUILTIN_SUBPD256,
31176 IX86_BUILTIN_SUBPS256,
31177 IX86_BUILTIN_XORPD256,
31178 IX86_BUILTIN_XORPS256,
31179 IX86_BUILTIN_CMPSD,
31180 IX86_BUILTIN_CMPSS,
31181 IX86_BUILTIN_CMPPD,
31182 IX86_BUILTIN_CMPPS,
31183 IX86_BUILTIN_CMPPD256,
31184 IX86_BUILTIN_CMPPS256,
31185 IX86_BUILTIN_CVTDQ2PD256,
31186 IX86_BUILTIN_CVTDQ2PS256,
31187 IX86_BUILTIN_CVTPD2PS256,
31188 IX86_BUILTIN_CVTPS2DQ256,
31189 IX86_BUILTIN_CVTPS2PD256,
31190 IX86_BUILTIN_CVTTPD2DQ256,
31191 IX86_BUILTIN_CVTPD2DQ256,
31192 IX86_BUILTIN_CVTTPS2DQ256,
31193 IX86_BUILTIN_EXTRACTF128PD256,
31194 IX86_BUILTIN_EXTRACTF128PS256,
31195 IX86_BUILTIN_EXTRACTF128SI256,
31196 IX86_BUILTIN_VZEROALL,
31197 IX86_BUILTIN_VZEROUPPER,
31198 IX86_BUILTIN_VPERMILVARPD,
31199 IX86_BUILTIN_VPERMILVARPS,
31200 IX86_BUILTIN_VPERMILVARPD256,
31201 IX86_BUILTIN_VPERMILVARPS256,
31202 IX86_BUILTIN_VPERMILPD,
31203 IX86_BUILTIN_VPERMILPS,
31204 IX86_BUILTIN_VPERMILPD256,
31205 IX86_BUILTIN_VPERMILPS256,
31206 IX86_BUILTIN_VPERMIL2PD,
31207 IX86_BUILTIN_VPERMIL2PS,
31208 IX86_BUILTIN_VPERMIL2PD256,
31209 IX86_BUILTIN_VPERMIL2PS256,
31210 IX86_BUILTIN_VPERM2F128PD256,
31211 IX86_BUILTIN_VPERM2F128PS256,
31212 IX86_BUILTIN_VPERM2F128SI256,
31213 IX86_BUILTIN_VBROADCASTSS,
31214 IX86_BUILTIN_VBROADCASTSD256,
31215 IX86_BUILTIN_VBROADCASTSS256,
31216 IX86_BUILTIN_VBROADCASTPD256,
31217 IX86_BUILTIN_VBROADCASTPS256,
31218 IX86_BUILTIN_VINSERTF128PD256,
31219 IX86_BUILTIN_VINSERTF128PS256,
31220 IX86_BUILTIN_VINSERTF128SI256,
31221 IX86_BUILTIN_LOADUPD256,
31222 IX86_BUILTIN_LOADUPS256,
31223 IX86_BUILTIN_STOREUPD256,
31224 IX86_BUILTIN_STOREUPS256,
31225 IX86_BUILTIN_LDDQU256,
31226 IX86_BUILTIN_MOVNTDQ256,
31227 IX86_BUILTIN_MOVNTPD256,
31228 IX86_BUILTIN_MOVNTPS256,
31229 IX86_BUILTIN_LOADDQU256,
31230 IX86_BUILTIN_STOREDQU256,
31231 IX86_BUILTIN_MASKLOADPD,
31232 IX86_BUILTIN_MASKLOADPS,
31233 IX86_BUILTIN_MASKSTOREPD,
31234 IX86_BUILTIN_MASKSTOREPS,
31235 IX86_BUILTIN_MASKLOADPD256,
31236 IX86_BUILTIN_MASKLOADPS256,
31237 IX86_BUILTIN_MASKSTOREPD256,
31238 IX86_BUILTIN_MASKSTOREPS256,
31239 IX86_BUILTIN_MOVSHDUP256,
31240 IX86_BUILTIN_MOVSLDUP256,
31241 IX86_BUILTIN_MOVDDUP256,
31242
31243 IX86_BUILTIN_SQRTPD256,
31244 IX86_BUILTIN_SQRTPS256,
31245 IX86_BUILTIN_SQRTPS_NR256,
31246 IX86_BUILTIN_RSQRTPS256,
31247 IX86_BUILTIN_RSQRTPS_NR256,
31248
31249 IX86_BUILTIN_RCPPS256,
31250
31251 IX86_BUILTIN_ROUNDPD256,
31252 IX86_BUILTIN_ROUNDPS256,
31253
31254 IX86_BUILTIN_FLOORPD256,
31255 IX86_BUILTIN_CEILPD256,
31256 IX86_BUILTIN_TRUNCPD256,
31257 IX86_BUILTIN_RINTPD256,
31258 IX86_BUILTIN_ROUNDPD_AZ256,
31259
31260 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
31261 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
31262 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
31263
31264 IX86_BUILTIN_FLOORPS256,
31265 IX86_BUILTIN_CEILPS256,
31266 IX86_BUILTIN_TRUNCPS256,
31267 IX86_BUILTIN_RINTPS256,
31268 IX86_BUILTIN_ROUNDPS_AZ256,
31269
31270 IX86_BUILTIN_FLOORPS_SFIX256,
31271 IX86_BUILTIN_CEILPS_SFIX256,
31272 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
31273
31274 IX86_BUILTIN_UNPCKHPD256,
31275 IX86_BUILTIN_UNPCKLPD256,
31276 IX86_BUILTIN_UNPCKHPS256,
31277 IX86_BUILTIN_UNPCKLPS256,
31278
31279 IX86_BUILTIN_SI256_SI,
31280 IX86_BUILTIN_PS256_PS,
31281 IX86_BUILTIN_PD256_PD,
31282 IX86_BUILTIN_SI_SI256,
31283 IX86_BUILTIN_PS_PS256,
31284 IX86_BUILTIN_PD_PD256,
31285
31286 IX86_BUILTIN_VTESTZPD,
31287 IX86_BUILTIN_VTESTCPD,
31288 IX86_BUILTIN_VTESTNZCPD,
31289 IX86_BUILTIN_VTESTZPS,
31290 IX86_BUILTIN_VTESTCPS,
31291 IX86_BUILTIN_VTESTNZCPS,
31292 IX86_BUILTIN_VTESTZPD256,
31293 IX86_BUILTIN_VTESTCPD256,
31294 IX86_BUILTIN_VTESTNZCPD256,
31295 IX86_BUILTIN_VTESTZPS256,
31296 IX86_BUILTIN_VTESTCPS256,
31297 IX86_BUILTIN_VTESTNZCPS256,
31298 IX86_BUILTIN_PTESTZ256,
31299 IX86_BUILTIN_PTESTC256,
31300 IX86_BUILTIN_PTESTNZC256,
31301
31302 IX86_BUILTIN_MOVMSKPD256,
31303 IX86_BUILTIN_MOVMSKPS256,
31304
31305 /* AVX2 */
31306 IX86_BUILTIN_MPSADBW256,
31307 IX86_BUILTIN_PABSB256,
31308 IX86_BUILTIN_PABSW256,
31309 IX86_BUILTIN_PABSD256,
31310 IX86_BUILTIN_PACKSSDW256,
31311 IX86_BUILTIN_PACKSSWB256,
31312 IX86_BUILTIN_PACKUSDW256,
31313 IX86_BUILTIN_PACKUSWB256,
31314 IX86_BUILTIN_PADDB256,
31315 IX86_BUILTIN_PADDW256,
31316 IX86_BUILTIN_PADDD256,
31317 IX86_BUILTIN_PADDQ256,
31318 IX86_BUILTIN_PADDSB256,
31319 IX86_BUILTIN_PADDSW256,
31320 IX86_BUILTIN_PADDUSB256,
31321 IX86_BUILTIN_PADDUSW256,
31322 IX86_BUILTIN_PALIGNR256,
31323 IX86_BUILTIN_AND256I,
31324 IX86_BUILTIN_ANDNOT256I,
31325 IX86_BUILTIN_PAVGB256,
31326 IX86_BUILTIN_PAVGW256,
31327 IX86_BUILTIN_PBLENDVB256,
31328 IX86_BUILTIN_PBLENDVW256,
31329 IX86_BUILTIN_PCMPEQB256,
31330 IX86_BUILTIN_PCMPEQW256,
31331 IX86_BUILTIN_PCMPEQD256,
31332 IX86_BUILTIN_PCMPEQQ256,
31333 IX86_BUILTIN_PCMPGTB256,
31334 IX86_BUILTIN_PCMPGTW256,
31335 IX86_BUILTIN_PCMPGTD256,
31336 IX86_BUILTIN_PCMPGTQ256,
31337 IX86_BUILTIN_PHADDW256,
31338 IX86_BUILTIN_PHADDD256,
31339 IX86_BUILTIN_PHADDSW256,
31340 IX86_BUILTIN_PHSUBW256,
31341 IX86_BUILTIN_PHSUBD256,
31342 IX86_BUILTIN_PHSUBSW256,
31343 IX86_BUILTIN_PMADDUBSW256,
31344 IX86_BUILTIN_PMADDWD256,
31345 IX86_BUILTIN_PMAXSB256,
31346 IX86_BUILTIN_PMAXSW256,
31347 IX86_BUILTIN_PMAXSD256,
31348 IX86_BUILTIN_PMAXUB256,
31349 IX86_BUILTIN_PMAXUW256,
31350 IX86_BUILTIN_PMAXUD256,
31351 IX86_BUILTIN_PMINSB256,
31352 IX86_BUILTIN_PMINSW256,
31353 IX86_BUILTIN_PMINSD256,
31354 IX86_BUILTIN_PMINUB256,
31355 IX86_BUILTIN_PMINUW256,
31356 IX86_BUILTIN_PMINUD256,
31357 IX86_BUILTIN_PMOVMSKB256,
31358 IX86_BUILTIN_PMOVSXBW256,
31359 IX86_BUILTIN_PMOVSXBD256,
31360 IX86_BUILTIN_PMOVSXBQ256,
31361 IX86_BUILTIN_PMOVSXWD256,
31362 IX86_BUILTIN_PMOVSXWQ256,
31363 IX86_BUILTIN_PMOVSXDQ256,
31364 IX86_BUILTIN_PMOVZXBW256,
31365 IX86_BUILTIN_PMOVZXBD256,
31366 IX86_BUILTIN_PMOVZXBQ256,
31367 IX86_BUILTIN_PMOVZXWD256,
31368 IX86_BUILTIN_PMOVZXWQ256,
31369 IX86_BUILTIN_PMOVZXDQ256,
31370 IX86_BUILTIN_PMULDQ256,
31371 IX86_BUILTIN_PMULHRSW256,
31372 IX86_BUILTIN_PMULHUW256,
31373 IX86_BUILTIN_PMULHW256,
31374 IX86_BUILTIN_PMULLW256,
31375 IX86_BUILTIN_PMULLD256,
31376 IX86_BUILTIN_PMULUDQ256,
31377 IX86_BUILTIN_POR256,
31378 IX86_BUILTIN_PSADBW256,
31379 IX86_BUILTIN_PSHUFB256,
31380 IX86_BUILTIN_PSHUFD256,
31381 IX86_BUILTIN_PSHUFHW256,
31382 IX86_BUILTIN_PSHUFLW256,
31383 IX86_BUILTIN_PSIGNB256,
31384 IX86_BUILTIN_PSIGNW256,
31385 IX86_BUILTIN_PSIGND256,
31386 IX86_BUILTIN_PSLLDQI256,
31387 IX86_BUILTIN_PSLLWI256,
31388 IX86_BUILTIN_PSLLW256,
31389 IX86_BUILTIN_PSLLDI256,
31390 IX86_BUILTIN_PSLLD256,
31391 IX86_BUILTIN_PSLLQI256,
31392 IX86_BUILTIN_PSLLQ256,
31393 IX86_BUILTIN_PSRAWI256,
31394 IX86_BUILTIN_PSRAW256,
31395 IX86_BUILTIN_PSRADI256,
31396 IX86_BUILTIN_PSRAD256,
31397 IX86_BUILTIN_PSRLDQI256,
31398 IX86_BUILTIN_PSRLWI256,
31399 IX86_BUILTIN_PSRLW256,
31400 IX86_BUILTIN_PSRLDI256,
31401 IX86_BUILTIN_PSRLD256,
31402 IX86_BUILTIN_PSRLQI256,
31403 IX86_BUILTIN_PSRLQ256,
31404 IX86_BUILTIN_PSUBB256,
31405 IX86_BUILTIN_PSUBW256,
31406 IX86_BUILTIN_PSUBD256,
31407 IX86_BUILTIN_PSUBQ256,
31408 IX86_BUILTIN_PSUBSB256,
31409 IX86_BUILTIN_PSUBSW256,
31410 IX86_BUILTIN_PSUBUSB256,
31411 IX86_BUILTIN_PSUBUSW256,
31412 IX86_BUILTIN_PUNPCKHBW256,
31413 IX86_BUILTIN_PUNPCKHWD256,
31414 IX86_BUILTIN_PUNPCKHDQ256,
31415 IX86_BUILTIN_PUNPCKHQDQ256,
31416 IX86_BUILTIN_PUNPCKLBW256,
31417 IX86_BUILTIN_PUNPCKLWD256,
31418 IX86_BUILTIN_PUNPCKLDQ256,
31419 IX86_BUILTIN_PUNPCKLQDQ256,
31420 IX86_BUILTIN_PXOR256,
31421 IX86_BUILTIN_MOVNTDQA256,
31422 IX86_BUILTIN_VBROADCASTSS_PS,
31423 IX86_BUILTIN_VBROADCASTSS_PS256,
31424 IX86_BUILTIN_VBROADCASTSD_PD256,
31425 IX86_BUILTIN_VBROADCASTSI256,
31426 IX86_BUILTIN_PBLENDD256,
31427 IX86_BUILTIN_PBLENDD128,
31428 IX86_BUILTIN_PBROADCASTB256,
31429 IX86_BUILTIN_PBROADCASTW256,
31430 IX86_BUILTIN_PBROADCASTD256,
31431 IX86_BUILTIN_PBROADCASTQ256,
31432 IX86_BUILTIN_PBROADCASTB128,
31433 IX86_BUILTIN_PBROADCASTW128,
31434 IX86_BUILTIN_PBROADCASTD128,
31435 IX86_BUILTIN_PBROADCASTQ128,
31436 IX86_BUILTIN_VPERMVARSI256,
31437 IX86_BUILTIN_VPERMDF256,
31438 IX86_BUILTIN_VPERMVARSF256,
31439 IX86_BUILTIN_VPERMDI256,
31440 IX86_BUILTIN_VPERMTI256,
31441 IX86_BUILTIN_VEXTRACT128I256,
31442 IX86_BUILTIN_VINSERT128I256,
31443 IX86_BUILTIN_MASKLOADD,
31444 IX86_BUILTIN_MASKLOADQ,
31445 IX86_BUILTIN_MASKLOADD256,
31446 IX86_BUILTIN_MASKLOADQ256,
31447 IX86_BUILTIN_MASKSTORED,
31448 IX86_BUILTIN_MASKSTOREQ,
31449 IX86_BUILTIN_MASKSTORED256,
31450 IX86_BUILTIN_MASKSTOREQ256,
31451 IX86_BUILTIN_PSLLVV4DI,
31452 IX86_BUILTIN_PSLLVV2DI,
31453 IX86_BUILTIN_PSLLVV8SI,
31454 IX86_BUILTIN_PSLLVV4SI,
31455 IX86_BUILTIN_PSRAVV8SI,
31456 IX86_BUILTIN_PSRAVV4SI,
31457 IX86_BUILTIN_PSRLVV4DI,
31458 IX86_BUILTIN_PSRLVV2DI,
31459 IX86_BUILTIN_PSRLVV8SI,
31460 IX86_BUILTIN_PSRLVV4SI,
31461
31462 IX86_BUILTIN_GATHERSIV2DF,
31463 IX86_BUILTIN_GATHERSIV4DF,
31464 IX86_BUILTIN_GATHERDIV2DF,
31465 IX86_BUILTIN_GATHERDIV4DF,
31466 IX86_BUILTIN_GATHERSIV4SF,
31467 IX86_BUILTIN_GATHERSIV8SF,
31468 IX86_BUILTIN_GATHERDIV4SF,
31469 IX86_BUILTIN_GATHERDIV8SF,
31470 IX86_BUILTIN_GATHERSIV2DI,
31471 IX86_BUILTIN_GATHERSIV4DI,
31472 IX86_BUILTIN_GATHERDIV2DI,
31473 IX86_BUILTIN_GATHERDIV4DI,
31474 IX86_BUILTIN_GATHERSIV4SI,
31475 IX86_BUILTIN_GATHERSIV8SI,
31476 IX86_BUILTIN_GATHERDIV4SI,
31477 IX86_BUILTIN_GATHERDIV8SI,
31478
31479 /* AVX512F */
31480 IX86_BUILTIN_SI512_SI256,
31481 IX86_BUILTIN_PD512_PD256,
31482 IX86_BUILTIN_PS512_PS256,
31483 IX86_BUILTIN_SI512_SI,
31484 IX86_BUILTIN_PD512_PD,
31485 IX86_BUILTIN_PS512_PS,
31486 IX86_BUILTIN_ADDPD512,
31487 IX86_BUILTIN_ADDPS512,
31488 IX86_BUILTIN_ADDSD_ROUND,
31489 IX86_BUILTIN_ADDSS_ROUND,
31490 IX86_BUILTIN_ALIGND512,
31491 IX86_BUILTIN_ALIGNQ512,
31492 IX86_BUILTIN_BLENDMD512,
31493 IX86_BUILTIN_BLENDMPD512,
31494 IX86_BUILTIN_BLENDMPS512,
31495 IX86_BUILTIN_BLENDMQ512,
31496 IX86_BUILTIN_BROADCASTF32X4_512,
31497 IX86_BUILTIN_BROADCASTF64X4_512,
31498 IX86_BUILTIN_BROADCASTI32X4_512,
31499 IX86_BUILTIN_BROADCASTI64X4_512,
31500 IX86_BUILTIN_BROADCASTSD512,
31501 IX86_BUILTIN_BROADCASTSS512,
31502 IX86_BUILTIN_CMPD512,
31503 IX86_BUILTIN_CMPPD512,
31504 IX86_BUILTIN_CMPPS512,
31505 IX86_BUILTIN_CMPQ512,
31506 IX86_BUILTIN_CMPSD_MASK,
31507 IX86_BUILTIN_CMPSS_MASK,
31508 IX86_BUILTIN_COMIDF,
31509 IX86_BUILTIN_COMISF,
31510 IX86_BUILTIN_COMPRESSPD512,
31511 IX86_BUILTIN_COMPRESSPDSTORE512,
31512 IX86_BUILTIN_COMPRESSPS512,
31513 IX86_BUILTIN_COMPRESSPSSTORE512,
31514 IX86_BUILTIN_CVTDQ2PD512,
31515 IX86_BUILTIN_CVTDQ2PS512,
31516 IX86_BUILTIN_CVTPD2DQ512,
31517 IX86_BUILTIN_CVTPD2PS512,
31518 IX86_BUILTIN_CVTPD2UDQ512,
31519 IX86_BUILTIN_CVTPH2PS512,
31520 IX86_BUILTIN_CVTPS2DQ512_MASK,
31521 IX86_BUILTIN_CVTPS2PD512,
31522 IX86_BUILTIN_CVTPS2PH512,
31523 IX86_BUILTIN_CVTPS2UDQ512,
31524 IX86_BUILTIN_CVTSD2SS_ROUND,
31525 IX86_BUILTIN_CVTSI2SD64,
31526 IX86_BUILTIN_CVTSI2SS32,
31527 IX86_BUILTIN_CVTSI2SS64,
31528 IX86_BUILTIN_CVTSS2SD_ROUND,
31529 IX86_BUILTIN_CVTTPD2DQ512,
31530 IX86_BUILTIN_CVTTPD2UDQ512,
31531 IX86_BUILTIN_CVTTPS2DQ512,
31532 IX86_BUILTIN_CVTTPS2UDQ512,
31533 IX86_BUILTIN_CVTUDQ2PD512,
31534 IX86_BUILTIN_CVTUDQ2PS512,
31535 IX86_BUILTIN_CVTUSI2SD32,
31536 IX86_BUILTIN_CVTUSI2SD64,
31537 IX86_BUILTIN_CVTUSI2SS32,
31538 IX86_BUILTIN_CVTUSI2SS64,
31539 IX86_BUILTIN_DIVPD512,
31540 IX86_BUILTIN_DIVPS512,
31541 IX86_BUILTIN_DIVSD_ROUND,
31542 IX86_BUILTIN_DIVSS_ROUND,
31543 IX86_BUILTIN_EXPANDPD512,
31544 IX86_BUILTIN_EXPANDPD512Z,
31545 IX86_BUILTIN_EXPANDPDLOAD512,
31546 IX86_BUILTIN_EXPANDPDLOAD512Z,
31547 IX86_BUILTIN_EXPANDPS512,
31548 IX86_BUILTIN_EXPANDPS512Z,
31549 IX86_BUILTIN_EXPANDPSLOAD512,
31550 IX86_BUILTIN_EXPANDPSLOAD512Z,
31551 IX86_BUILTIN_EXTRACTF32X4,
31552 IX86_BUILTIN_EXTRACTF64X4,
31553 IX86_BUILTIN_EXTRACTI32X4,
31554 IX86_BUILTIN_EXTRACTI64X4,
31555 IX86_BUILTIN_FIXUPIMMPD512_MASK,
31556 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
31557 IX86_BUILTIN_FIXUPIMMPS512_MASK,
31558 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
31559 IX86_BUILTIN_FIXUPIMMSD128_MASK,
31560 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
31561 IX86_BUILTIN_FIXUPIMMSS128_MASK,
31562 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
31563 IX86_BUILTIN_GETEXPPD512,
31564 IX86_BUILTIN_GETEXPPS512,
31565 IX86_BUILTIN_GETEXPSD128,
31566 IX86_BUILTIN_GETEXPSS128,
31567 IX86_BUILTIN_GETMANTPD512,
31568 IX86_BUILTIN_GETMANTPS512,
31569 IX86_BUILTIN_GETMANTSD128,
31570 IX86_BUILTIN_GETMANTSS128,
31571 IX86_BUILTIN_INSERTF32X4,
31572 IX86_BUILTIN_INSERTF64X4,
31573 IX86_BUILTIN_INSERTI32X4,
31574 IX86_BUILTIN_INSERTI64X4,
31575 IX86_BUILTIN_LOADAPD512,
31576 IX86_BUILTIN_LOADAPS512,
31577 IX86_BUILTIN_LOADDQUDI512,
31578 IX86_BUILTIN_LOADDQUSI512,
31579 IX86_BUILTIN_LOADUPD512,
31580 IX86_BUILTIN_LOADUPS512,
31581 IX86_BUILTIN_MAXPD512,
31582 IX86_BUILTIN_MAXPS512,
31583 IX86_BUILTIN_MAXSD_ROUND,
31584 IX86_BUILTIN_MAXSS_ROUND,
31585 IX86_BUILTIN_MINPD512,
31586 IX86_BUILTIN_MINPS512,
31587 IX86_BUILTIN_MINSD_ROUND,
31588 IX86_BUILTIN_MINSS_ROUND,
31589 IX86_BUILTIN_MOVAPD512,
31590 IX86_BUILTIN_MOVAPS512,
31591 IX86_BUILTIN_MOVDDUP512,
31592 IX86_BUILTIN_MOVDQA32LOAD512,
31593 IX86_BUILTIN_MOVDQA32STORE512,
31594 IX86_BUILTIN_MOVDQA32_512,
31595 IX86_BUILTIN_MOVDQA64LOAD512,
31596 IX86_BUILTIN_MOVDQA64STORE512,
31597 IX86_BUILTIN_MOVDQA64_512,
31598 IX86_BUILTIN_MOVNTDQ512,
31599 IX86_BUILTIN_MOVNTDQA512,
31600 IX86_BUILTIN_MOVNTPD512,
31601 IX86_BUILTIN_MOVNTPS512,
31602 IX86_BUILTIN_MOVSHDUP512,
31603 IX86_BUILTIN_MOVSLDUP512,
31604 IX86_BUILTIN_MULPD512,
31605 IX86_BUILTIN_MULPS512,
31606 IX86_BUILTIN_MULSD_ROUND,
31607 IX86_BUILTIN_MULSS_ROUND,
31608 IX86_BUILTIN_PABSD512,
31609 IX86_BUILTIN_PABSQ512,
31610 IX86_BUILTIN_PADDD512,
31611 IX86_BUILTIN_PADDQ512,
31612 IX86_BUILTIN_PANDD512,
31613 IX86_BUILTIN_PANDND512,
31614 IX86_BUILTIN_PANDNQ512,
31615 IX86_BUILTIN_PANDQ512,
31616 IX86_BUILTIN_PBROADCASTD512,
31617 IX86_BUILTIN_PBROADCASTD512_GPR,
31618 IX86_BUILTIN_PBROADCASTMB512,
31619 IX86_BUILTIN_PBROADCASTMW512,
31620 IX86_BUILTIN_PBROADCASTQ512,
31621 IX86_BUILTIN_PBROADCASTQ512_GPR,
31622 IX86_BUILTIN_PCMPEQD512_MASK,
31623 IX86_BUILTIN_PCMPEQQ512_MASK,
31624 IX86_BUILTIN_PCMPGTD512_MASK,
31625 IX86_BUILTIN_PCMPGTQ512_MASK,
31626 IX86_BUILTIN_PCOMPRESSD512,
31627 IX86_BUILTIN_PCOMPRESSDSTORE512,
31628 IX86_BUILTIN_PCOMPRESSQ512,
31629 IX86_BUILTIN_PCOMPRESSQSTORE512,
31630 IX86_BUILTIN_PEXPANDD512,
31631 IX86_BUILTIN_PEXPANDD512Z,
31632 IX86_BUILTIN_PEXPANDDLOAD512,
31633 IX86_BUILTIN_PEXPANDDLOAD512Z,
31634 IX86_BUILTIN_PEXPANDQ512,
31635 IX86_BUILTIN_PEXPANDQ512Z,
31636 IX86_BUILTIN_PEXPANDQLOAD512,
31637 IX86_BUILTIN_PEXPANDQLOAD512Z,
31638 IX86_BUILTIN_PMAXSD512,
31639 IX86_BUILTIN_PMAXSQ512,
31640 IX86_BUILTIN_PMAXUD512,
31641 IX86_BUILTIN_PMAXUQ512,
31642 IX86_BUILTIN_PMINSD512,
31643 IX86_BUILTIN_PMINSQ512,
31644 IX86_BUILTIN_PMINUD512,
31645 IX86_BUILTIN_PMINUQ512,
31646 IX86_BUILTIN_PMOVDB512,
31647 IX86_BUILTIN_PMOVDB512_MEM,
31648 IX86_BUILTIN_PMOVDW512,
31649 IX86_BUILTIN_PMOVDW512_MEM,
31650 IX86_BUILTIN_PMOVQB512,
31651 IX86_BUILTIN_PMOVQB512_MEM,
31652 IX86_BUILTIN_PMOVQD512,
31653 IX86_BUILTIN_PMOVQD512_MEM,
31654 IX86_BUILTIN_PMOVQW512,
31655 IX86_BUILTIN_PMOVQW512_MEM,
31656 IX86_BUILTIN_PMOVSDB512,
31657 IX86_BUILTIN_PMOVSDB512_MEM,
31658 IX86_BUILTIN_PMOVSDW512,
31659 IX86_BUILTIN_PMOVSDW512_MEM,
31660 IX86_BUILTIN_PMOVSQB512,
31661 IX86_BUILTIN_PMOVSQB512_MEM,
31662 IX86_BUILTIN_PMOVSQD512,
31663 IX86_BUILTIN_PMOVSQD512_MEM,
31664 IX86_BUILTIN_PMOVSQW512,
31665 IX86_BUILTIN_PMOVSQW512_MEM,
31666 IX86_BUILTIN_PMOVSXBD512,
31667 IX86_BUILTIN_PMOVSXBQ512,
31668 IX86_BUILTIN_PMOVSXDQ512,
31669 IX86_BUILTIN_PMOVSXWD512,
31670 IX86_BUILTIN_PMOVSXWQ512,
31671 IX86_BUILTIN_PMOVUSDB512,
31672 IX86_BUILTIN_PMOVUSDB512_MEM,
31673 IX86_BUILTIN_PMOVUSDW512,
31674 IX86_BUILTIN_PMOVUSDW512_MEM,
31675 IX86_BUILTIN_PMOVUSQB512,
31676 IX86_BUILTIN_PMOVUSQB512_MEM,
31677 IX86_BUILTIN_PMOVUSQD512,
31678 IX86_BUILTIN_PMOVUSQD512_MEM,
31679 IX86_BUILTIN_PMOVUSQW512,
31680 IX86_BUILTIN_PMOVUSQW512_MEM,
31681 IX86_BUILTIN_PMOVZXBD512,
31682 IX86_BUILTIN_PMOVZXBQ512,
31683 IX86_BUILTIN_PMOVZXDQ512,
31684 IX86_BUILTIN_PMOVZXWD512,
31685 IX86_BUILTIN_PMOVZXWQ512,
31686 IX86_BUILTIN_PMULDQ512,
31687 IX86_BUILTIN_PMULLD512,
31688 IX86_BUILTIN_PMULUDQ512,
31689 IX86_BUILTIN_PORD512,
31690 IX86_BUILTIN_PORQ512,
31691 IX86_BUILTIN_PROLD512,
31692 IX86_BUILTIN_PROLQ512,
31693 IX86_BUILTIN_PROLVD512,
31694 IX86_BUILTIN_PROLVQ512,
31695 IX86_BUILTIN_PRORD512,
31696 IX86_BUILTIN_PRORQ512,
31697 IX86_BUILTIN_PRORVD512,
31698 IX86_BUILTIN_PRORVQ512,
31699 IX86_BUILTIN_PSHUFD512,
31700 IX86_BUILTIN_PSLLD512,
31701 IX86_BUILTIN_PSLLDI512,
31702 IX86_BUILTIN_PSLLQ512,
31703 IX86_BUILTIN_PSLLQI512,
31704 IX86_BUILTIN_PSLLVV16SI,
31705 IX86_BUILTIN_PSLLVV8DI,
31706 IX86_BUILTIN_PSRAD512,
31707 IX86_BUILTIN_PSRADI512,
31708 IX86_BUILTIN_PSRAQ512,
31709 IX86_BUILTIN_PSRAQI512,
31710 IX86_BUILTIN_PSRAVV16SI,
31711 IX86_BUILTIN_PSRAVV8DI,
31712 IX86_BUILTIN_PSRLD512,
31713 IX86_BUILTIN_PSRLDI512,
31714 IX86_BUILTIN_PSRLQ512,
31715 IX86_BUILTIN_PSRLQI512,
31716 IX86_BUILTIN_PSRLVV16SI,
31717 IX86_BUILTIN_PSRLVV8DI,
31718 IX86_BUILTIN_PSUBD512,
31719 IX86_BUILTIN_PSUBQ512,
31720 IX86_BUILTIN_PTESTMD512,
31721 IX86_BUILTIN_PTESTMQ512,
31722 IX86_BUILTIN_PTESTNMD512,
31723 IX86_BUILTIN_PTESTNMQ512,
31724 IX86_BUILTIN_PUNPCKHDQ512,
31725 IX86_BUILTIN_PUNPCKHQDQ512,
31726 IX86_BUILTIN_PUNPCKLDQ512,
31727 IX86_BUILTIN_PUNPCKLQDQ512,
31728 IX86_BUILTIN_PXORD512,
31729 IX86_BUILTIN_PXORQ512,
31730 IX86_BUILTIN_RCP14PD512,
31731 IX86_BUILTIN_RCP14PS512,
31732 IX86_BUILTIN_RCP14SD,
31733 IX86_BUILTIN_RCP14SS,
31734 IX86_BUILTIN_RNDSCALEPD,
31735 IX86_BUILTIN_RNDSCALEPS,
31736 IX86_BUILTIN_RNDSCALESD,
31737 IX86_BUILTIN_RNDSCALESS,
31738 IX86_BUILTIN_RSQRT14PD512,
31739 IX86_BUILTIN_RSQRT14PS512,
31740 IX86_BUILTIN_RSQRT14SD,
31741 IX86_BUILTIN_RSQRT14SS,
31742 IX86_BUILTIN_SCALEFPD512,
31743 IX86_BUILTIN_SCALEFPS512,
31744 IX86_BUILTIN_SCALEFSD,
31745 IX86_BUILTIN_SCALEFSS,
31746 IX86_BUILTIN_SHUFPD512,
31747 IX86_BUILTIN_SHUFPS512,
31748 IX86_BUILTIN_SHUF_F32x4,
31749 IX86_BUILTIN_SHUF_F64x2,
31750 IX86_BUILTIN_SHUF_I32x4,
31751 IX86_BUILTIN_SHUF_I64x2,
31752 IX86_BUILTIN_SQRTPD512,
31753 IX86_BUILTIN_SQRTPD512_MASK,
31754 IX86_BUILTIN_SQRTPS512_MASK,
31755 IX86_BUILTIN_SQRTPS_NR512,
31756 IX86_BUILTIN_SQRTSD_ROUND,
31757 IX86_BUILTIN_SQRTSS_ROUND,
31758 IX86_BUILTIN_STOREAPD512,
31759 IX86_BUILTIN_STOREAPS512,
31760 IX86_BUILTIN_STOREDQUDI512,
31761 IX86_BUILTIN_STOREDQUSI512,
31762 IX86_BUILTIN_STOREUPD512,
31763 IX86_BUILTIN_STOREUPS512,
31764 IX86_BUILTIN_SUBPD512,
31765 IX86_BUILTIN_SUBPS512,
31766 IX86_BUILTIN_SUBSD_ROUND,
31767 IX86_BUILTIN_SUBSS_ROUND,
31768 IX86_BUILTIN_UCMPD512,
31769 IX86_BUILTIN_UCMPQ512,
31770 IX86_BUILTIN_UNPCKHPD512,
31771 IX86_BUILTIN_UNPCKHPS512,
31772 IX86_BUILTIN_UNPCKLPD512,
31773 IX86_BUILTIN_UNPCKLPS512,
31774 IX86_BUILTIN_VCVTSD2SI32,
31775 IX86_BUILTIN_VCVTSD2SI64,
31776 IX86_BUILTIN_VCVTSD2USI32,
31777 IX86_BUILTIN_VCVTSD2USI64,
31778 IX86_BUILTIN_VCVTSS2SI32,
31779 IX86_BUILTIN_VCVTSS2SI64,
31780 IX86_BUILTIN_VCVTSS2USI32,
31781 IX86_BUILTIN_VCVTSS2USI64,
31782 IX86_BUILTIN_VCVTTSD2SI32,
31783 IX86_BUILTIN_VCVTTSD2SI64,
31784 IX86_BUILTIN_VCVTTSD2USI32,
31785 IX86_BUILTIN_VCVTTSD2USI64,
31786 IX86_BUILTIN_VCVTTSS2SI32,
31787 IX86_BUILTIN_VCVTTSS2SI64,
31788 IX86_BUILTIN_VCVTTSS2USI32,
31789 IX86_BUILTIN_VCVTTSS2USI64,
31790 IX86_BUILTIN_VFMADDPD512_MASK,
31791 IX86_BUILTIN_VFMADDPD512_MASK3,
31792 IX86_BUILTIN_VFMADDPD512_MASKZ,
31793 IX86_BUILTIN_VFMADDPS512_MASK,
31794 IX86_BUILTIN_VFMADDPS512_MASK3,
31795 IX86_BUILTIN_VFMADDPS512_MASKZ,
31796 IX86_BUILTIN_VFMADDSD3_ROUND,
31797 IX86_BUILTIN_VFMADDSS3_ROUND,
31798 IX86_BUILTIN_VFMADDSUBPD512_MASK,
31799 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
31800 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
31801 IX86_BUILTIN_VFMADDSUBPS512_MASK,
31802 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
31803 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
31804 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
31805 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
31806 IX86_BUILTIN_VFMSUBPD512_MASK3,
31807 IX86_BUILTIN_VFMSUBPS512_MASK3,
31808 IX86_BUILTIN_VFMSUBSD3_MASK3,
31809 IX86_BUILTIN_VFMSUBSS3_MASK3,
31810 IX86_BUILTIN_VFNMADDPD512_MASK,
31811 IX86_BUILTIN_VFNMADDPS512_MASK,
31812 IX86_BUILTIN_VFNMSUBPD512_MASK,
31813 IX86_BUILTIN_VFNMSUBPD512_MASK3,
31814 IX86_BUILTIN_VFNMSUBPS512_MASK,
31815 IX86_BUILTIN_VFNMSUBPS512_MASK3,
31816 IX86_BUILTIN_VPCLZCNTD512,
31817 IX86_BUILTIN_VPCLZCNTQ512,
31818 IX86_BUILTIN_VPCONFLICTD512,
31819 IX86_BUILTIN_VPCONFLICTQ512,
31820 IX86_BUILTIN_VPERMDF512,
31821 IX86_BUILTIN_VPERMDI512,
31822 IX86_BUILTIN_VPERMI2VARD512,
31823 IX86_BUILTIN_VPERMI2VARPD512,
31824 IX86_BUILTIN_VPERMI2VARPS512,
31825 IX86_BUILTIN_VPERMI2VARQ512,
31826 IX86_BUILTIN_VPERMILPD512,
31827 IX86_BUILTIN_VPERMILPS512,
31828 IX86_BUILTIN_VPERMILVARPD512,
31829 IX86_BUILTIN_VPERMILVARPS512,
31830 IX86_BUILTIN_VPERMT2VARD512,
31831 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
31832 IX86_BUILTIN_VPERMT2VARPD512,
31833 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
31834 IX86_BUILTIN_VPERMT2VARPS512,
31835 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
31836 IX86_BUILTIN_VPERMT2VARQ512,
31837 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
31838 IX86_BUILTIN_VPERMVARDF512,
31839 IX86_BUILTIN_VPERMVARDI512,
31840 IX86_BUILTIN_VPERMVARSF512,
31841 IX86_BUILTIN_VPERMVARSI512,
31842 IX86_BUILTIN_VTERNLOGD512_MASK,
31843 IX86_BUILTIN_VTERNLOGD512_MASKZ,
31844 IX86_BUILTIN_VTERNLOGQ512_MASK,
31845 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
31846
31847 /* Mask arithmetic operations */
31848 IX86_BUILTIN_KAND16,
31849 IX86_BUILTIN_KANDN16,
31850 IX86_BUILTIN_KNOT16,
31851 IX86_BUILTIN_KOR16,
31852 IX86_BUILTIN_KORTESTC16,
31853 IX86_BUILTIN_KORTESTZ16,
31854 IX86_BUILTIN_KUNPCKBW,
31855 IX86_BUILTIN_KXNOR16,
31856 IX86_BUILTIN_KXOR16,
31857 IX86_BUILTIN_KMOV16,
31858
31859 /* AVX512VL. */
31860 IX86_BUILTIN_PMOVUSQD256_MEM,
31861 IX86_BUILTIN_PMOVUSQD128_MEM,
31862 IX86_BUILTIN_PMOVSQD256_MEM,
31863 IX86_BUILTIN_PMOVSQD128_MEM,
31864 IX86_BUILTIN_PMOVQD256_MEM,
31865 IX86_BUILTIN_PMOVQD128_MEM,
31866 IX86_BUILTIN_PMOVUSQW256_MEM,
31867 IX86_BUILTIN_PMOVUSQW128_MEM,
31868 IX86_BUILTIN_PMOVSQW256_MEM,
31869 IX86_BUILTIN_PMOVSQW128_MEM,
31870 IX86_BUILTIN_PMOVQW256_MEM,
31871 IX86_BUILTIN_PMOVQW128_MEM,
31872 IX86_BUILTIN_PMOVUSQB256_MEM,
31873 IX86_BUILTIN_PMOVUSQB128_MEM,
31874 IX86_BUILTIN_PMOVSQB256_MEM,
31875 IX86_BUILTIN_PMOVSQB128_MEM,
31876 IX86_BUILTIN_PMOVQB256_MEM,
31877 IX86_BUILTIN_PMOVQB128_MEM,
31878 IX86_BUILTIN_PMOVUSDW256_MEM,
31879 IX86_BUILTIN_PMOVUSDW128_MEM,
31880 IX86_BUILTIN_PMOVSDW256_MEM,
31881 IX86_BUILTIN_PMOVSDW128_MEM,
31882 IX86_BUILTIN_PMOVDW256_MEM,
31883 IX86_BUILTIN_PMOVDW128_MEM,
31884 IX86_BUILTIN_PMOVUSDB256_MEM,
31885 IX86_BUILTIN_PMOVUSDB128_MEM,
31886 IX86_BUILTIN_PMOVSDB256_MEM,
31887 IX86_BUILTIN_PMOVSDB128_MEM,
31888 IX86_BUILTIN_PMOVDB256_MEM,
31889 IX86_BUILTIN_PMOVDB128_MEM,
31890 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
31891 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
31892 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
31893 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
31894 IX86_BUILTIN_MOVDQA64STORE256_MASK,
31895 IX86_BUILTIN_MOVDQA64STORE128_MASK,
31896 IX86_BUILTIN_MOVDQA32STORE256_MASK,
31897 IX86_BUILTIN_MOVDQA32STORE128_MASK,
31898 IX86_BUILTIN_LOADAPD256_MASK,
31899 IX86_BUILTIN_LOADAPD128_MASK,
31900 IX86_BUILTIN_LOADAPS256_MASK,
31901 IX86_BUILTIN_LOADAPS128_MASK,
31902 IX86_BUILTIN_STOREAPD256_MASK,
31903 IX86_BUILTIN_STOREAPD128_MASK,
31904 IX86_BUILTIN_STOREAPS256_MASK,
31905 IX86_BUILTIN_STOREAPS128_MASK,
31906 IX86_BUILTIN_LOADUPD256_MASK,
31907 IX86_BUILTIN_LOADUPD128_MASK,
31908 IX86_BUILTIN_LOADUPS256_MASK,
31909 IX86_BUILTIN_LOADUPS128_MASK,
31910 IX86_BUILTIN_STOREUPD256_MASK,
31911 IX86_BUILTIN_STOREUPD128_MASK,
31912 IX86_BUILTIN_STOREUPS256_MASK,
31913 IX86_BUILTIN_STOREUPS128_MASK,
31914 IX86_BUILTIN_LOADDQUDI256_MASK,
31915 IX86_BUILTIN_LOADDQUDI128_MASK,
31916 IX86_BUILTIN_LOADDQUSI256_MASK,
31917 IX86_BUILTIN_LOADDQUSI128_MASK,
31918 IX86_BUILTIN_LOADDQUHI256_MASK,
31919 IX86_BUILTIN_LOADDQUHI128_MASK,
31920 IX86_BUILTIN_LOADDQUQI256_MASK,
31921 IX86_BUILTIN_LOADDQUQI128_MASK,
31922 IX86_BUILTIN_STOREDQUDI256_MASK,
31923 IX86_BUILTIN_STOREDQUDI128_MASK,
31924 IX86_BUILTIN_STOREDQUSI256_MASK,
31925 IX86_BUILTIN_STOREDQUSI128_MASK,
31926 IX86_BUILTIN_STOREDQUHI256_MASK,
31927 IX86_BUILTIN_STOREDQUHI128_MASK,
31928 IX86_BUILTIN_STOREDQUQI256_MASK,
31929 IX86_BUILTIN_STOREDQUQI128_MASK,
31930 IX86_BUILTIN_COMPRESSPDSTORE256,
31931 IX86_BUILTIN_COMPRESSPDSTORE128,
31932 IX86_BUILTIN_COMPRESSPSSTORE256,
31933 IX86_BUILTIN_COMPRESSPSSTORE128,
31934 IX86_BUILTIN_PCOMPRESSQSTORE256,
31935 IX86_BUILTIN_PCOMPRESSQSTORE128,
31936 IX86_BUILTIN_PCOMPRESSDSTORE256,
31937 IX86_BUILTIN_PCOMPRESSDSTORE128,
31938 IX86_BUILTIN_EXPANDPDLOAD256,
31939 IX86_BUILTIN_EXPANDPDLOAD128,
31940 IX86_BUILTIN_EXPANDPSLOAD256,
31941 IX86_BUILTIN_EXPANDPSLOAD128,
31942 IX86_BUILTIN_PEXPANDQLOAD256,
31943 IX86_BUILTIN_PEXPANDQLOAD128,
31944 IX86_BUILTIN_PEXPANDDLOAD256,
31945 IX86_BUILTIN_PEXPANDDLOAD128,
31946 IX86_BUILTIN_EXPANDPDLOAD256Z,
31947 IX86_BUILTIN_EXPANDPDLOAD128Z,
31948 IX86_BUILTIN_EXPANDPSLOAD256Z,
31949 IX86_BUILTIN_EXPANDPSLOAD128Z,
31950 IX86_BUILTIN_PEXPANDQLOAD256Z,
31951 IX86_BUILTIN_PEXPANDQLOAD128Z,
31952 IX86_BUILTIN_PEXPANDDLOAD256Z,
31953 IX86_BUILTIN_PEXPANDDLOAD128Z,
31954 IX86_BUILTIN_PALIGNR256_MASK,
31955 IX86_BUILTIN_PALIGNR128_MASK,
31956 IX86_BUILTIN_MOVDQA64_256_MASK,
31957 IX86_BUILTIN_MOVDQA64_128_MASK,
31958 IX86_BUILTIN_MOVDQA32_256_MASK,
31959 IX86_BUILTIN_MOVDQA32_128_MASK,
31960 IX86_BUILTIN_MOVAPD256_MASK,
31961 IX86_BUILTIN_MOVAPD128_MASK,
31962 IX86_BUILTIN_MOVAPS256_MASK,
31963 IX86_BUILTIN_MOVAPS128_MASK,
31964 IX86_BUILTIN_MOVDQUHI256_MASK,
31965 IX86_BUILTIN_MOVDQUHI128_MASK,
31966 IX86_BUILTIN_MOVDQUQI256_MASK,
31967 IX86_BUILTIN_MOVDQUQI128_MASK,
31968 IX86_BUILTIN_MINPS128_MASK,
31969 IX86_BUILTIN_MAXPS128_MASK,
31970 IX86_BUILTIN_MINPD128_MASK,
31971 IX86_BUILTIN_MAXPD128_MASK,
31972 IX86_BUILTIN_MAXPD256_MASK,
31973 IX86_BUILTIN_MAXPS256_MASK,
31974 IX86_BUILTIN_MINPD256_MASK,
31975 IX86_BUILTIN_MINPS256_MASK,
31976 IX86_BUILTIN_MULPS128_MASK,
31977 IX86_BUILTIN_DIVPS128_MASK,
31978 IX86_BUILTIN_MULPD128_MASK,
31979 IX86_BUILTIN_DIVPD128_MASK,
31980 IX86_BUILTIN_DIVPD256_MASK,
31981 IX86_BUILTIN_DIVPS256_MASK,
31982 IX86_BUILTIN_MULPD256_MASK,
31983 IX86_BUILTIN_MULPS256_MASK,
31984 IX86_BUILTIN_ADDPD128_MASK,
31985 IX86_BUILTIN_ADDPD256_MASK,
31986 IX86_BUILTIN_ADDPS128_MASK,
31987 IX86_BUILTIN_ADDPS256_MASK,
31988 IX86_BUILTIN_SUBPD128_MASK,
31989 IX86_BUILTIN_SUBPD256_MASK,
31990 IX86_BUILTIN_SUBPS128_MASK,
31991 IX86_BUILTIN_SUBPS256_MASK,
31992 IX86_BUILTIN_XORPD256_MASK,
31993 IX86_BUILTIN_XORPD128_MASK,
31994 IX86_BUILTIN_XORPS256_MASK,
31995 IX86_BUILTIN_XORPS128_MASK,
31996 IX86_BUILTIN_ORPD256_MASK,
31997 IX86_BUILTIN_ORPD128_MASK,
31998 IX86_BUILTIN_ORPS256_MASK,
31999 IX86_BUILTIN_ORPS128_MASK,
32000 IX86_BUILTIN_BROADCASTF32x2_256,
32001 IX86_BUILTIN_BROADCASTI32x2_256,
32002 IX86_BUILTIN_BROADCASTI32x2_128,
32003 IX86_BUILTIN_BROADCASTF64X2_256,
32004 IX86_BUILTIN_BROADCASTI64X2_256,
32005 IX86_BUILTIN_BROADCASTF32X4_256,
32006 IX86_BUILTIN_BROADCASTI32X4_256,
32007 IX86_BUILTIN_EXTRACTF32X4_256,
32008 IX86_BUILTIN_EXTRACTI32X4_256,
32009 IX86_BUILTIN_DBPSADBW256,
32010 IX86_BUILTIN_DBPSADBW128,
32011 IX86_BUILTIN_CVTTPD2QQ256,
32012 IX86_BUILTIN_CVTTPD2QQ128,
32013 IX86_BUILTIN_CVTTPD2UQQ256,
32014 IX86_BUILTIN_CVTTPD2UQQ128,
32015 IX86_BUILTIN_CVTPD2QQ256,
32016 IX86_BUILTIN_CVTPD2QQ128,
32017 IX86_BUILTIN_CVTPD2UQQ256,
32018 IX86_BUILTIN_CVTPD2UQQ128,
32019 IX86_BUILTIN_CVTPD2UDQ256_MASK,
32020 IX86_BUILTIN_CVTPD2UDQ128_MASK,
32021 IX86_BUILTIN_CVTTPS2QQ256,
32022 IX86_BUILTIN_CVTTPS2QQ128,
32023 IX86_BUILTIN_CVTTPS2UQQ256,
32024 IX86_BUILTIN_CVTTPS2UQQ128,
32025 IX86_BUILTIN_CVTTPS2DQ256_MASK,
32026 IX86_BUILTIN_CVTTPS2DQ128_MASK,
32027 IX86_BUILTIN_CVTTPS2UDQ256,
32028 IX86_BUILTIN_CVTTPS2UDQ128,
32029 IX86_BUILTIN_CVTTPD2DQ256_MASK,
32030 IX86_BUILTIN_CVTTPD2DQ128_MASK,
32031 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
32032 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
32033 IX86_BUILTIN_CVTPD2DQ256_MASK,
32034 IX86_BUILTIN_CVTPD2DQ128_MASK,
32035 IX86_BUILTIN_CVTDQ2PD256_MASK,
32036 IX86_BUILTIN_CVTDQ2PD128_MASK,
32037 IX86_BUILTIN_CVTUDQ2PD256_MASK,
32038 IX86_BUILTIN_CVTUDQ2PD128_MASK,
32039 IX86_BUILTIN_CVTDQ2PS256_MASK,
32040 IX86_BUILTIN_CVTDQ2PS128_MASK,
32041 IX86_BUILTIN_CVTUDQ2PS256_MASK,
32042 IX86_BUILTIN_CVTUDQ2PS128_MASK,
32043 IX86_BUILTIN_CVTPS2PD256_MASK,
32044 IX86_BUILTIN_CVTPS2PD128_MASK,
32045 IX86_BUILTIN_PBROADCASTB256_MASK,
32046 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
32047 IX86_BUILTIN_PBROADCASTB128_MASK,
32048 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
32049 IX86_BUILTIN_PBROADCASTW256_MASK,
32050 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
32051 IX86_BUILTIN_PBROADCASTW128_MASK,
32052 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
32053 IX86_BUILTIN_PBROADCASTD256_MASK,
32054 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
32055 IX86_BUILTIN_PBROADCASTD128_MASK,
32056 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
32057 IX86_BUILTIN_PBROADCASTQ256_MASK,
32058 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
32059 IX86_BUILTIN_PBROADCASTQ128_MASK,
32060 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
32061 IX86_BUILTIN_BROADCASTSS256,
32062 IX86_BUILTIN_BROADCASTSS128,
32063 IX86_BUILTIN_BROADCASTSD256,
32064 IX86_BUILTIN_EXTRACTF64X2_256,
32065 IX86_BUILTIN_EXTRACTI64X2_256,
32066 IX86_BUILTIN_INSERTF32X4_256,
32067 IX86_BUILTIN_INSERTI32X4_256,
32068 IX86_BUILTIN_PMOVSXBW256_MASK,
32069 IX86_BUILTIN_PMOVSXBW128_MASK,
32070 IX86_BUILTIN_PMOVSXBD256_MASK,
32071 IX86_BUILTIN_PMOVSXBD128_MASK,
32072 IX86_BUILTIN_PMOVSXBQ256_MASK,
32073 IX86_BUILTIN_PMOVSXBQ128_MASK,
32074 IX86_BUILTIN_PMOVSXWD256_MASK,
32075 IX86_BUILTIN_PMOVSXWD128_MASK,
32076 IX86_BUILTIN_PMOVSXWQ256_MASK,
32077 IX86_BUILTIN_PMOVSXWQ128_MASK,
32078 IX86_BUILTIN_PMOVSXDQ256_MASK,
32079 IX86_BUILTIN_PMOVSXDQ128_MASK,
32080 IX86_BUILTIN_PMOVZXBW256_MASK,
32081 IX86_BUILTIN_PMOVZXBW128_MASK,
32082 IX86_BUILTIN_PMOVZXBD256_MASK,
32083 IX86_BUILTIN_PMOVZXBD128_MASK,
32084 IX86_BUILTIN_PMOVZXBQ256_MASK,
32085 IX86_BUILTIN_PMOVZXBQ128_MASK,
32086 IX86_BUILTIN_PMOVZXWD256_MASK,
32087 IX86_BUILTIN_PMOVZXWD128_MASK,
32088 IX86_BUILTIN_PMOVZXWQ256_MASK,
32089 IX86_BUILTIN_PMOVZXWQ128_MASK,
32090 IX86_BUILTIN_PMOVZXDQ256_MASK,
32091 IX86_BUILTIN_PMOVZXDQ128_MASK,
32092 IX86_BUILTIN_REDUCEPD256_MASK,
32093 IX86_BUILTIN_REDUCEPD128_MASK,
32094 IX86_BUILTIN_REDUCEPS256_MASK,
32095 IX86_BUILTIN_REDUCEPS128_MASK,
32096 IX86_BUILTIN_REDUCESD_MASK,
32097 IX86_BUILTIN_REDUCESS_MASK,
32098 IX86_BUILTIN_VPERMVARHI256_MASK,
32099 IX86_BUILTIN_VPERMVARHI128_MASK,
32100 IX86_BUILTIN_VPERMT2VARHI256,
32101 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
32102 IX86_BUILTIN_VPERMT2VARHI128,
32103 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
32104 IX86_BUILTIN_VPERMI2VARHI256,
32105 IX86_BUILTIN_VPERMI2VARHI128,
32106 IX86_BUILTIN_RCP14PD256,
32107 IX86_BUILTIN_RCP14PD128,
32108 IX86_BUILTIN_RCP14PS256,
32109 IX86_BUILTIN_RCP14PS128,
32110 IX86_BUILTIN_RSQRT14PD256_MASK,
32111 IX86_BUILTIN_RSQRT14PD128_MASK,
32112 IX86_BUILTIN_RSQRT14PS256_MASK,
32113 IX86_BUILTIN_RSQRT14PS128_MASK,
32114 IX86_BUILTIN_SQRTPD256_MASK,
32115 IX86_BUILTIN_SQRTPD128_MASK,
32116 IX86_BUILTIN_SQRTPS256_MASK,
32117 IX86_BUILTIN_SQRTPS128_MASK,
32118 IX86_BUILTIN_PADDB128_MASK,
32119 IX86_BUILTIN_PADDW128_MASK,
32120 IX86_BUILTIN_PADDD128_MASK,
32121 IX86_BUILTIN_PADDQ128_MASK,
32122 IX86_BUILTIN_PSUBB128_MASK,
32123 IX86_BUILTIN_PSUBW128_MASK,
32124 IX86_BUILTIN_PSUBD128_MASK,
32125 IX86_BUILTIN_PSUBQ128_MASK,
32126 IX86_BUILTIN_PADDSB128_MASK,
32127 IX86_BUILTIN_PADDSW128_MASK,
32128 IX86_BUILTIN_PSUBSB128_MASK,
32129 IX86_BUILTIN_PSUBSW128_MASK,
32130 IX86_BUILTIN_PADDUSB128_MASK,
32131 IX86_BUILTIN_PADDUSW128_MASK,
32132 IX86_BUILTIN_PSUBUSB128_MASK,
32133 IX86_BUILTIN_PSUBUSW128_MASK,
32134 IX86_BUILTIN_PADDB256_MASK,
32135 IX86_BUILTIN_PADDW256_MASK,
32136 IX86_BUILTIN_PADDD256_MASK,
32137 IX86_BUILTIN_PADDQ256_MASK,
32138 IX86_BUILTIN_PADDSB256_MASK,
32139 IX86_BUILTIN_PADDSW256_MASK,
32140 IX86_BUILTIN_PADDUSB256_MASK,
32141 IX86_BUILTIN_PADDUSW256_MASK,
32142 IX86_BUILTIN_PSUBB256_MASK,
32143 IX86_BUILTIN_PSUBW256_MASK,
32144 IX86_BUILTIN_PSUBD256_MASK,
32145 IX86_BUILTIN_PSUBQ256_MASK,
32146 IX86_BUILTIN_PSUBSB256_MASK,
32147 IX86_BUILTIN_PSUBSW256_MASK,
32148 IX86_BUILTIN_PSUBUSB256_MASK,
32149 IX86_BUILTIN_PSUBUSW256_MASK,
32150 IX86_BUILTIN_SHUF_F64x2_256,
32151 IX86_BUILTIN_SHUF_I64x2_256,
32152 IX86_BUILTIN_SHUF_I32x4_256,
32153 IX86_BUILTIN_SHUF_F32x4_256,
32154 IX86_BUILTIN_PMOVWB128,
32155 IX86_BUILTIN_PMOVWB256,
32156 IX86_BUILTIN_PMOVSWB128,
32157 IX86_BUILTIN_PMOVSWB256,
32158 IX86_BUILTIN_PMOVUSWB128,
32159 IX86_BUILTIN_PMOVUSWB256,
32160 IX86_BUILTIN_PMOVDB128,
32161 IX86_BUILTIN_PMOVDB256,
32162 IX86_BUILTIN_PMOVSDB128,
32163 IX86_BUILTIN_PMOVSDB256,
32164 IX86_BUILTIN_PMOVUSDB128,
32165 IX86_BUILTIN_PMOVUSDB256,
32166 IX86_BUILTIN_PMOVDW128,
32167 IX86_BUILTIN_PMOVDW256,
32168 IX86_BUILTIN_PMOVSDW128,
32169 IX86_BUILTIN_PMOVSDW256,
32170 IX86_BUILTIN_PMOVUSDW128,
32171 IX86_BUILTIN_PMOVUSDW256,
32172 IX86_BUILTIN_PMOVQB128,
32173 IX86_BUILTIN_PMOVQB256,
32174 IX86_BUILTIN_PMOVSQB128,
32175 IX86_BUILTIN_PMOVSQB256,
32176 IX86_BUILTIN_PMOVUSQB128,
32177 IX86_BUILTIN_PMOVUSQB256,
32178 IX86_BUILTIN_PMOVQW128,
32179 IX86_BUILTIN_PMOVQW256,
32180 IX86_BUILTIN_PMOVSQW128,
32181 IX86_BUILTIN_PMOVSQW256,
32182 IX86_BUILTIN_PMOVUSQW128,
32183 IX86_BUILTIN_PMOVUSQW256,
32184 IX86_BUILTIN_PMOVQD128,
32185 IX86_BUILTIN_PMOVQD256,
32186 IX86_BUILTIN_PMOVSQD128,
32187 IX86_BUILTIN_PMOVSQD256,
32188 IX86_BUILTIN_PMOVUSQD128,
32189 IX86_BUILTIN_PMOVUSQD256,
32190 IX86_BUILTIN_RANGEPD256,
32191 IX86_BUILTIN_RANGEPD128,
32192 IX86_BUILTIN_RANGEPS256,
32193 IX86_BUILTIN_RANGEPS128,
32194 IX86_BUILTIN_GETEXPPS256,
32195 IX86_BUILTIN_GETEXPPD256,
32196 IX86_BUILTIN_GETEXPPS128,
32197 IX86_BUILTIN_GETEXPPD128,
32198 IX86_BUILTIN_FIXUPIMMPD256_MASK,
32199 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
32200 IX86_BUILTIN_FIXUPIMMPS256_MASK,
32201 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
32202 IX86_BUILTIN_FIXUPIMMPD128_MASK,
32203 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
32204 IX86_BUILTIN_FIXUPIMMPS128_MASK,
32205 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
32206 IX86_BUILTIN_PABSQ256,
32207 IX86_BUILTIN_PABSQ128,
32208 IX86_BUILTIN_PABSD256_MASK,
32209 IX86_BUILTIN_PABSD128_MASK,
32210 IX86_BUILTIN_PMULHRSW256_MASK,
32211 IX86_BUILTIN_PMULHRSW128_MASK,
32212 IX86_BUILTIN_PMULHUW128_MASK,
32213 IX86_BUILTIN_PMULHUW256_MASK,
32214 IX86_BUILTIN_PMULHW256_MASK,
32215 IX86_BUILTIN_PMULHW128_MASK,
32216 IX86_BUILTIN_PMULLW256_MASK,
32217 IX86_BUILTIN_PMULLW128_MASK,
32218 IX86_BUILTIN_PMULLQ256,
32219 IX86_BUILTIN_PMULLQ128,
32220 IX86_BUILTIN_ANDPD256_MASK,
32221 IX86_BUILTIN_ANDPD128_MASK,
32222 IX86_BUILTIN_ANDPS256_MASK,
32223 IX86_BUILTIN_ANDPS128_MASK,
32224 IX86_BUILTIN_ANDNPD256_MASK,
32225 IX86_BUILTIN_ANDNPD128_MASK,
32226 IX86_BUILTIN_ANDNPS256_MASK,
32227 IX86_BUILTIN_ANDNPS128_MASK,
32228 IX86_BUILTIN_PSLLWI128_MASK,
32229 IX86_BUILTIN_PSLLDI128_MASK,
32230 IX86_BUILTIN_PSLLQI128_MASK,
32231 IX86_BUILTIN_PSLLW128_MASK,
32232 IX86_BUILTIN_PSLLD128_MASK,
32233 IX86_BUILTIN_PSLLQ128_MASK,
32234 IX86_BUILTIN_PSLLWI256_MASK,
32235 IX86_BUILTIN_PSLLW256_MASK,
32236 IX86_BUILTIN_PSLLDI256_MASK,
32237 IX86_BUILTIN_PSLLD256_MASK,
32238 IX86_BUILTIN_PSLLQI256_MASK,
32239 IX86_BUILTIN_PSLLQ256_MASK,
32240 IX86_BUILTIN_PSRADI128_MASK,
32241 IX86_BUILTIN_PSRAD128_MASK,
32242 IX86_BUILTIN_PSRADI256_MASK,
32243 IX86_BUILTIN_PSRAD256_MASK,
32244 IX86_BUILTIN_PSRAQI128_MASK,
32245 IX86_BUILTIN_PSRAQ128_MASK,
32246 IX86_BUILTIN_PSRAQI256_MASK,
32247 IX86_BUILTIN_PSRAQ256_MASK,
32248 IX86_BUILTIN_PANDD256,
32249 IX86_BUILTIN_PANDD128,
32250 IX86_BUILTIN_PSRLDI128_MASK,
32251 IX86_BUILTIN_PSRLD128_MASK,
32252 IX86_BUILTIN_PSRLDI256_MASK,
32253 IX86_BUILTIN_PSRLD256_MASK,
32254 IX86_BUILTIN_PSRLQI128_MASK,
32255 IX86_BUILTIN_PSRLQ128_MASK,
32256 IX86_BUILTIN_PSRLQI256_MASK,
32257 IX86_BUILTIN_PSRLQ256_MASK,
32258 IX86_BUILTIN_PANDQ256,
32259 IX86_BUILTIN_PANDQ128,
32260 IX86_BUILTIN_PANDND256,
32261 IX86_BUILTIN_PANDND128,
32262 IX86_BUILTIN_PANDNQ256,
32263 IX86_BUILTIN_PANDNQ128,
32264 IX86_BUILTIN_PORD256,
32265 IX86_BUILTIN_PORD128,
32266 IX86_BUILTIN_PORQ256,
32267 IX86_BUILTIN_PORQ128,
32268 IX86_BUILTIN_PXORD256,
32269 IX86_BUILTIN_PXORD128,
32270 IX86_BUILTIN_PXORQ256,
32271 IX86_BUILTIN_PXORQ128,
32272 IX86_BUILTIN_PACKSSWB256_MASK,
32273 IX86_BUILTIN_PACKSSWB128_MASK,
32274 IX86_BUILTIN_PACKUSWB256_MASK,
32275 IX86_BUILTIN_PACKUSWB128_MASK,
32276 IX86_BUILTIN_RNDSCALEPS256,
32277 IX86_BUILTIN_RNDSCALEPD256,
32278 IX86_BUILTIN_RNDSCALEPS128,
32279 IX86_BUILTIN_RNDSCALEPD128,
32280 IX86_BUILTIN_VTERNLOGQ256_MASK,
32281 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
32282 IX86_BUILTIN_VTERNLOGD256_MASK,
32283 IX86_BUILTIN_VTERNLOGD256_MASKZ,
32284 IX86_BUILTIN_VTERNLOGQ128_MASK,
32285 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
32286 IX86_BUILTIN_VTERNLOGD128_MASK,
32287 IX86_BUILTIN_VTERNLOGD128_MASKZ,
32288 IX86_BUILTIN_SCALEFPD256,
32289 IX86_BUILTIN_SCALEFPS256,
32290 IX86_BUILTIN_SCALEFPD128,
32291 IX86_BUILTIN_SCALEFPS128,
32292 IX86_BUILTIN_VFMADDPD256_MASK,
32293 IX86_BUILTIN_VFMADDPD256_MASK3,
32294 IX86_BUILTIN_VFMADDPD256_MASKZ,
32295 IX86_BUILTIN_VFMADDPD128_MASK,
32296 IX86_BUILTIN_VFMADDPD128_MASK3,
32297 IX86_BUILTIN_VFMADDPD128_MASKZ,
32298 IX86_BUILTIN_VFMADDPS256_MASK,
32299 IX86_BUILTIN_VFMADDPS256_MASK3,
32300 IX86_BUILTIN_VFMADDPS256_MASKZ,
32301 IX86_BUILTIN_VFMADDPS128_MASK,
32302 IX86_BUILTIN_VFMADDPS128_MASK3,
32303 IX86_BUILTIN_VFMADDPS128_MASKZ,
32304 IX86_BUILTIN_VFMSUBPD256_MASK3,
32305 IX86_BUILTIN_VFMSUBPD128_MASK3,
32306 IX86_BUILTIN_VFMSUBPS256_MASK3,
32307 IX86_BUILTIN_VFMSUBPS128_MASK3,
32308 IX86_BUILTIN_VFNMADDPD256_MASK,
32309 IX86_BUILTIN_VFNMADDPD128_MASK,
32310 IX86_BUILTIN_VFNMADDPS256_MASK,
32311 IX86_BUILTIN_VFNMADDPS128_MASK,
32312 IX86_BUILTIN_VFNMSUBPD256_MASK,
32313 IX86_BUILTIN_VFNMSUBPD256_MASK3,
32314 IX86_BUILTIN_VFNMSUBPD128_MASK,
32315 IX86_BUILTIN_VFNMSUBPD128_MASK3,
32316 IX86_BUILTIN_VFNMSUBPS256_MASK,
32317 IX86_BUILTIN_VFNMSUBPS256_MASK3,
32318 IX86_BUILTIN_VFNMSUBPS128_MASK,
32319 IX86_BUILTIN_VFNMSUBPS128_MASK3,
32320 IX86_BUILTIN_VFMADDSUBPD256_MASK,
32321 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
32322 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
32323 IX86_BUILTIN_VFMADDSUBPD128_MASK,
32324 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
32325 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
32326 IX86_BUILTIN_VFMADDSUBPS256_MASK,
32327 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
32328 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
32329 IX86_BUILTIN_VFMADDSUBPS128_MASK,
32330 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
32331 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
32332 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
32333 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
32334 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
32335 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
32336 IX86_BUILTIN_INSERTF64X2_256,
32337 IX86_BUILTIN_INSERTI64X2_256,
32338 IX86_BUILTIN_PSRAVV16HI,
32339 IX86_BUILTIN_PSRAVV8HI,
32340 IX86_BUILTIN_PMADDUBSW256_MASK,
32341 IX86_BUILTIN_PMADDUBSW128_MASK,
32342 IX86_BUILTIN_PMADDWD256_MASK,
32343 IX86_BUILTIN_PMADDWD128_MASK,
32344 IX86_BUILTIN_PSRLVV16HI,
32345 IX86_BUILTIN_PSRLVV8HI,
32346 IX86_BUILTIN_CVTPS2DQ256_MASK,
32347 IX86_BUILTIN_CVTPS2DQ128_MASK,
32348 IX86_BUILTIN_CVTPS2UDQ256,
32349 IX86_BUILTIN_CVTPS2UDQ128,
32350 IX86_BUILTIN_CVTPS2QQ256,
32351 IX86_BUILTIN_CVTPS2QQ128,
32352 IX86_BUILTIN_CVTPS2UQQ256,
32353 IX86_BUILTIN_CVTPS2UQQ128,
32354 IX86_BUILTIN_GETMANTPS256,
32355 IX86_BUILTIN_GETMANTPS128,
32356 IX86_BUILTIN_GETMANTPD256,
32357 IX86_BUILTIN_GETMANTPD128,
32358 IX86_BUILTIN_MOVDDUP256_MASK,
32359 IX86_BUILTIN_MOVDDUP128_MASK,
32360 IX86_BUILTIN_MOVSHDUP256_MASK,
32361 IX86_BUILTIN_MOVSHDUP128_MASK,
32362 IX86_BUILTIN_MOVSLDUP256_MASK,
32363 IX86_BUILTIN_MOVSLDUP128_MASK,
32364 IX86_BUILTIN_CVTQQ2PS256,
32365 IX86_BUILTIN_CVTQQ2PS128,
32366 IX86_BUILTIN_CVTUQQ2PS256,
32367 IX86_BUILTIN_CVTUQQ2PS128,
32368 IX86_BUILTIN_CVTQQ2PD256,
32369 IX86_BUILTIN_CVTQQ2PD128,
32370 IX86_BUILTIN_CVTUQQ2PD256,
32371 IX86_BUILTIN_CVTUQQ2PD128,
32372 IX86_BUILTIN_VPERMT2VARQ256,
32373 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
32374 IX86_BUILTIN_VPERMT2VARD256,
32375 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
32376 IX86_BUILTIN_VPERMI2VARQ256,
32377 IX86_BUILTIN_VPERMI2VARD256,
32378 IX86_BUILTIN_VPERMT2VARPD256,
32379 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
32380 IX86_BUILTIN_VPERMT2VARPS256,
32381 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
32382 IX86_BUILTIN_VPERMI2VARPD256,
32383 IX86_BUILTIN_VPERMI2VARPS256,
32384 IX86_BUILTIN_VPERMT2VARQ128,
32385 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
32386 IX86_BUILTIN_VPERMT2VARD128,
32387 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
32388 IX86_BUILTIN_VPERMI2VARQ128,
32389 IX86_BUILTIN_VPERMI2VARD128,
32390 IX86_BUILTIN_VPERMT2VARPD128,
32391 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
32392 IX86_BUILTIN_VPERMT2VARPS128,
32393 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
32394 IX86_BUILTIN_VPERMI2VARPD128,
32395 IX86_BUILTIN_VPERMI2VARPS128,
32396 IX86_BUILTIN_PSHUFB256_MASK,
32397 IX86_BUILTIN_PSHUFB128_MASK,
32398 IX86_BUILTIN_PSHUFHW256_MASK,
32399 IX86_BUILTIN_PSHUFHW128_MASK,
32400 IX86_BUILTIN_PSHUFLW256_MASK,
32401 IX86_BUILTIN_PSHUFLW128_MASK,
32402 IX86_BUILTIN_PSHUFD256_MASK,
32403 IX86_BUILTIN_PSHUFD128_MASK,
32404 IX86_BUILTIN_SHUFPD256_MASK,
32405 IX86_BUILTIN_SHUFPD128_MASK,
32406 IX86_BUILTIN_SHUFPS256_MASK,
32407 IX86_BUILTIN_SHUFPS128_MASK,
32408 IX86_BUILTIN_PROLVQ256,
32409 IX86_BUILTIN_PROLVQ128,
32410 IX86_BUILTIN_PROLQ256,
32411 IX86_BUILTIN_PROLQ128,
32412 IX86_BUILTIN_PRORVQ256,
32413 IX86_BUILTIN_PRORVQ128,
32414 IX86_BUILTIN_PRORQ256,
32415 IX86_BUILTIN_PRORQ128,
32416 IX86_BUILTIN_PSRAVQ128,
32417 IX86_BUILTIN_PSRAVQ256,
32418 IX86_BUILTIN_PSLLVV4DI_MASK,
32419 IX86_BUILTIN_PSLLVV2DI_MASK,
32420 IX86_BUILTIN_PSLLVV8SI_MASK,
32421 IX86_BUILTIN_PSLLVV4SI_MASK,
32422 IX86_BUILTIN_PSRAVV8SI_MASK,
32423 IX86_BUILTIN_PSRAVV4SI_MASK,
32424 IX86_BUILTIN_PSRLVV4DI_MASK,
32425 IX86_BUILTIN_PSRLVV2DI_MASK,
32426 IX86_BUILTIN_PSRLVV8SI_MASK,
32427 IX86_BUILTIN_PSRLVV4SI_MASK,
32428 IX86_BUILTIN_PSRAWI256_MASK,
32429 IX86_BUILTIN_PSRAW256_MASK,
32430 IX86_BUILTIN_PSRAWI128_MASK,
32431 IX86_BUILTIN_PSRAW128_MASK,
32432 IX86_BUILTIN_PSRLWI256_MASK,
32433 IX86_BUILTIN_PSRLW256_MASK,
32434 IX86_BUILTIN_PSRLWI128_MASK,
32435 IX86_BUILTIN_PSRLW128_MASK,
32436 IX86_BUILTIN_PRORVD256,
32437 IX86_BUILTIN_PROLVD256,
32438 IX86_BUILTIN_PRORD256,
32439 IX86_BUILTIN_PROLD256,
32440 IX86_BUILTIN_PRORVD128,
32441 IX86_BUILTIN_PROLVD128,
32442 IX86_BUILTIN_PRORD128,
32443 IX86_BUILTIN_PROLD128,
32444 IX86_BUILTIN_FPCLASSPD256,
32445 IX86_BUILTIN_FPCLASSPD128,
32446 IX86_BUILTIN_FPCLASSSD,
32447 IX86_BUILTIN_FPCLASSPS256,
32448 IX86_BUILTIN_FPCLASSPS128,
32449 IX86_BUILTIN_FPCLASSSS,
32450 IX86_BUILTIN_CVTB2MASK128,
32451 IX86_BUILTIN_CVTB2MASK256,
32452 IX86_BUILTIN_CVTW2MASK128,
32453 IX86_BUILTIN_CVTW2MASK256,
32454 IX86_BUILTIN_CVTD2MASK128,
32455 IX86_BUILTIN_CVTD2MASK256,
32456 IX86_BUILTIN_CVTQ2MASK128,
32457 IX86_BUILTIN_CVTQ2MASK256,
32458 IX86_BUILTIN_CVTMASK2B128,
32459 IX86_BUILTIN_CVTMASK2B256,
32460 IX86_BUILTIN_CVTMASK2W128,
32461 IX86_BUILTIN_CVTMASK2W256,
32462 IX86_BUILTIN_CVTMASK2D128,
32463 IX86_BUILTIN_CVTMASK2D256,
32464 IX86_BUILTIN_CVTMASK2Q128,
32465 IX86_BUILTIN_CVTMASK2Q256,
32466 IX86_BUILTIN_PCMPEQB128_MASK,
32467 IX86_BUILTIN_PCMPEQB256_MASK,
32468 IX86_BUILTIN_PCMPEQW128_MASK,
32469 IX86_BUILTIN_PCMPEQW256_MASK,
32470 IX86_BUILTIN_PCMPEQD128_MASK,
32471 IX86_BUILTIN_PCMPEQD256_MASK,
32472 IX86_BUILTIN_PCMPEQQ128_MASK,
32473 IX86_BUILTIN_PCMPEQQ256_MASK,
32474 IX86_BUILTIN_PCMPGTB128_MASK,
32475 IX86_BUILTIN_PCMPGTB256_MASK,
32476 IX86_BUILTIN_PCMPGTW128_MASK,
32477 IX86_BUILTIN_PCMPGTW256_MASK,
32478 IX86_BUILTIN_PCMPGTD128_MASK,
32479 IX86_BUILTIN_PCMPGTD256_MASK,
32480 IX86_BUILTIN_PCMPGTQ128_MASK,
32481 IX86_BUILTIN_PCMPGTQ256_MASK,
32482 IX86_BUILTIN_PTESTMB128,
32483 IX86_BUILTIN_PTESTMB256,
32484 IX86_BUILTIN_PTESTMW128,
32485 IX86_BUILTIN_PTESTMW256,
32486 IX86_BUILTIN_PTESTMD128,
32487 IX86_BUILTIN_PTESTMD256,
32488 IX86_BUILTIN_PTESTMQ128,
32489 IX86_BUILTIN_PTESTMQ256,
32490 IX86_BUILTIN_PTESTNMB128,
32491 IX86_BUILTIN_PTESTNMB256,
32492 IX86_BUILTIN_PTESTNMW128,
32493 IX86_BUILTIN_PTESTNMW256,
32494 IX86_BUILTIN_PTESTNMD128,
32495 IX86_BUILTIN_PTESTNMD256,
32496 IX86_BUILTIN_PTESTNMQ128,
32497 IX86_BUILTIN_PTESTNMQ256,
32498 IX86_BUILTIN_PBROADCASTMB128,
32499 IX86_BUILTIN_PBROADCASTMB256,
32500 IX86_BUILTIN_PBROADCASTMW128,
32501 IX86_BUILTIN_PBROADCASTMW256,
32502 IX86_BUILTIN_COMPRESSPD256,
32503 IX86_BUILTIN_COMPRESSPD128,
32504 IX86_BUILTIN_COMPRESSPS256,
32505 IX86_BUILTIN_COMPRESSPS128,
32506 IX86_BUILTIN_PCOMPRESSQ256,
32507 IX86_BUILTIN_PCOMPRESSQ128,
32508 IX86_BUILTIN_PCOMPRESSD256,
32509 IX86_BUILTIN_PCOMPRESSD128,
32510 IX86_BUILTIN_EXPANDPD256,
32511 IX86_BUILTIN_EXPANDPD128,
32512 IX86_BUILTIN_EXPANDPS256,
32513 IX86_BUILTIN_EXPANDPS128,
32514 IX86_BUILTIN_PEXPANDQ256,
32515 IX86_BUILTIN_PEXPANDQ128,
32516 IX86_BUILTIN_PEXPANDD256,
32517 IX86_BUILTIN_PEXPANDD128,
32518 IX86_BUILTIN_EXPANDPD256Z,
32519 IX86_BUILTIN_EXPANDPD128Z,
32520 IX86_BUILTIN_EXPANDPS256Z,
32521 IX86_BUILTIN_EXPANDPS128Z,
32522 IX86_BUILTIN_PEXPANDQ256Z,
32523 IX86_BUILTIN_PEXPANDQ128Z,
32524 IX86_BUILTIN_PEXPANDD256Z,
32525 IX86_BUILTIN_PEXPANDD128Z,
32526 IX86_BUILTIN_PMAXSD256_MASK,
32527 IX86_BUILTIN_PMINSD256_MASK,
32528 IX86_BUILTIN_PMAXUD256_MASK,
32529 IX86_BUILTIN_PMINUD256_MASK,
32530 IX86_BUILTIN_PMAXSD128_MASK,
32531 IX86_BUILTIN_PMINSD128_MASK,
32532 IX86_BUILTIN_PMAXUD128_MASK,
32533 IX86_BUILTIN_PMINUD128_MASK,
32534 IX86_BUILTIN_PMAXSQ256_MASK,
32535 IX86_BUILTIN_PMINSQ256_MASK,
32536 IX86_BUILTIN_PMAXUQ256_MASK,
32537 IX86_BUILTIN_PMINUQ256_MASK,
32538 IX86_BUILTIN_PMAXSQ128_MASK,
32539 IX86_BUILTIN_PMINSQ128_MASK,
32540 IX86_BUILTIN_PMAXUQ128_MASK,
32541 IX86_BUILTIN_PMINUQ128_MASK,
32542 IX86_BUILTIN_PMINSB256_MASK,
32543 IX86_BUILTIN_PMINUB256_MASK,
32544 IX86_BUILTIN_PMAXSB256_MASK,
32545 IX86_BUILTIN_PMAXUB256_MASK,
32546 IX86_BUILTIN_PMINSB128_MASK,
32547 IX86_BUILTIN_PMINUB128_MASK,
32548 IX86_BUILTIN_PMAXSB128_MASK,
32549 IX86_BUILTIN_PMAXUB128_MASK,
32550 IX86_BUILTIN_PMINSW256_MASK,
32551 IX86_BUILTIN_PMINUW256_MASK,
32552 IX86_BUILTIN_PMAXSW256_MASK,
32553 IX86_BUILTIN_PMAXUW256_MASK,
32554 IX86_BUILTIN_PMINSW128_MASK,
32555 IX86_BUILTIN_PMINUW128_MASK,
32556 IX86_BUILTIN_PMAXSW128_MASK,
32557 IX86_BUILTIN_PMAXUW128_MASK,
32558 IX86_BUILTIN_VPCONFLICTQ256,
32559 IX86_BUILTIN_VPCONFLICTD256,
32560 IX86_BUILTIN_VPCLZCNTQ256,
32561 IX86_BUILTIN_VPCLZCNTD256,
32562 IX86_BUILTIN_UNPCKHPD256_MASK,
32563 IX86_BUILTIN_UNPCKHPD128_MASK,
32564 IX86_BUILTIN_UNPCKHPS256_MASK,
32565 IX86_BUILTIN_UNPCKHPS128_MASK,
32566 IX86_BUILTIN_UNPCKLPD256_MASK,
32567 IX86_BUILTIN_UNPCKLPD128_MASK,
32568 IX86_BUILTIN_UNPCKLPS256_MASK,
32569 IX86_BUILTIN_VPCONFLICTQ128,
32570 IX86_BUILTIN_VPCONFLICTD128,
32571 IX86_BUILTIN_VPCLZCNTQ128,
32572 IX86_BUILTIN_VPCLZCNTD128,
32573 IX86_BUILTIN_UNPCKLPS128_MASK,
32574 IX86_BUILTIN_ALIGND256,
32575 IX86_BUILTIN_ALIGNQ256,
32576 IX86_BUILTIN_ALIGND128,
32577 IX86_BUILTIN_ALIGNQ128,
32578 IX86_BUILTIN_CVTPS2PH256_MASK,
32579 IX86_BUILTIN_CVTPS2PH_MASK,
32580 IX86_BUILTIN_CVTPH2PS_MASK,
32581 IX86_BUILTIN_CVTPH2PS256_MASK,
32582 IX86_BUILTIN_PUNPCKHDQ128_MASK,
32583 IX86_BUILTIN_PUNPCKHDQ256_MASK,
32584 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
32585 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
32586 IX86_BUILTIN_PUNPCKLDQ128_MASK,
32587 IX86_BUILTIN_PUNPCKLDQ256_MASK,
32588 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
32589 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
32590 IX86_BUILTIN_PUNPCKHBW128_MASK,
32591 IX86_BUILTIN_PUNPCKHBW256_MASK,
32592 IX86_BUILTIN_PUNPCKHWD128_MASK,
32593 IX86_BUILTIN_PUNPCKHWD256_MASK,
32594 IX86_BUILTIN_PUNPCKLBW128_MASK,
32595 IX86_BUILTIN_PUNPCKLBW256_MASK,
32596 IX86_BUILTIN_PUNPCKLWD128_MASK,
32597 IX86_BUILTIN_PUNPCKLWD256_MASK,
32598 IX86_BUILTIN_PSLLVV16HI,
32599 IX86_BUILTIN_PSLLVV8HI,
32600 IX86_BUILTIN_PACKSSDW256_MASK,
32601 IX86_BUILTIN_PACKSSDW128_MASK,
32602 IX86_BUILTIN_PACKUSDW256_MASK,
32603 IX86_BUILTIN_PACKUSDW128_MASK,
32604 IX86_BUILTIN_PAVGB256_MASK,
32605 IX86_BUILTIN_PAVGW256_MASK,
32606 IX86_BUILTIN_PAVGB128_MASK,
32607 IX86_BUILTIN_PAVGW128_MASK,
32608 IX86_BUILTIN_VPERMVARSF256_MASK,
32609 IX86_BUILTIN_VPERMVARDF256_MASK,
32610 IX86_BUILTIN_VPERMDF256_MASK,
32611 IX86_BUILTIN_PABSB256_MASK,
32612 IX86_BUILTIN_PABSB128_MASK,
32613 IX86_BUILTIN_PABSW256_MASK,
32614 IX86_BUILTIN_PABSW128_MASK,
32615 IX86_BUILTIN_VPERMILVARPD_MASK,
32616 IX86_BUILTIN_VPERMILVARPS_MASK,
32617 IX86_BUILTIN_VPERMILVARPD256_MASK,
32618 IX86_BUILTIN_VPERMILVARPS256_MASK,
32619 IX86_BUILTIN_VPERMILPD_MASK,
32620 IX86_BUILTIN_VPERMILPS_MASK,
32621 IX86_BUILTIN_VPERMILPD256_MASK,
32622 IX86_BUILTIN_VPERMILPS256_MASK,
32623 IX86_BUILTIN_BLENDMQ256,
32624 IX86_BUILTIN_BLENDMD256,
32625 IX86_BUILTIN_BLENDMPD256,
32626 IX86_BUILTIN_BLENDMPS256,
32627 IX86_BUILTIN_BLENDMQ128,
32628 IX86_BUILTIN_BLENDMD128,
32629 IX86_BUILTIN_BLENDMPD128,
32630 IX86_BUILTIN_BLENDMPS128,
32631 IX86_BUILTIN_BLENDMW256,
32632 IX86_BUILTIN_BLENDMB256,
32633 IX86_BUILTIN_BLENDMW128,
32634 IX86_BUILTIN_BLENDMB128,
32635 IX86_BUILTIN_PMULLD256_MASK,
32636 IX86_BUILTIN_PMULLD128_MASK,
32637 IX86_BUILTIN_PMULUDQ256_MASK,
32638 IX86_BUILTIN_PMULDQ256_MASK,
32639 IX86_BUILTIN_PMULDQ128_MASK,
32640 IX86_BUILTIN_PMULUDQ128_MASK,
32641 IX86_BUILTIN_CVTPD2PS256_MASK,
32642 IX86_BUILTIN_CVTPD2PS_MASK,
32643 IX86_BUILTIN_VPERMVARSI256_MASK,
32644 IX86_BUILTIN_VPERMVARDI256_MASK,
32645 IX86_BUILTIN_VPERMDI256_MASK,
32646 IX86_BUILTIN_CMPQ256,
32647 IX86_BUILTIN_CMPD256,
32648 IX86_BUILTIN_UCMPQ256,
32649 IX86_BUILTIN_UCMPD256,
32650 IX86_BUILTIN_CMPB256,
32651 IX86_BUILTIN_CMPW256,
32652 IX86_BUILTIN_UCMPB256,
32653 IX86_BUILTIN_UCMPW256,
32654 IX86_BUILTIN_CMPPD256_MASK,
32655 IX86_BUILTIN_CMPPS256_MASK,
32656 IX86_BUILTIN_CMPQ128,
32657 IX86_BUILTIN_CMPD128,
32658 IX86_BUILTIN_UCMPQ128,
32659 IX86_BUILTIN_UCMPD128,
32660 IX86_BUILTIN_CMPB128,
32661 IX86_BUILTIN_CMPW128,
32662 IX86_BUILTIN_UCMPB128,
32663 IX86_BUILTIN_UCMPW128,
32664 IX86_BUILTIN_CMPPD128_MASK,
32665 IX86_BUILTIN_CMPPS128_MASK,
32666
32667 IX86_BUILTIN_GATHER3SIV8SF,
32668 IX86_BUILTIN_GATHER3SIV4SF,
32669 IX86_BUILTIN_GATHER3SIV4DF,
32670 IX86_BUILTIN_GATHER3SIV2DF,
32671 IX86_BUILTIN_GATHER3DIV8SF,
32672 IX86_BUILTIN_GATHER3DIV4SF,
32673 IX86_BUILTIN_GATHER3DIV4DF,
32674 IX86_BUILTIN_GATHER3DIV2DF,
32675 IX86_BUILTIN_GATHER3SIV8SI,
32676 IX86_BUILTIN_GATHER3SIV4SI,
32677 IX86_BUILTIN_GATHER3SIV4DI,
32678 IX86_BUILTIN_GATHER3SIV2DI,
32679 IX86_BUILTIN_GATHER3DIV8SI,
32680 IX86_BUILTIN_GATHER3DIV4SI,
32681 IX86_BUILTIN_GATHER3DIV4DI,
32682 IX86_BUILTIN_GATHER3DIV2DI,
32683 IX86_BUILTIN_SCATTERSIV8SF,
32684 IX86_BUILTIN_SCATTERSIV4SF,
32685 IX86_BUILTIN_SCATTERSIV4DF,
32686 IX86_BUILTIN_SCATTERSIV2DF,
32687 IX86_BUILTIN_SCATTERDIV8SF,
32688 IX86_BUILTIN_SCATTERDIV4SF,
32689 IX86_BUILTIN_SCATTERDIV4DF,
32690 IX86_BUILTIN_SCATTERDIV2DF,
32691 IX86_BUILTIN_SCATTERSIV8SI,
32692 IX86_BUILTIN_SCATTERSIV4SI,
32693 IX86_BUILTIN_SCATTERSIV4DI,
32694 IX86_BUILTIN_SCATTERSIV2DI,
32695 IX86_BUILTIN_SCATTERDIV8SI,
32696 IX86_BUILTIN_SCATTERDIV4SI,
32697 IX86_BUILTIN_SCATTERDIV4DI,
32698 IX86_BUILTIN_SCATTERDIV2DI,
32699
32700 /* AVX512DQ. */
32701 IX86_BUILTIN_RANGESD128,
32702 IX86_BUILTIN_RANGESS128,
32703 IX86_BUILTIN_KUNPCKWD,
32704 IX86_BUILTIN_KUNPCKDQ,
32705 IX86_BUILTIN_BROADCASTF32x2_512,
32706 IX86_BUILTIN_BROADCASTI32x2_512,
32707 IX86_BUILTIN_BROADCASTF64X2_512,
32708 IX86_BUILTIN_BROADCASTI64X2_512,
32709 IX86_BUILTIN_BROADCASTF32X8_512,
32710 IX86_BUILTIN_BROADCASTI32X8_512,
32711 IX86_BUILTIN_EXTRACTF64X2_512,
32712 IX86_BUILTIN_EXTRACTF32X8,
32713 IX86_BUILTIN_EXTRACTI64X2_512,
32714 IX86_BUILTIN_EXTRACTI32X8,
32715 IX86_BUILTIN_REDUCEPD512_MASK,
32716 IX86_BUILTIN_REDUCEPS512_MASK,
32717 IX86_BUILTIN_PMULLQ512,
32718 IX86_BUILTIN_XORPD512,
32719 IX86_BUILTIN_XORPS512,
32720 IX86_BUILTIN_ORPD512,
32721 IX86_BUILTIN_ORPS512,
32722 IX86_BUILTIN_ANDPD512,
32723 IX86_BUILTIN_ANDPS512,
32724 IX86_BUILTIN_ANDNPD512,
32725 IX86_BUILTIN_ANDNPS512,
32726 IX86_BUILTIN_INSERTF32X8,
32727 IX86_BUILTIN_INSERTI32X8,
32728 IX86_BUILTIN_INSERTF64X2_512,
32729 IX86_BUILTIN_INSERTI64X2_512,
32730 IX86_BUILTIN_FPCLASSPD512,
32731 IX86_BUILTIN_FPCLASSPS512,
32732 IX86_BUILTIN_CVTD2MASK512,
32733 IX86_BUILTIN_CVTQ2MASK512,
32734 IX86_BUILTIN_CVTMASK2D512,
32735 IX86_BUILTIN_CVTMASK2Q512,
32736 IX86_BUILTIN_CVTPD2QQ512,
32737 IX86_BUILTIN_CVTPS2QQ512,
32738 IX86_BUILTIN_CVTPD2UQQ512,
32739 IX86_BUILTIN_CVTPS2UQQ512,
32740 IX86_BUILTIN_CVTQQ2PS512,
32741 IX86_BUILTIN_CVTUQQ2PS512,
32742 IX86_BUILTIN_CVTQQ2PD512,
32743 IX86_BUILTIN_CVTUQQ2PD512,
32744 IX86_BUILTIN_CVTTPS2QQ512,
32745 IX86_BUILTIN_CVTTPS2UQQ512,
32746 IX86_BUILTIN_CVTTPD2QQ512,
32747 IX86_BUILTIN_CVTTPD2UQQ512,
32748 IX86_BUILTIN_RANGEPS512,
32749 IX86_BUILTIN_RANGEPD512,
32750
32751 /* AVX512BW. */
32752 IX86_BUILTIN_PACKUSDW512,
32753 IX86_BUILTIN_PACKSSDW512,
32754 IX86_BUILTIN_LOADDQUHI512_MASK,
32755 IX86_BUILTIN_LOADDQUQI512_MASK,
32756 IX86_BUILTIN_PSLLDQ512,
32757 IX86_BUILTIN_PSRLDQ512,
32758 IX86_BUILTIN_STOREDQUHI512_MASK,
32759 IX86_BUILTIN_STOREDQUQI512_MASK,
32760 IX86_BUILTIN_PALIGNR512,
32761 IX86_BUILTIN_PALIGNR512_MASK,
32762 IX86_BUILTIN_MOVDQUHI512_MASK,
32763 IX86_BUILTIN_MOVDQUQI512_MASK,
32764 IX86_BUILTIN_PSADBW512,
32765 IX86_BUILTIN_DBPSADBW512,
32766 IX86_BUILTIN_PBROADCASTB512,
32767 IX86_BUILTIN_PBROADCASTB512_GPR,
32768 IX86_BUILTIN_PBROADCASTW512,
32769 IX86_BUILTIN_PBROADCASTW512_GPR,
32770 IX86_BUILTIN_PMOVSXBW512_MASK,
32771 IX86_BUILTIN_PMOVZXBW512_MASK,
32772 IX86_BUILTIN_VPERMVARHI512_MASK,
32773 IX86_BUILTIN_VPERMT2VARHI512,
32774 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
32775 IX86_BUILTIN_VPERMI2VARHI512,
32776 IX86_BUILTIN_PAVGB512,
32777 IX86_BUILTIN_PAVGW512,
32778 IX86_BUILTIN_PADDB512,
32779 IX86_BUILTIN_PSUBB512,
32780 IX86_BUILTIN_PSUBSB512,
32781 IX86_BUILTIN_PADDSB512,
32782 IX86_BUILTIN_PSUBUSB512,
32783 IX86_BUILTIN_PADDUSB512,
32784 IX86_BUILTIN_PSUBW512,
32785 IX86_BUILTIN_PADDW512,
32786 IX86_BUILTIN_PSUBSW512,
32787 IX86_BUILTIN_PADDSW512,
32788 IX86_BUILTIN_PSUBUSW512,
32789 IX86_BUILTIN_PADDUSW512,
32790 IX86_BUILTIN_PMAXUW512,
32791 IX86_BUILTIN_PMAXSW512,
32792 IX86_BUILTIN_PMINUW512,
32793 IX86_BUILTIN_PMINSW512,
32794 IX86_BUILTIN_PMAXUB512,
32795 IX86_BUILTIN_PMAXSB512,
32796 IX86_BUILTIN_PMINUB512,
32797 IX86_BUILTIN_PMINSB512,
32798 IX86_BUILTIN_PMOVWB512,
32799 IX86_BUILTIN_PMOVSWB512,
32800 IX86_BUILTIN_PMOVUSWB512,
32801 IX86_BUILTIN_PMULHRSW512_MASK,
32802 IX86_BUILTIN_PMULHUW512_MASK,
32803 IX86_BUILTIN_PMULHW512_MASK,
32804 IX86_BUILTIN_PMULLW512_MASK,
32805 IX86_BUILTIN_PSLLWI512_MASK,
32806 IX86_BUILTIN_PSLLW512_MASK,
32807 IX86_BUILTIN_PACKSSWB512,
32808 IX86_BUILTIN_PACKUSWB512,
32809 IX86_BUILTIN_PSRAVV32HI,
32810 IX86_BUILTIN_PMADDUBSW512_MASK,
32811 IX86_BUILTIN_PMADDWD512_MASK,
32812 IX86_BUILTIN_PSRLVV32HI,
32813 IX86_BUILTIN_PUNPCKHBW512,
32814 IX86_BUILTIN_PUNPCKHWD512,
32815 IX86_BUILTIN_PUNPCKLBW512,
32816 IX86_BUILTIN_PUNPCKLWD512,
32817 IX86_BUILTIN_PSHUFB512,
32818 IX86_BUILTIN_PSHUFHW512,
32819 IX86_BUILTIN_PSHUFLW512,
32820 IX86_BUILTIN_PSRAWI512,
32821 IX86_BUILTIN_PSRAW512,
32822 IX86_BUILTIN_PSRLWI512,
32823 IX86_BUILTIN_PSRLW512,
32824 IX86_BUILTIN_CVTB2MASK512,
32825 IX86_BUILTIN_CVTW2MASK512,
32826 IX86_BUILTIN_CVTMASK2B512,
32827 IX86_BUILTIN_CVTMASK2W512,
32828 IX86_BUILTIN_PCMPEQB512_MASK,
32829 IX86_BUILTIN_PCMPEQW512_MASK,
32830 IX86_BUILTIN_PCMPGTB512_MASK,
32831 IX86_BUILTIN_PCMPGTW512_MASK,
32832 IX86_BUILTIN_PTESTMB512,
32833 IX86_BUILTIN_PTESTMW512,
32834 IX86_BUILTIN_PTESTNMB512,
32835 IX86_BUILTIN_PTESTNMW512,
32836 IX86_BUILTIN_PSLLVV32HI,
32837 IX86_BUILTIN_PABSB512,
32838 IX86_BUILTIN_PABSW512,
32839 IX86_BUILTIN_BLENDMW512,
32840 IX86_BUILTIN_BLENDMB512,
32841 IX86_BUILTIN_CMPB512,
32842 IX86_BUILTIN_CMPW512,
32843 IX86_BUILTIN_UCMPB512,
32844 IX86_BUILTIN_UCMPW512,
32845
32846 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
32847 where all operands are 32-byte or 64-byte wide respectively. */
32848 IX86_BUILTIN_GATHERALTSIV4DF,
32849 IX86_BUILTIN_GATHERALTDIV8SF,
32850 IX86_BUILTIN_GATHERALTSIV4DI,
32851 IX86_BUILTIN_GATHERALTDIV8SI,
32852 IX86_BUILTIN_GATHER3ALTDIV16SF,
32853 IX86_BUILTIN_GATHER3ALTDIV16SI,
32854 IX86_BUILTIN_GATHER3ALTSIV4DF,
32855 IX86_BUILTIN_GATHER3ALTDIV8SF,
32856 IX86_BUILTIN_GATHER3ALTSIV4DI,
32857 IX86_BUILTIN_GATHER3ALTDIV8SI,
32858 IX86_BUILTIN_GATHER3ALTSIV8DF,
32859 IX86_BUILTIN_GATHER3ALTSIV8DI,
32860 IX86_BUILTIN_GATHER3DIV16SF,
32861 IX86_BUILTIN_GATHER3DIV16SI,
32862 IX86_BUILTIN_GATHER3DIV8DF,
32863 IX86_BUILTIN_GATHER3DIV8DI,
32864 IX86_BUILTIN_GATHER3SIV16SF,
32865 IX86_BUILTIN_GATHER3SIV16SI,
32866 IX86_BUILTIN_GATHER3SIV8DF,
32867 IX86_BUILTIN_GATHER3SIV8DI,
32868 IX86_BUILTIN_SCATTERALTSIV8DF,
32869 IX86_BUILTIN_SCATTERALTDIV16SF,
32870 IX86_BUILTIN_SCATTERALTSIV8DI,
32871 IX86_BUILTIN_SCATTERALTDIV16SI,
32872 IX86_BUILTIN_SCATTERDIV16SF,
32873 IX86_BUILTIN_SCATTERDIV16SI,
32874 IX86_BUILTIN_SCATTERDIV8DF,
32875 IX86_BUILTIN_SCATTERDIV8DI,
32876 IX86_BUILTIN_SCATTERSIV16SF,
32877 IX86_BUILTIN_SCATTERSIV16SI,
32878 IX86_BUILTIN_SCATTERSIV8DF,
32879 IX86_BUILTIN_SCATTERSIV8DI,
32880
32881 /* AVX512PF */
32882 IX86_BUILTIN_GATHERPFQPD,
32883 IX86_BUILTIN_GATHERPFDPS,
32884 IX86_BUILTIN_GATHERPFDPD,
32885 IX86_BUILTIN_GATHERPFQPS,
32886 IX86_BUILTIN_SCATTERPFDPD,
32887 IX86_BUILTIN_SCATTERPFDPS,
32888 IX86_BUILTIN_SCATTERPFQPD,
32889 IX86_BUILTIN_SCATTERPFQPS,
32890
32891 /* AVX-512ER */
32892 IX86_BUILTIN_EXP2PD_MASK,
32893 IX86_BUILTIN_EXP2PS_MASK,
32894 IX86_BUILTIN_EXP2PS,
32895 IX86_BUILTIN_RCP28PD,
32896 IX86_BUILTIN_RCP28PS,
32897 IX86_BUILTIN_RCP28SD,
32898 IX86_BUILTIN_RCP28SS,
32899 IX86_BUILTIN_RSQRT28PD,
32900 IX86_BUILTIN_RSQRT28PS,
32901 IX86_BUILTIN_RSQRT28SD,
32902 IX86_BUILTIN_RSQRT28SS,
32903
32904 /* AVX-512IFMA */
32905 IX86_BUILTIN_VPMADD52LUQ512,
32906 IX86_BUILTIN_VPMADD52HUQ512,
32907 IX86_BUILTIN_VPMADD52LUQ256,
32908 IX86_BUILTIN_VPMADD52HUQ256,
32909 IX86_BUILTIN_VPMADD52LUQ128,
32910 IX86_BUILTIN_VPMADD52HUQ128,
32911 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
32912 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
32913 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
32914 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
32915 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
32916 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
32917
32918 /* AVX-512VBMI */
32919 IX86_BUILTIN_VPMULTISHIFTQB512,
32920 IX86_BUILTIN_VPMULTISHIFTQB256,
32921 IX86_BUILTIN_VPMULTISHIFTQB128,
32922 IX86_BUILTIN_VPERMVARQI512_MASK,
32923 IX86_BUILTIN_VPERMT2VARQI512,
32924 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
32925 IX86_BUILTIN_VPERMI2VARQI512,
32926 IX86_BUILTIN_VPERMVARQI256_MASK,
32927 IX86_BUILTIN_VPERMVARQI128_MASK,
32928 IX86_BUILTIN_VPERMT2VARQI256,
32929 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
32930 IX86_BUILTIN_VPERMT2VARQI128,
32931 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32932 IX86_BUILTIN_VPERMI2VARQI256,
32933 IX86_BUILTIN_VPERMI2VARQI128,
32934
32935 /* SHA builtins. */
32936 IX86_BUILTIN_SHA1MSG1,
32937 IX86_BUILTIN_SHA1MSG2,
32938 IX86_BUILTIN_SHA1NEXTE,
32939 IX86_BUILTIN_SHA1RNDS4,
32940 IX86_BUILTIN_SHA256MSG1,
32941 IX86_BUILTIN_SHA256MSG2,
32942 IX86_BUILTIN_SHA256RNDS2,
32943
32944 /* CLWB instructions. */
32945 IX86_BUILTIN_CLWB,
32946
32947 /* PCOMMIT instructions. */
32948 IX86_BUILTIN_PCOMMIT,
32949
32950 /* CLFLUSHOPT instructions. */
32951 IX86_BUILTIN_CLFLUSHOPT,
32952
32953 /* TFmode support builtins. */
32954 IX86_BUILTIN_INFQ,
32955 IX86_BUILTIN_HUGE_VALQ,
32956 IX86_BUILTIN_NANQ,
32957 IX86_BUILTIN_NANSQ,
32958 IX86_BUILTIN_FABSQ,
32959 IX86_BUILTIN_COPYSIGNQ,
32960
32961 /* Vectorizer support builtins. */
32962 IX86_BUILTIN_CPYSGNPS,
32963 IX86_BUILTIN_CPYSGNPD,
32964 IX86_BUILTIN_CPYSGNPS256,
32965 IX86_BUILTIN_CPYSGNPS512,
32966 IX86_BUILTIN_CPYSGNPD256,
32967 IX86_BUILTIN_CPYSGNPD512,
32968 IX86_BUILTIN_FLOORPS512,
32969 IX86_BUILTIN_FLOORPD512,
32970 IX86_BUILTIN_CEILPS512,
32971 IX86_BUILTIN_CEILPD512,
32972 IX86_BUILTIN_TRUNCPS512,
32973 IX86_BUILTIN_TRUNCPD512,
32974 IX86_BUILTIN_CVTPS2DQ512,
32975 IX86_BUILTIN_VEC_PACK_SFIX512,
32976 IX86_BUILTIN_FLOORPS_SFIX512,
32977 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32978 IX86_BUILTIN_CEILPS_SFIX512,
32979 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32980 IX86_BUILTIN_ROUNDPS_AZ_SFIX512,
32981 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32982
32983
32984 /* FMA4 instructions. */
32985 IX86_BUILTIN_VFMADDSS,
32986 IX86_BUILTIN_VFMADDSD,
32987 IX86_BUILTIN_VFMADDPS,
32988 IX86_BUILTIN_VFMADDPD,
32989 IX86_BUILTIN_VFMADDPS256,
32990 IX86_BUILTIN_VFMADDPD256,
32991 IX86_BUILTIN_VFMADDSUBPS,
32992 IX86_BUILTIN_VFMADDSUBPD,
32993 IX86_BUILTIN_VFMADDSUBPS256,
32994 IX86_BUILTIN_VFMADDSUBPD256,
32995
32996 /* FMA3 instructions. */
32997 IX86_BUILTIN_VFMADDSS3,
32998 IX86_BUILTIN_VFMADDSD3,
32999
33000 /* XOP instructions. */
33001 IX86_BUILTIN_VPCMOV,
33002 IX86_BUILTIN_VPCMOV_V2DI,
33003 IX86_BUILTIN_VPCMOV_V4SI,
33004 IX86_BUILTIN_VPCMOV_V8HI,
33005 IX86_BUILTIN_VPCMOV_V16QI,
33006 IX86_BUILTIN_VPCMOV_V4SF,
33007 IX86_BUILTIN_VPCMOV_V2DF,
33008 IX86_BUILTIN_VPCMOV256,
33009 IX86_BUILTIN_VPCMOV_V4DI256,
33010 IX86_BUILTIN_VPCMOV_V8SI256,
33011 IX86_BUILTIN_VPCMOV_V16HI256,
33012 IX86_BUILTIN_VPCMOV_V32QI256,
33013 IX86_BUILTIN_VPCMOV_V8SF256,
33014 IX86_BUILTIN_VPCMOV_V4DF256,
33015
33016 IX86_BUILTIN_VPPERM,
33017
33018 IX86_BUILTIN_VPMACSSWW,
33019 IX86_BUILTIN_VPMACSWW,
33020 IX86_BUILTIN_VPMACSSWD,
33021 IX86_BUILTIN_VPMACSWD,
33022 IX86_BUILTIN_VPMACSSDD,
33023 IX86_BUILTIN_VPMACSDD,
33024 IX86_BUILTIN_VPMACSSDQL,
33025 IX86_BUILTIN_VPMACSSDQH,
33026 IX86_BUILTIN_VPMACSDQL,
33027 IX86_BUILTIN_VPMACSDQH,
33028 IX86_BUILTIN_VPMADCSSWD,
33029 IX86_BUILTIN_VPMADCSWD,
33030
33031 IX86_BUILTIN_VPHADDBW,
33032 IX86_BUILTIN_VPHADDBD,
33033 IX86_BUILTIN_VPHADDBQ,
33034 IX86_BUILTIN_VPHADDWD,
33035 IX86_BUILTIN_VPHADDWQ,
33036 IX86_BUILTIN_VPHADDDQ,
33037 IX86_BUILTIN_VPHADDUBW,
33038 IX86_BUILTIN_VPHADDUBD,
33039 IX86_BUILTIN_VPHADDUBQ,
33040 IX86_BUILTIN_VPHADDUWD,
33041 IX86_BUILTIN_VPHADDUWQ,
33042 IX86_BUILTIN_VPHADDUDQ,
33043 IX86_BUILTIN_VPHSUBBW,
33044 IX86_BUILTIN_VPHSUBWD,
33045 IX86_BUILTIN_VPHSUBDQ,
33046
33047 IX86_BUILTIN_VPROTB,
33048 IX86_BUILTIN_VPROTW,
33049 IX86_BUILTIN_VPROTD,
33050 IX86_BUILTIN_VPROTQ,
33051 IX86_BUILTIN_VPROTB_IMM,
33052 IX86_BUILTIN_VPROTW_IMM,
33053 IX86_BUILTIN_VPROTD_IMM,
33054 IX86_BUILTIN_VPROTQ_IMM,
33055
33056 IX86_BUILTIN_VPSHLB,
33057 IX86_BUILTIN_VPSHLW,
33058 IX86_BUILTIN_VPSHLD,
33059 IX86_BUILTIN_VPSHLQ,
33060 IX86_BUILTIN_VPSHAB,
33061 IX86_BUILTIN_VPSHAW,
33062 IX86_BUILTIN_VPSHAD,
33063 IX86_BUILTIN_VPSHAQ,
33064
33065 IX86_BUILTIN_VFRCZSS,
33066 IX86_BUILTIN_VFRCZSD,
33067 IX86_BUILTIN_VFRCZPS,
33068 IX86_BUILTIN_VFRCZPD,
33069 IX86_BUILTIN_VFRCZPS256,
33070 IX86_BUILTIN_VFRCZPD256,
33071
33072 IX86_BUILTIN_VPCOMEQUB,
33073 IX86_BUILTIN_VPCOMNEUB,
33074 IX86_BUILTIN_VPCOMLTUB,
33075 IX86_BUILTIN_VPCOMLEUB,
33076 IX86_BUILTIN_VPCOMGTUB,
33077 IX86_BUILTIN_VPCOMGEUB,
33078 IX86_BUILTIN_VPCOMFALSEUB,
33079 IX86_BUILTIN_VPCOMTRUEUB,
33080
33081 IX86_BUILTIN_VPCOMEQUW,
33082 IX86_BUILTIN_VPCOMNEUW,
33083 IX86_BUILTIN_VPCOMLTUW,
33084 IX86_BUILTIN_VPCOMLEUW,
33085 IX86_BUILTIN_VPCOMGTUW,
33086 IX86_BUILTIN_VPCOMGEUW,
33087 IX86_BUILTIN_VPCOMFALSEUW,
33088 IX86_BUILTIN_VPCOMTRUEUW,
33089
33090 IX86_BUILTIN_VPCOMEQUD,
33091 IX86_BUILTIN_VPCOMNEUD,
33092 IX86_BUILTIN_VPCOMLTUD,
33093 IX86_BUILTIN_VPCOMLEUD,
33094 IX86_BUILTIN_VPCOMGTUD,
33095 IX86_BUILTIN_VPCOMGEUD,
33096 IX86_BUILTIN_VPCOMFALSEUD,
33097 IX86_BUILTIN_VPCOMTRUEUD,
33098
33099 IX86_BUILTIN_VPCOMEQUQ,
33100 IX86_BUILTIN_VPCOMNEUQ,
33101 IX86_BUILTIN_VPCOMLTUQ,
33102 IX86_BUILTIN_VPCOMLEUQ,
33103 IX86_BUILTIN_VPCOMGTUQ,
33104 IX86_BUILTIN_VPCOMGEUQ,
33105 IX86_BUILTIN_VPCOMFALSEUQ,
33106 IX86_BUILTIN_VPCOMTRUEUQ,
33107
33108 IX86_BUILTIN_VPCOMEQB,
33109 IX86_BUILTIN_VPCOMNEB,
33110 IX86_BUILTIN_VPCOMLTB,
33111 IX86_BUILTIN_VPCOMLEB,
33112 IX86_BUILTIN_VPCOMGTB,
33113 IX86_BUILTIN_VPCOMGEB,
33114 IX86_BUILTIN_VPCOMFALSEB,
33115 IX86_BUILTIN_VPCOMTRUEB,
33116
33117 IX86_BUILTIN_VPCOMEQW,
33118 IX86_BUILTIN_VPCOMNEW,
33119 IX86_BUILTIN_VPCOMLTW,
33120 IX86_BUILTIN_VPCOMLEW,
33121 IX86_BUILTIN_VPCOMGTW,
33122 IX86_BUILTIN_VPCOMGEW,
33123 IX86_BUILTIN_VPCOMFALSEW,
33124 IX86_BUILTIN_VPCOMTRUEW,
33125
33126 IX86_BUILTIN_VPCOMEQD,
33127 IX86_BUILTIN_VPCOMNED,
33128 IX86_BUILTIN_VPCOMLTD,
33129 IX86_BUILTIN_VPCOMLED,
33130 IX86_BUILTIN_VPCOMGTD,
33131 IX86_BUILTIN_VPCOMGED,
33132 IX86_BUILTIN_VPCOMFALSED,
33133 IX86_BUILTIN_VPCOMTRUED,
33134
33135 IX86_BUILTIN_VPCOMEQQ,
33136 IX86_BUILTIN_VPCOMNEQ,
33137 IX86_BUILTIN_VPCOMLTQ,
33138 IX86_BUILTIN_VPCOMLEQ,
33139 IX86_BUILTIN_VPCOMGTQ,
33140 IX86_BUILTIN_VPCOMGEQ,
33141 IX86_BUILTIN_VPCOMFALSEQ,
33142 IX86_BUILTIN_VPCOMTRUEQ,
33143
33144 /* LWP instructions. */
33145 IX86_BUILTIN_LLWPCB,
33146 IX86_BUILTIN_SLWPCB,
33147 IX86_BUILTIN_LWPVAL32,
33148 IX86_BUILTIN_LWPVAL64,
33149 IX86_BUILTIN_LWPINS32,
33150 IX86_BUILTIN_LWPINS64,
33151
33152 IX86_BUILTIN_CLZS,
33153
33154 /* RTM */
33155 IX86_BUILTIN_XBEGIN,
33156 IX86_BUILTIN_XEND,
33157 IX86_BUILTIN_XABORT,
33158 IX86_BUILTIN_XTEST,
33159
33160 /* MPX */
33161 IX86_BUILTIN_BNDMK,
33162 IX86_BUILTIN_BNDSTX,
33163 IX86_BUILTIN_BNDLDX,
33164 IX86_BUILTIN_BNDCL,
33165 IX86_BUILTIN_BNDCU,
33166 IX86_BUILTIN_BNDRET,
33167 IX86_BUILTIN_BNDNARROW,
33168 IX86_BUILTIN_BNDINT,
33169 IX86_BUILTIN_SIZEOF,
33170 IX86_BUILTIN_BNDLOWER,
33171 IX86_BUILTIN_BNDUPPER,
33172
33173 /* BMI instructions. */
33174 IX86_BUILTIN_BEXTR32,
33175 IX86_BUILTIN_BEXTR64,
33176 IX86_BUILTIN_CTZS,
33177
33178 /* TBM instructions. */
33179 IX86_BUILTIN_BEXTRI32,
33180 IX86_BUILTIN_BEXTRI64,
33181
33182 /* BMI2 instructions. */
33183 IX86_BUILTIN_BZHI32,
33184 IX86_BUILTIN_BZHI64,
33185 IX86_BUILTIN_PDEP32,
33186 IX86_BUILTIN_PDEP64,
33187 IX86_BUILTIN_PEXT32,
33188 IX86_BUILTIN_PEXT64,
33189
33190 /* ADX instructions. */
33191 IX86_BUILTIN_ADDCARRYX32,
33192 IX86_BUILTIN_ADDCARRYX64,
33193
33194 /* SBB instructions. */
33195 IX86_BUILTIN_SBB32,
33196 IX86_BUILTIN_SBB64,
33197
33198 /* FSGSBASE instructions. */
33199 IX86_BUILTIN_RDFSBASE32,
33200 IX86_BUILTIN_RDFSBASE64,
33201 IX86_BUILTIN_RDGSBASE32,
33202 IX86_BUILTIN_RDGSBASE64,
33203 IX86_BUILTIN_WRFSBASE32,
33204 IX86_BUILTIN_WRFSBASE64,
33205 IX86_BUILTIN_WRGSBASE32,
33206 IX86_BUILTIN_WRGSBASE64,
33207
33208 /* RDRND instructions. */
33209 IX86_BUILTIN_RDRAND16_STEP,
33210 IX86_BUILTIN_RDRAND32_STEP,
33211 IX86_BUILTIN_RDRAND64_STEP,
33212
33213 /* RDSEED instructions. */
33214 IX86_BUILTIN_RDSEED16_STEP,
33215 IX86_BUILTIN_RDSEED32_STEP,
33216 IX86_BUILTIN_RDSEED64_STEP,
33217
33218 /* F16C instructions. */
33219 IX86_BUILTIN_CVTPH2PS,
33220 IX86_BUILTIN_CVTPH2PS256,
33221 IX86_BUILTIN_CVTPS2PH,
33222 IX86_BUILTIN_CVTPS2PH256,
33223
33224 /* MONITORX and MWAITX instructions. */
33225 IX86_BUILTIN_MONITORX,
33226 IX86_BUILTIN_MWAITX,
33227
33228 /* CFString built-in for Darwin. */
33229 IX86_BUILTIN_CFSTRING,
33230
33231 /* Builtins to get CPU type and supported features. */
33232 IX86_BUILTIN_CPU_INIT,
33233 IX86_BUILTIN_CPU_IS,
33234 IX86_BUILTIN_CPU_SUPPORTS,
33235
33236 /* Read/write FLAGS register built-ins. */
33237 IX86_BUILTIN_READ_FLAGS,
33238 IX86_BUILTIN_WRITE_FLAGS,
33239
33240 /* PKU instructions. */
33241 IX86_BUILTIN_RDPKRU,
33242 IX86_BUILTIN_WRPKRU,
33243
33244 IX86_BUILTIN_MAX
33245 };
33246
33247 /* Table for the ix86 builtin decls. */
33248 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
33249
33250 /* Table of all of the builtin functions that are possible with different ISAs,
33251 but which are not built until a function is declared that uses that
33252 ISA. */
33253 struct builtin_isa {
33254 const char *name; /* function name */
33255 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
33256 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
33257 bool const_p; /* true if the declaration is constant */
33258 bool leaf_p; /* true if the declaration has leaf attribute */
33259 bool nothrow_p; /* true if the declaration has nothrow attribute */
33260 bool set_and_not_built_p; /* true if the builtin is recorded here but its decl has not been built yet */
33261 };
33262
33263 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
33264
33265 /* Bits that can still enable any inclusion of a builtin. */
33266 static HOST_WIDE_INT deferred_isa_values = 0;
33267
33268 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
33269 of which isa_flags to use in the ix86_builtins_isa array. Stores the
33270 function decl in the ix86_builtins array. Returns the function decl or
33271 NULL_TREE, if the builtin was not added.
33272
33273 If the front end has a special hook for builtin functions, delay adding
33274 builtin functions that aren't in the current ISA until the ISA is changed
33275 with function specific optimization. Doing so can save about 300K for the
33276 default compiler. When the builtin is expanded, check at that time whether
33277 it is valid.
33278
33279 If the front end doesn't have a special hook, record all builtins, even
33280 those that aren't in the current ISA, in case the user uses function
33281 specific options for a different ISA, so that we don't get scope errors
33282 if a builtin is added in the middle of a function scope. */
33283
33284 static inline tree
33285 def_builtin (HOST_WIDE_INT mask, const char *name,
33286 enum ix86_builtin_func_type tcode,
33287 enum ix86_builtins code)
33288 {
33289 tree decl = NULL_TREE;
33290
33291 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
33292 {
33293 ix86_builtins_isa[(int) code].isa = mask;
33294
33295 /* OPTION_MASK_ISA_AVX512VL has a special meaning. Unlike the generic case,
33296 where any set bit means the built-in is enabled, this bit must be *and-ed*
33297 with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
33298 means that *both* cpuid bits must be set for the built-in to be available.
33299 Handle this here. */
33300 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
33301 mask &= ~OPTION_MASK_ISA_AVX512VL;
33302
33303 mask &= ~OPTION_MASK_ISA_64BIT;
33304 if (mask == 0
33305 || (mask & ix86_isa_flags) != 0
33306 || (lang_hooks.builtin_function
33307 == lang_hooks.builtin_function_ext_scope))
33309 {
33310 tree type = ix86_get_builtin_func_type (tcode);
33311 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
33312 NULL, NULL_TREE);
33313 ix86_builtins[(int) code] = decl;
33314 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
33315 }
33316 else
33317 {
33318 /* Only a MASK where set_and_not_built_p == true can potentially
33319 include the builtin later. */
33320 deferred_isa_values |= mask;
33321 ix86_builtins[(int) code] = NULL_TREE;
33322 ix86_builtins_isa[(int) code].tcode = tcode;
33323 ix86_builtins_isa[(int) code].name = name;
33324 ix86_builtins_isa[(int) code].leaf_p = false;
33325 ix86_builtins_isa[(int) code].nothrow_p = false;
33326 ix86_builtins_isa[(int) code].const_p = false;
33327 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
33328 }
33329 }
33330
33331 return decl;
33332 }
33333
33334 /* Like def_builtin, but also marks the function decl "const". */
33335
33336 static inline tree
33337 def_builtin_const (HOST_WIDE_INT mask, const char *name,
33338 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
33339 {
33340 tree decl = def_builtin (mask, name, tcode, code);
33341 if (decl)
33342 TREE_READONLY (decl) = 1;
33343 else
33344 ix86_builtins_isa[(int) code].const_p = true;
33345
33346 return decl;
33347 }
33348
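/* Illustrative sketch of how these helpers are used (a hypothetical example;
   the real registrations live in the ix86_init_*_builtins routines later in
   this file).  A builtin guarded by both AVX512DQ and AVX512VL would be
   registered as

     def_builtin_const (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL,
                        "__builtin_ia32_example", VOID_FTYPE_VOID,
                        IX86_BUILTIN_EXAMPLE);

   where "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE are made-up names.
   If the required ISA is not enabled at this point (and the front end defers
   out-of-scope builtins), the decl is not built; the mask is instead
   accumulated in deferred_isa_values and the ix86_builtins_isa entry is
   recorded, so ix86_add_new_builtins can create the decl later once a target
   attribute or pragma enables the ISA.  */
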
33349 /* Add any new builtin functions for a given ISA that may not have been
33350 declared. This saves a bit of space compared to adding all of the
33351 declarations to the tree, even if we didn't use them. */
33352
33353 static void
33354 ix86_add_new_builtins (HOST_WIDE_INT isa)
33355 {
33356 if ((isa & deferred_isa_values) == 0)
33357 return;
33358
33359 /* The bits in ISA are handled now, so remove them from the deferred isa values. */
33360 deferred_isa_values &= ~isa;
33361
33362 int i;
33363 tree saved_current_target_pragma = current_target_pragma;
33364 current_target_pragma = NULL_TREE;
33365
33366 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
33367 {
33368 if ((ix86_builtins_isa[i].isa & isa) != 0
33369 && ix86_builtins_isa[i].set_and_not_built_p)
33370 {
33371 tree decl, type;
33372
33373 /* Don't define the builtin again. */
33374 ix86_builtins_isa[i].set_and_not_built_p = false;
33375
33376 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
33377 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
33378 type, i, BUILT_IN_MD, NULL,
33379 NULL_TREE);
33380
33381 ix86_builtins[i] = decl;
33382 if (ix86_builtins_isa[i].const_p)
33383 TREE_READONLY (decl) = 1;
33384 if (ix86_builtins_isa[i].leaf_p)
33385 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
33386 NULL_TREE);
33387 if (ix86_builtins_isa[i].nothrow_p)
33388 TREE_NOTHROW (decl) = 1;
33389 }
33390 }
33391
33392 current_target_pragma = saved_current_target_pragma;
33393 }
33394
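/* Illustrative trigger for the deferral machinery above (a hedged sketch,
   not code from this file): compiling

     __attribute__ ((target ("avx512f")))
     void f (void) { ... }

   while the command-line ISA lacks AVX512F makes the target-attribute
   handling recompute ix86_isa_flags and call ix86_add_new_builtins with the
   newly enabled bits, at which point any matching deferred decls are finally
   created.  */
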
33395 /* Bits for builtin_description.flag. */
33396
33397 /* Set when we don't support the comparison natively, and should
33398 swap the comparison operands in order to support it. */
33399 #define BUILTIN_DESC_SWAP_OPERANDS 1
33400
33401 struct builtin_description
33402 {
33403 const HOST_WIDE_INT mask;
33404 const enum insn_code icode;
33405 const char *const name;
33406 const enum ix86_builtins code;
33407 const enum rtx_code comparison;
33408 const int flag;
33409 };
33410
33411 static const struct builtin_description bdesc_comi[] =
33412 {
33413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
33414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
33415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
33416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
33417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
33418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
33419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
33420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
33421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
33422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
33423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
33424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
33425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
33426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
33427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
33428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
33429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
33430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
33431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
33432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
33433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
33434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
33435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
33436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
33437 };
33438
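/* Illustrative reading of the table above (a hedged sketch; user code
   normally reaches these through the <xmmintrin.h> intrinsics such as
   _mm_comieq_ss rather than calling the builtin directly):

     __v4sf a = { 1.0f, 2.0f, 3.0f, 4.0f };
     __v4sf b = { 1.0f, 5.0f, 6.0f, 7.0f };
     int eq = __builtin_ia32_comieq (a, b);

   The first row says this builtin is available under -msse, expands through
   CODE_FOR_sse_comi, and tests the UNEQ condition on the low elements.  */
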
33439 static const struct builtin_description bdesc_pcmpestr[] =
33440 {
33441 /* SSE4.2 */
33442 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
33443 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
33444 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
33445 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
33446 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
33447 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
33448 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
33449 };
33450
33451 static const struct builtin_description bdesc_pcmpistr[] =
33452 {
33453 /* SSE4.2 */
33454 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
33455 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
33456 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
33457 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
33458 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
33459 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
33460 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
33461 };
33462
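/* Illustrative user-level call of one of the special builtins listed below
   (a hedged sketch; these are normally reached through wrappers such as
   _mm_stream_ps in <xmmintrin.h> rather than used directly):

     float buf[4] __attribute__ ((aligned (16)));
     __v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
     __builtin_ia32_movntps (buf, v);

   The VOID_FTYPE_PFLOAT_V4SF flag on the corresponding row is what gives the
   builtin that (float *, __v4sf) prototype.  */
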
33463 /* Special builtins with variable number of arguments. */
33464 static const struct builtin_description bdesc_special_args[] =
33465 {
33466 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
33467 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
33468 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
33469
33470 /* 80387 (used internally for atomic compound assignment). */
33471 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
33472 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
33473 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
33474 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
33475
33476 /* MMX */
33477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
33478
33479 /* 3DNow! */
33480 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
33481
33482 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
33483 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
33484 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
33485 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33486 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33487 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33488 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33489 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33490 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33491
33492 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
33493 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
33494 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33495 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33496 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33497 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33498 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33499 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
33500
33501 /* SSE */
33502 { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
33503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
33504 { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
33505
33506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
33507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
33508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
33509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
33510
33511 /* SSE or 3DNow!A */
33512 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
33513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
33514
33515 /* SSE2 */
33516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
33517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
33518 { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
33519 { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
33520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
33521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
33522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
33523 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
33524 { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
33525 { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
33526
33527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
33528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
33529
33530 /* SSE3 */
33531 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
33532
33533 /* SSE4.1 */
33534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
33535
33536 /* SSE4A */
33537 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
33538 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
33539
33540 /* AVX */
33541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
33542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
33543
33544 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
33545 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
33546 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
33547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
33548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
33549
33550 { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
33551 { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
33552 { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
33553 { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
33554 { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
33555 { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
33556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
33557
33558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
33559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
33560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
33561
33562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
33563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
33564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
33565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
33566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
33567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
33568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
33569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
33570
33571 /* AVX2 */
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
33581
33582 /* AVX512F */
33583 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
33584 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
33585 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
33586 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
33587 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
33588 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
33589 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
33590 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
33591 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
33592 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
33593 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
33594 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
33595 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI },
33596 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI },
33597 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI },
33598 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI },
33599 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
33600 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
33601 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
33602 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
33603 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
33604 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
33605 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
33606 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
33607 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI },
33608 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI },
33609 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI },
33610 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
33611 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
33612 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
33613 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
33614 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
33615 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
33616 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
33617 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
33618 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
33619 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
33620 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
33621 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
33622 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
33623 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
33624 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
33625 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI },
33626 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
33627 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
33628 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
33629 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
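/* Descriptive note (added): the masked load/store entries above share one
   shape -- a pointer operand, a pass-through source vector, and a write
   mask whose type matches the element count (UQI for 8 elements, UHI for
   16).  A minimal usage sketch, assuming the usual avx512fintrin.h wrapper
   name (quoted from memory, not verified here):

     #include <immintrin.h>

     __m512
     demo_masked_load (const float *p, __m512 src, __mmask16 m)
     {
       return _mm512_mask_loadu_ps (src, m, p);
     }

   where _mm512_mask_loadu_ps is expected to expand to
   __builtin_ia32_loadups512_mask, i.e. the IX86_BUILTIN_LOADUPS512 entry
   above.  */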
33630
33631 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
33632 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
33633 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
33634 { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
33635 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
33636 { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
33637
33638 /* FSGSBASE */
33639 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
33640 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
33641 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
33642 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
33643 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
33644 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
33645 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
33646 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
33647
33648 /* RTM */
33649 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
33650 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
33651 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
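/* Descriptive note (added): the RTM entries map onto the _xbegin, _xend and
   _xtest wrappers.  A hedged usage sketch, assuming the rtmintrin.h
   spellings _xbegin, _xend and _XBEGIN_STARTED (shown for illustration
   only, with a trivial non-transactional fallback):

     #include <immintrin.h>

     int
     demo_rtm_increment (int *p)
     {
       if (_xbegin () == _XBEGIN_STARTED)
         {
           *p += 1;
           _xend ();
           return 1;
         }
       return 0;
     }

   _xbegin corresponds to IX86_BUILTIN_XBEGIN (UNSIGNED_FTYPE_VOID) and
   _xend to IX86_BUILTIN_XEND (VOID_FTYPE_VOID) above.  */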
33652
33653 /* AVX512BW */
33654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI },
33655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI },
33656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI },
33657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI },
33658
33659 /* AVX512VL */
33660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCSHORT_V16HI_UHI },
33661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCSHORT_V8HI_UQI },
33662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCCHAR_V32QI_USI },
33663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCCHAR_V16QI_UHI },
33664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
33665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
33666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
33667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
33668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
33669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
33670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
33671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
33672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
33673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
33674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
33675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
33676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
33677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
33678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
33679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
33680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE_V4DF_UQI },
33681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI },
33682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT_V8SF_UQI },
33683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI },
33684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF_UQI },
33685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI },
33686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF_UQI },
33687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_UQI },
33688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCINT64_V4DI_UQI },
33689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCINT64_V2DI_UQI },
33690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCINT_V8SI_UQI },
33691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCINT_V4SI_UQI },
33692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V4DI_UQI },
33693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V2DI_UQI },
33694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V8SI_UQI },
33695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V4SI_UQI },
33696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V16HI_UHI },
33697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V8HI_UQI },
33698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI_USI },
33699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI_UHI },
33700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
33701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
33702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
33703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
33704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
33705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
33706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
33707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
33708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
33709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
33710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
33711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
33712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
33713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
33714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
33715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
33716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
33717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
33718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
33719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
33720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
33721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
33722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
33723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
33724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
33725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
33726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
33727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
33728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
33729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
33730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
33731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
33732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
33733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
33734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
33735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
33736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
33737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
33738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
33739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
33740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
33741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
33742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
33743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
33744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
33745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
33746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
33747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
33748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
33749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
33750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
33751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
33752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
33753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
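/* Descriptive note (added): the truncating-store entries above
   (__builtin_ia32_pmov...mem_mask) narrow each element of the source
   vector and store only the lanes selected by the mask.  The ss_truncate
   patterns narrow with signed saturation, the us_truncate patterns with
   unsigned saturation, and the plain truncate patterns by simple
   truncation, matching the pmovs, pmovus and pmov instruction forms.  */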
33754
33755 /* PCOMMIT. */
33756 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
33757
33758 /* RDPKRU and WRPKRU. */
33759 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
33760 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
33761 };
33762
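/* Descriptive note (added): every entry in the table above and in
   bdesc_args below supplies, in order, the ISA option mask guarding the
   builtin, the insn_code of the implementing pattern, the builtin's name
   (0 when the name is supplied elsewhere), its IX86_BUILTIN_* code, an RTX
   comparison code used by the compare builtins (UNKNOWN otherwise), and
   the ix86_builtin_func_type cast to int.  The builtin initialisation code
   later in this file walks these arrays and registers each entry whose ISA
   bits are enabled.  */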
33763 /* Builtins with variable number of arguments. */
33764 static const struct builtin_description bdesc_args[] =
33765 {
33766 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
33767 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
33768 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
33769 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
33770 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
33771 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
33772 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
33773
33774 /* MMX */
33775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33781
33782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33790
33791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33793
33794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33798
33799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33805
33806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33809 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33811 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33812
33813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
33814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
33815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
33816
33817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
33818
33819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
33820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
33821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
33822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
33823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
33824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
33825
33826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
33827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
33828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
33829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
33830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
33831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
33832
33833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
33834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
33835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
33836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
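/* Descriptive note (added): in the shift entries above the _COUNT suffix on
   the prototype marks the last operand as a shift count; the builtins whose
   names end in "i" (psllwi, pslldi, psllqi, ...) take the count as an
   integer, while the others take it in an MMX register.  A minimal sketch,
   assuming the mmintrin.h wrapper name (quoted from memory):

     #include <mmintrin.h>

     __m64
     demo_shift (__m64 v)
     {
       return _mm_slli_pi16 (v, 3);
     }

   where _mm_slli_pi16 is expected to expand to __builtin_ia32_psllwi,
   i.e. the IX86_BUILTIN_PSLLWI entry above.  */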
33837
33838 /* 3DNow! */
33839 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
33840 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
33841 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
33842 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
33843
33844 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33845 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33846 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33847 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33848 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33849 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33850 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33851 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33852 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33853 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33854 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33855 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33856 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33857 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33858 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33859
33860 /* 3DNow!A */
33861 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
33862 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
33863 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33864 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
33865 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33866 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33867
33868 /* SSE */
33869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
33870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33871 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33873 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33877 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33880 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33881
33882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33883
33884 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33885 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33886 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33892
33893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
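/* Descriptive note (added): the compare entries above all reuse a single
   pattern (sse_maskcmpv4sf3, or the vm variant for the scalar forms); the
   RTX code in the fifth column selects the condition, and the _SWAP suffix
   on the prototype asks the expander to swap the two operands, which is
   how cmpgt/cmpge are emitted as lt/le.  A hedged usage sketch, assuming
   the xmmintrin.h wrapper name (quoted from memory):

     #include <xmmintrin.h>

     __m128
     demo_cmp (__m128 a, __m128 b)
     {
       return _mm_cmpgt_ps (a, b);
     }

   where _mm_cmpgt_ps is expected to expand to __builtin_ia32_cmpgtps,
   i.e. the IX86_BUILTIN_CMPGTPS entry above (LT with swapped operands).  */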
33913
33914 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33915 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33918
33919 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33921 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33922 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33923
33924 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33925
33926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33929 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33930 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33931
33932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
33933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
33934 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
33935
33936 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
33937
33938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33941
33942 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
33943 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
33944
33945 /* SSE MMX or 3DNow!A */
33946 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33947 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33948 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33949
33950 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33951 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33952 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33953 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33954
33955 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33956 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33957
33958 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33959
33960 /* SSE2 */
33961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33962
33963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33967 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33968
33969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33974
33975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33976
33977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33979 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33980 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33981
33982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33984 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33985
33986 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33987 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33988 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33989 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33994
33995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
34000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
34001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
34002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
34003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
34004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
34005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
34006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
34007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
34008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
34009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
34010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
34011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
34012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
34013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
34014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
34015
34016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34017 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34020
34021 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34023 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34024 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34025
34026 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34027
34028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34029 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34030 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34031
34032 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
34033
34034 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34035 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34036 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34037 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34038 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34039 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34040 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34041 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34042
34043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34051
34052 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34053 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34054
34055 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34057 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34058 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34059
34060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34062
34063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34069
34070 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34071 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34072 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34074
34075 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34076 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34077 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34078 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34079 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34080 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34081 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34082 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34083
34084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
34085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
34086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
34087
34088 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
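/* As an illustrative example (not part of the table itself), the PSADBW128
   entry above is what <emmintrin.h> reaches through for _mm_sad_epu8:

     #include <emmintrin.h>

     __m128i
     sad16 (__m128i a, __m128i b)
     {
       return _mm_sad_epu8 (a, b);   // lowered to __builtin_ia32_psadbw128
     }

   which, with SSE2 enabled, expands via CODE_FOR_sse2_psadbw to a single
   psadbw instruction.  */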
34090
34091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
34092 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
34093
34094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
34095
34096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
34097 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
34098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
34099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
34100
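/* In the shift entries that follow, the suffix on the function type records
   how the count operand is supplied: the ..._SI_COUNT forms (the
   psllwi128-style names) take an integer count, the forms whose last operand
   is a vector (..._V8HI_COUNT and similar) take the count from the low
   element of an XMM register, and ..._INT_CONVERT marks the whole-register
   byte shifts pslldq/psrldq, whose V2DI argument has to be reinterpreted in
   the V1TI mode used by the insn pattern.  */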
34101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
34102 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
34103 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
34104 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
34105 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
34106 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
34107 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
34108
34109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
34110 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
34111 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
34112 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
34113 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
34114 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
34115 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
34116
34117 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
34118 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
34119 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
34120 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
34121
34122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
34123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
34124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
34125
34126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
34127
34128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
34129
34130 /* SSE2 MMX */
34131 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
34132 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
34133
34134 /* SSE3 */
34135 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
34136 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
34137
34138 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
34139 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34140 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
34141 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34142 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
34143 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34144
34145 /* SSSE3 */
34146 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
34147 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
34148 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
34149 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
34150 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
34151 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
34152
34153 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34154 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34155 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34156 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
34157 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34158 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34159 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34160 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34161 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34162 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
34163 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34164 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34165 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
34166 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
34167 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34168 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34169 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34170 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
34171 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34172 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
34173 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34174 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
34175 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34176 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
34177
34178 /* SSSE3. */
34179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
34180 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
34181
34182 /* SSE4.1 */
34183 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34184 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34185 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
34186 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
34187 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34188 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34189 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34190 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
34191 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
34192 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
34193
34194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
34195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
34196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
34197 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
34198 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
34199 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
34200 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
34201 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
34202 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
34203 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
34204 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
34205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
34206 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
34207
34208 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
34209 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34210 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34211 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34212 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34213 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34214 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
34215 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34216 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
34218 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
34219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34220
34221 /* SSE4.1 */
34222 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
34223 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
34224 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34225 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34226
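/* The floor/ceil/trunc/rint entries below have no insn patterns of their
   own: they reuse CODE_FOR_sse4_1_roundpd/roundps, and the slot that
   normally holds a comparison code instead carries ROUND_FLOOR, ROUND_CEIL,
   ROUND_TRUNC or ROUND_MXCSR, which is passed through as the rounding-mode
   immediate of the round instruction.  */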
34227 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
34228 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
34229 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
34230 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
34231
34232 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
34233 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
34234
34235 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
34236 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
34237
34238 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
34239 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
34240 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
34241 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
34242
34243 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
34244 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
34245
34246 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
34247 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
34248
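/* The ptest entries use the comparison field to say which flag the builtin
   reads after the PTEST instruction: EQ for ptestz (ZF set), LTU for ptestc
   (CF set) and GTU for ptestnzc (neither ZF nor CF set).  */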
34249 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
34250 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
34251 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
34252
34253 /* SSE4.2 */
34254 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34255 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
34256 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
34257 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34258 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
34259
34260 /* SSE4A */
34261 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
34262 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
34263 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
34264 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34265
34266 /* AES */
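/* These AES rows (and the PCLMUL row further down) carry a null name;
   presumably the user-visible builtins are registered separately under the
   AES and PCLMUL ISA flags rather than the SSE2 mask used here, so the
   entries in this table only drive expansion by their IX86_BUILTIN_*
   codes.  */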
34267 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
34268 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
34269
34270 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34271 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34272 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34273 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34274
34275 /* PCLMUL */
34276 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
34277
34278 /* AVX */
34279 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34280 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34283 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34284 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34286 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34287 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34293 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34294 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34295 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34296 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34297 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34298 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34299 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34300 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34301 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34302 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34303 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34304 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34305
34306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
34307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
34308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
34309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
34310
34311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
34312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
34313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
34314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
34315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
34316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
34317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
34318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
34323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
34324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
34325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
34326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
34327 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
34328 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
34329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
34330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
34331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
34332 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
34333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
34334 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
34335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
34336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
34337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
34338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
34339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
34340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
34341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
34342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
34343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
34344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
34345
34346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
34349
34350 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
34351 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34352 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34353 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34354 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34355
34356 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34357
34358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
34359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
34360
34361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
34362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
34363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
34364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
34365
34366 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
34367 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
34368
34369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
34370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
34371
34372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
34373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
34374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
34375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
34376
34377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
34378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
34379
34380 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
34381 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
34382
34383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34387
34388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
34389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
34390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
34391 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
34392 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
34393 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
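/* The six rows above correspond to the 128<->256-bit cast intrinsics
   (_mm256_castsi128_si256, _mm256_castsi256_si128 and their ps/pd
   counterparts): the widening casts leave the upper 128 bits undefined,
   and the narrowing casts simply return the low half.  */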
34394
34395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
34396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
34397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
34398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
34399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
34400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
34401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
34402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
34403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
34404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
34405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
34406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
34407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
34408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
34409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
34410
34411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
34412 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
34413
34414 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
34415 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
34416
34417 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
34418
34419 /* AVX2 */
34420 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
34421 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
34422 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
34423 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
34424 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
34425 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
34426 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
34427 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
34428 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34429 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34430 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34431 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34432 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34433 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34434 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34435 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34436 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
34437 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34438 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34439 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34440 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34441 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
34442 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
34443 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34444 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34445 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34446 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34447 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34448 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34449 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34450 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34451 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34452 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34453 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34454 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34455 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34456 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34457 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
34458 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
34459 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34460 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34461 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34462 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34463 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34464 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34465 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34466 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34467 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34468 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34469 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34470 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34471 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
34472 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
34473 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
34474 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
34475 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
34476 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
34477 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
34478 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
34479 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
34480 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
34481 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
34482 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
34483 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
34484 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
34485 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34486 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34487 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34488 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34489 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34490 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
34491 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34492 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
34493 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34494 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
34495 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
34496 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
34497 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34498 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34499 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34500 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
34501 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
34502 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
34503 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
34504 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
34505 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
34506 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
34507 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
34508 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
34509 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
34510 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
34511 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
34512 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
34513 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
34514 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
34515 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
34516 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
34517 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
34518 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34519 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34520 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34521 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34522 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34523 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34524 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34525 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34526 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34527 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34528 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
34531 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
34532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34533 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34534 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34535 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
34536 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
34537 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
34538 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
34539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
34540 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
34541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
34542 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
34543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
34544 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
34545 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
34546 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
34547 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
34548 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
34549 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34550 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
34551 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
34552 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
34553 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
34554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
34555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
34556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34557 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34559 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34560 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34561 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34562 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
34563 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
34564 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
34565 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
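  /* Illustrative note, not part of the table: each row above binds one
     __builtin_ia32_* entry point to an insn pattern and a prototype code.
     A minimal usage sketch for the variable-shift rows, assuming the
     standard <immintrin.h> wrapper _mm256_sllv_epi32 and -mavx2; the
     wrapper is expected to expand to __builtin_ia32_psllv8si
     (V8SI_FTYPE_V8SI_V8SI above):

	#include <immintrin.h>

	__m256i
	shift_each_lane (__m256i v, __m256i counts)
	{
	  // Per-element logical left shift; counts >= 32 yield zero lanes.
	  return _mm256_sllv_epi32 (v, counts);
	}
  */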
34566
34567 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
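  /* Illustrative sketch: __builtin_clzs above is typed
     UINT16_FTYPE_UINT16, so it can be called directly on 16-bit values
     when compiling with -mlzcnt:

	unsigned short
	leading_zeros16 (unsigned short x)
	{
	  return __builtin_clzs (x);
	}
  */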
34568
34569 /* BMI */
34570 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34571 { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
34572 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
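  /* Illustrative sketch for the BMI rows, assuming -mbmi: the
     UINT_FTYPE_UINT_UINT prototype means __builtin_ia32_bextr_u32 can be
     called directly; the <x86intrin.h> BEXTR wrappers are thin veneers
     over it.

	unsigned int
	extract_field (unsigned int word, unsigned int start, unsigned int len)
	{
	  // BEXTR control word: bits 0-7 hold the start, bits 8-15 the length.
	  return __builtin_ia32_bextr_u32 (word, start | (len << 8));
	}
  */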
34573
34574 /* TBM */
34575 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34576 { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
34577
34578 /* F16C */
34579 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
34580 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
34581 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
34582 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
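  /* Illustrative sketch for the F16C rows, assuming the usual
     <immintrin.h> wrappers and -mf16c; _mm_cvtps_ph is expected to expand
     to __builtin_ia32_vcvtps2ph (V8HI_FTYPE_V4SF_INT above), with the
     rounding control passed as the immediate:

	#include <immintrin.h>

	__m128i
	to_half_precision (__m128 x)
	{
	  return _mm_cvtps_ph (x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
	}
  */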
34583
34584 /* BMI2 */
34585 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34586 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
34587 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34588 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
34589 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
34590 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
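  /* Illustrative sketch for the BMI2 rows, assuming -mbmi2:
     __builtin_ia32_pdep_si is the entry point behind the _pdep_u32
     intrinsic and deposits the low bits of its first operand into the
     positions selected by the second.

	unsigned int
	scatter_low_bits (unsigned int src, unsigned int mask)
	{
	  return __builtin_ia32_pdep_si (src, mask);
	}
  */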
34591
34592 /* AVX512F */
34593 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
34594 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
34595 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
34596 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
34597 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
34598 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
34599 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
34600 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
34601 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34602 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34603 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34604 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34605 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34606 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
34607 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34608 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
34609 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34610 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34611 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
34612 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
34613 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34614 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34615 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
34616 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
34617 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
34618 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
34619 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34620 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34621 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34622 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34623 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
34624 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
34625 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
34626 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
34627 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
34628 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
34629 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
34630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
34631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
34648 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34649 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
34652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
34653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
34654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
34655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
34656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
34671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
34672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
34673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
34674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
34675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
34676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
34677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
34678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
34679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
34680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
34681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
34682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
34683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
34684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
34685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
34686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
34687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
34688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
34689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
34690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
34691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
34692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
34693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
34694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
34695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
34696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
34698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
34710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
34712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
34716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
34718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
34722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
34723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
34724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
34730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
34731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
34732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
34733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
34743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
34744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
34745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
34746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
34747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
34748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
34749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
34750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
34751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
34752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
34753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
34754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
34755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34759 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34760 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34761 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
34762 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
34763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
34765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
34767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
34768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
34772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
34773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
34776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
34777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
34778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
34779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
34782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
34784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
34785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
34786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
34787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
34788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
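  /* Illustrative note on the masked AVX512F rows above: the "_mask"
     builtins take the source operands, then a pass-through vector whose
     lanes survive where the mask bit is clear, then the mask itself
     (e.g. V16SI_FTYPE_V16SI_V16SI_V16SI_UHI); the "_maskz" forms zero
     the unselected lanes instead.  A sketch of how user code reaches
     __builtin_ia32_paddd512_mask, assuming the standard <immintrin.h>
     wrapper and -mavx512f:

	#include <immintrin.h>

	__m512i
	masked_add (__m512i fallback, __mmask16 m, __m512i a, __m512i b)
	{
	  // Lanes whose bit in M is clear keep the value from FALLBACK.
	  return _mm512_mask_add_epi32 (fallback, m, a, b);
	}
  */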
34789
34790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
34791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
34792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
34793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
34794 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
34795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND },
34796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND },
34797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND },
34798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND },
34799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND },
34800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND },
34801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
34802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
34803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
34804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND },
34805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND },
34806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
34807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
34808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
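  /* Illustrative note: in the rounding rows above the "comparison" slot
     carries the rounding mode (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC)
     instead of an RTX comparison code; these entries back the vectorized
     forms of floor/ceil/trunc.  A loop like the following may be lowered
     through them when AVX512F is enabled and the active floating-point
     options permit vectorizing the calls:

	void
	floor_all (float *__restrict dst, const float *__restrict src, int n)
	{
	  for (int i = 0; i < n; i++)
	    dst[i] = __builtin_floorf (src[i]);
	}
  */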
34809
34810 /* Mask arithmetic operations */
34811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
34814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
34820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
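  /* Illustrative sketch for the mask-arithmetic rows, assuming the usual
     <immintrin.h> wrappers and -mavx512f: the UHI prototypes operate on
     16-bit mask values, and _mm512_kand is expected to expand to
     __builtin_ia32_kandhi.

	#include <immintrin.h>

	__mmask16
	combine_masks (__mmask16 a, __mmask16 b)
	{
	  return _mm512_kand (a, b);
	}
  */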
34821
34822 /* SHA */
34823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
34827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
34829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
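  /* Illustrative sketch for the SHA rows: the null name slot means these
     builtins are declared elsewhere in this file, and the rows here only
     drive expansion.  Assuming the standard <immintrin.h> wrapper and
     -msha, _mm_sha1msg1_epu32 is expected to reach IX86_BUILTIN_SHA1MSG1:

	#include <immintrin.h>

	__m128i
	sha1_schedule_step (__m128i a, __m128i b)
	{
	  return _mm_sha1msg1_epu32 (a, b);
	}
  */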
34830
34831 /* AVX512VL. */
34832 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
34833 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
34834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34842 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34843 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
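/* Masked packed FP xor/or (AVX512DQ). */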
34870 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34871 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34872 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34873 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34874 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34875 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34876 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34877 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34878 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34879 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34880 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34881 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34882 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
34886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
34887 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
34888 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
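/* Masked packed conversions (FP <-> integer, float <-> double). */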
34889 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34890 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34891 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34892 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34893 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34894 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34895 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34896 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34899 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34900 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34901 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34902 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
34922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
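/* Masked element broadcasts from vector or general-purpose register sources. */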
34923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
34924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
34925 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34926 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
34927 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
34928 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
34929 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34930 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
34931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
34933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
34935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
34937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
34939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34942 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
34943 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
34944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
34945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
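/* Masked sign and zero extensions (pmovsx/pmovzx). */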
34946 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34947 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34970 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34971 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34972 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34973 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34974 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34975 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34976 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34977 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34978 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34979 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34980 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34981 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
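/* Masked integer add/subtract, including signed and unsigned saturating forms. */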
34996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34997 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35000 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35001 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35004 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35025 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35026 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35027 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
35029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
35030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
35031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
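/* Masked narrowing conversions (pmov*), with signed and unsigned saturating variants. */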
35032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
35033 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
35034 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
35035 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
35036 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
35037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
35038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
35039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
35040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
35041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
35042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
35043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
35044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
35045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
35046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
35047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
35048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
35049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
35050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
35051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
35052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
35053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
35054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
35055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
35056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
35057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
35058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
35059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
35060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
35061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
35062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
35063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
35064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
35065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
35066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
35067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
35068 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
35069 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
35070 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
35071 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
35072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
35077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
35078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
35079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
35080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
35081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
35082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
35083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
35084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35091 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35092 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35093 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35094 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35095 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35096 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35097 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35098 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35099 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35100 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
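/* Masked vector shifts and bitwise logic. */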
35106 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
35107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
35108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
35109 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35112 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
35113 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
35114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
35116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
35118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
35119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
35122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
35123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
35126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
35129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
35132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
35133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
35136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
35151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
35152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
35153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
35154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
35155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
35156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
35157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
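/* Masked ternary-logic builtins (vpternlogd/vpternlogq).  The 8-bit immediate
   is a truth table over the three sources; as a rough illustration, the
   intrinsic wrapper _mm256_ternarylogic_epi32 (a, b, c, 0xE8) computes the
   bitwise majority of a, b and c.  */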
35158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
35159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
35160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
35161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
35162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
35163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
35164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
35165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
35166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
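/* Masked FMA variants.  The _mask forms merge inactive lanes from the first
   source, the _mask3 forms merge from the third source, and the _maskz forms
   zero them.  */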
35170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
35215 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
35216 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
35219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
35220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
35221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
35222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
35225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
35226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
35227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
35228 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
35229 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
35230 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
35231 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
35232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
35233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
35234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
35235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
35236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
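/* Conversions between 64-bit integer lanes and float/double need AVX512DQ
   in addition to AVX512VL, hence the combined ISA masks below.  */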
35242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
35243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
35244 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
35245 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
35246 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
35247 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
35248 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
35249 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
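/* Two-table variable permutes.  The vpermi2var* forms overwrite the index
   operand with the result, the vpermt2var* forms overwrite one of the data
   tables instead; _maskz variants zero inactive lanes.  */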
35250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
35257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
35258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
35259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
35260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
35261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
35262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
35269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
35270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
35271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
35272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
35273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
35274 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
35277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
35278 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
35279 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
35280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
35282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
35283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
35284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
35285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
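/* Masked rotates: the prolv/prorv builtins take per-lane variable counts,
   the prol/pror builtins take an immediate count.  */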
35286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
35290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
35294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35306 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
35307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
35308 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
35309 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
35311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
35312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
35313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
35318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
35321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
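/* vfpclass builtins classify each lane (NaN, +/-0, +/-infinity, denormal,
   finite negative) according to the immediate and return the result as a
   mask.  */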
35322 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
35323 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
35324 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
35325 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
35326 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
35327 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
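/* vpmov*2m builtins gather the per-lane sign bits of a vector into a mask;
   the vpmovm2* builtins expand a mask back into all-ones/all-zeros lanes.  */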
35328 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
35329 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
35330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
35331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
35332 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
35333 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
35334 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
35335 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
35336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
35337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
35338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
35339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
35340 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
35341 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
35342 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
35343 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
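/* AVX-512 integer compares produce a mask (UQI/UHI/USI) rather than a vector
   of all-ones/all-zeros; illustratively, _mm256_cmpeq_epi32_mask (a, b)
   returns a __mmask8.  */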
35344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
35345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
35346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
35347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
35348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
35349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
35350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
35351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
35352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
35353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
35354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
35355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
35356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
35357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
35358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
35359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
35360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
35361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
35362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
35363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
35364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
35365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
35366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
35367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
35368 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
35369 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
35370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
35371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
35372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
35373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
35374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
35375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
35376 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
35377 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
35378 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
35379 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
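/* Masked compress packs the active lanes contiguously toward the low
   element; expand performs the inverse, scattering consecutive source lanes
   into the active destination lanes.  */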
35380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
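/* Masked integer min/max, signed and unsigned, for 8/16/32/64-bit lanes
   (the byte and word forms additionally require AVX512BW).  */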
35404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
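/* AVX512CD builtins: vpconflict* reports, per lane, which lower-numbered
   lanes hold the same value; vplzcnt* counts leading zero bits per lane.  */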
35436 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35437 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35438 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35439 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
35445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
35446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
35447 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35448 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35449 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35450 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
35452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
35453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
35454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
35455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
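/* Masked float <-> half-precision conversions; vcvtps2ph takes a
   rounding-control immediate.  */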
35456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
35457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
35458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
35459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
35460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
35479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
35480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
35481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
35482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
35484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
35486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
35487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
35488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
35489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
35490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
35491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
35492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
35493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
35494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
35495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
35496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
35497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
35498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
35499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
35500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
35501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
35502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
35503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
35504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
35505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
35506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
35507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
35508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
35509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
35510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
35511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
35512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
35513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
35515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
35516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
35517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
35518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
35519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
35520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
35521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
35522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
35524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
35525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
35526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
35527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
35528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
35529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
35530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
35531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
35532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
35533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
35534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
35535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
35536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
35537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
35538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
35539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
35540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
35541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
35542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
35543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
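/* A gloss on the entries above and below (illustrative, following the
   struct builtin_description layout defined earlier in this file): each
   row is { ISA option mask, CODE_FOR_* insn pattern, builtin name,
   IX86_BUILTIN_* code, comparison rtx code (UNKNOWN for these), function
   prototype enum cast to int }.  For the "_mask" builtins the prototype's
   last two operands are the merge source and the write mask, so e.g.
   V4SF_FTYPE_V4SF_V4SF_V4SF_UQI describes roughly
     __m128 builtin (__m128 a, __m128 b, __m128 merge_src, unsigned char k)
   while "_maskz" variants zero the masked-off elements instead of
   merging.  */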
35544
35545 /* AVX512DQ. */
35546 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
35547 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
35548 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
35549 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
35550 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
35551 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
35552 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
35553 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
35554 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
35555 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
35556 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
35557 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
35558 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
35559 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
35560 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
35561 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
35562 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
35563 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
35564 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
35565 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
35566 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
35567 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
35568 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
35569 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
35570 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
35571 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
35572 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
35573 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
35574 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
35575 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
35576 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
35577
35578 /* AVX512BW. */
35579 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
35580 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
35581 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
35582 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
35583 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
35584 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
35585 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
35586 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
35587 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
35588 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
35589 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
35590 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
35591 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
35592 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
35593 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
35594 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
35595 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
35596 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
35597 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35598 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35599 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35600 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35601 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35602 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35603 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35604 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35605 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35606 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35607 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35608 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35609 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35610 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35611 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35612 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35613 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35614 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35615 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35616 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35617 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35618 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35619 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35620 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35621 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35622 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35623 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
35624 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
35625 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
35626 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35627 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35628 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35629 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35630 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
35631 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
35632 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
35633 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
35634 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35635 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
35636 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
35637 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35638 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35639 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35640 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35641 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35642 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35643 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
35644 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
35645 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
35646 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
35647 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
35648 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
35649 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
35650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
35651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
35652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
35653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
35654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
35655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
35656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
35657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
35658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
35659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
35660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
35661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
35662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
35663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
35664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
35665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
35666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
35667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
35668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
35669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
35670
35671 /* AVX512IFMA. */
35672 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
35673 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
35674 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
35675 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
35676 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35677 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35678 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35679 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
35680 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35681 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35682 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
35683 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
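/* The vpmadd52[lh]uq entries above wrap the AVX512IFMA 52-bit integer
   multiply-add forms: each multiplies the low 52 bits of the two source
   elements as unsigned integers and adds either the low ("luq") or high
   ("huq") 52 bits of the product into the 64-bit accumulator operand,
   under the usual mask/maskz conventions (brief semantic note, not from
   the original comments).  */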
35684
35685 /* AVX512VBMI. */
35686 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35687 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35688 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35689 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35690 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35691 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35692 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
35693 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35694 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35695 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35696 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35697 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35698 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35699 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
35700 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
35701 };
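/* A minimal sketch of how a bdesc table like the one above is consumed
   when the target builtins are registered (approximate; the actual loop
   lives in the builtin-initialization code elsewhere in this file):

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
	 if (d->name == 0)
	   continue;
	 ftype = (enum ix86_builtin_func_type) d->flag;
	 def_builtin_const (d->mask, d->name, ftype, d->code);
       }

   i.e. each row simply pairs an ISA gate, an insn pattern, a user-visible
   __builtin_ia32_* name and a prototype.  */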
35702
35703 /* Builtins with rounding support. */
35704 static const struct builtin_description bdesc_round_args[] =
35705 {
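  /* As in the tables above, each entry is { ISA mask, insn code, builtin
     name, IX86_BUILTIN_* code, comparison, prototype }.  The distinguishing
     feature of this table is the trailing _INT operand in every prototype
     (e.g. V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT): it carries the explicit
     rounding-control / suppress-all-exceptions immediate that the
     *_round intrinsics pass through, roughly

       __m512d __builtin_ia32_addpd512_mask (__m512d a, __m512d b,
					     __m512d src, __mmask8 k,
					     int rounding);

     (signature shown for illustration only).  */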
35706 /* AVX512F. */
35707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
35712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
35713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
35714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
35715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
35716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
35717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
35718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
35719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
35720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
35721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
35722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
35723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
35724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
35725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
35726 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
35727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
35728 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
35729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
35730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
35731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
35732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
35733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
35734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
35735 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
35736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
35737 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
35738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
35743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
35744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
35745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
35746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
35747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
35748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
35749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
35750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
35751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
35752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
35755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
35756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
35757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
35758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
35771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
35772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
35773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
35774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
35779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
35780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
35787 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
35788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
35789 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
35790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
35791 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
35792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
35793 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
35794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
35795 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
35796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
35797 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
35798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
35799 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
35800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
35801 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
35802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
35809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
35810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
35824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
35826
35827 /* AVX512ER */
35828 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
35829 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
35830 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
35831 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
35832 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35833 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35834 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
35835 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
35836 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
35837 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
35838
35839 /* AVX512DQ. */
35840 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
35841 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
35842 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
35843 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
35844 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
35845 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
35846 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
35847 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
35848 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
35849 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
35850 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
35851 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
35852 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
35853 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
35854 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
35855 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
35856 };
35857
35858 /* Builtins for MPX. */
35859 static const struct builtin_description bdesc_mpx[] =
35860 {
35861 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
35862 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
35863 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
35864 };
35865
35866 /* Const builtins for MPX. */
35867 static const struct builtin_description bdesc_mpx_const[] =
35868 {
35869 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
35870 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
35871 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
35872 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
35873 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
35874 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
35875 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
35876 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
35877 };
35878
35879 /* FMA4 and XOP. */
35880 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
35881 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
35882 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
35883 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
35884 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
35885 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
35886 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
35887 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
35888 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
35889 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
35890 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
35891 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
35892 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
35893 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
35894 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
35895 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
35896 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
35897 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
35898 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
35899 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
35900 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
35901 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
35902 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
35903 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
35904 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
35905 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
35906 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
35907 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
35908 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
35909 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
35910 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
35911 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
35912 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
35913 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
35914 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
35915 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
35916 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
35917 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
35918 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
35919 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
35920 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
35921 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
35922 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
35923 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
35924 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
35925 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
35926 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
35927 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
35928 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
35929 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
35930 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
35931 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
35932
35933 static const struct builtin_description bdesc_multi_arg[] =
35934 {
35935 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
35936 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
35937 UNKNOWN, (int)MULTI_ARG_3_SF },
35938 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
35939 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
35940 UNKNOWN, (int)MULTI_ARG_3_DF },
35941
35942 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
35943 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
35944 UNKNOWN, (int)MULTI_ARG_3_SF },
35945 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
35946 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
35947 UNKNOWN, (int)MULTI_ARG_3_DF },
35948
35949 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
35950 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
35951 UNKNOWN, (int)MULTI_ARG_3_SF },
35952 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
35953 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
35954 UNKNOWN, (int)MULTI_ARG_3_DF },
35955 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
35956 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
35957 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35958 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
35959 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
35960 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35961
35962 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
35963 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35964 UNKNOWN, (int)MULTI_ARG_3_SF },
35965 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35966 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35967 UNKNOWN, (int)MULTI_ARG_3_DF },
35968 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35969 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35970 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35971 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35972 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35973 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35974
35975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
35980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35982
35983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35990
35991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35992
35993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
36000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
36001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
36002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
36003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
36004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
36005
36006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
36007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
36008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
36009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
36010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
36011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
36012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
36013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
36014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
36015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
36016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
36017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
36018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
36019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
36020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
36021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
36022
36023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
36024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
36025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
36026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
36027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
36028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
36029
36030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
36031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
36032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
36033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
36034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
36035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
36036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
36037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
36038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
36039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
36040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
36041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
36042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
36043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
36044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
36045
36046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
36047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
36048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
36049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
36050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
36051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
36052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
36053
36054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
36055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
36056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
36057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
36058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
36059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
36060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
36061
36062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
36063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
36064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
36065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
36066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
36067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
36068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
36069
36070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
36071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
36072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
36073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
36074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
36075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
36076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
36077
36078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
36079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
36080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
36081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
36082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
36083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
36084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
36085
36086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
36087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
36088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
36089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
36090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
36091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
36092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
36093
36094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
36095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
36096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
36097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
36098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
36099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
36100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
36101
36102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
36103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
36104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
36105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
36106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
36107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
36108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
36109
36110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
36111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
36112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
36113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
36114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
36115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
36116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
36117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
36118
36119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
36120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
36121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
36122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
36123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
36124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
36125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
36126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
36127
36128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
36129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
36130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
36131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
36132
36133 };
36134 \f
36135 /* TM vector builtins. */
36136
36137 /* Reuse the existing x86-specific `struct builtin_description' because
36138 we're lazy; add casts to make the TM builtin codes and types fit. */
36139 static const struct builtin_description bdesc_tm[] =
36140 {
36141 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
36142 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
36143 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
36144 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
36145 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
36146 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
36147 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
36148
36149 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
36150 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
36151 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
36152 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
36153 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
36154 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
36155 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
36156
36157 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
36158 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
36159 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
36160 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
36161 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
36162 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
36163 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
36164
36165 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
36166 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
36167 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
36168 };
36169
36170 /* Initialize the transactional memory vector load/store builtins. */
36171
36172 static void
36173 ix86_init_tm_builtins (void)
36174 {
36175 enum ix86_builtin_func_type ftype;
36176 const struct builtin_description *d;
36177 size_t i;
36178 tree decl;
36179 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
36180 tree attrs_log, attrs_type_log;
36181
36182 if (!flag_tm)
36183 return;
36184
36185 /* If there are no builtins defined, we must be compiling in a
36186 language without trans-mem support. */
36187 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
36188 return;
36189
36190 /* Use whatever attributes a normal TM load has. */
36191 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
36192 attrs_load = DECL_ATTRIBUTES (decl);
36193 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
36194 /* Use whatever attributes a normal TM store has. */
36195 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
36196 attrs_store = DECL_ATTRIBUTES (decl);
36197 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
36198 /* Use whatever attributes a normal TM log has. */
36199 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
36200 attrs_log = DECL_ATTRIBUTES (decl);
36201 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
36202
36203 for (i = 0, d = bdesc_tm;
36204 i < ARRAY_SIZE (bdesc_tm);
36205 i++, d++)
36206 {
36207 if ((d->mask & ix86_isa_flags) != 0
36208 || (lang_hooks.builtin_function
36209 == lang_hooks.builtin_function_ext_scope))
36210 {
36211 tree type, attrs, attrs_type;
36212 enum built_in_function code = (enum built_in_function) d->code;
36213
36214 ftype = (enum ix86_builtin_func_type) d->flag;
36215 type = ix86_get_builtin_func_type (ftype);
36216
36217 if (BUILTIN_TM_LOAD_P (code))
36218 {
36219 attrs = attrs_load;
36220 attrs_type = attrs_type_load;
36221 }
36222 else if (BUILTIN_TM_STORE_P (code))
36223 {
36224 attrs = attrs_store;
36225 attrs_type = attrs_type_store;
36226 }
36227 else
36228 {
36229 attrs = attrs_log;
36230 attrs_type = attrs_type_log;
36231 }
36232 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
36233 /* The builtin without the prefix for
36234 calling it directly. */
36235 d->name + strlen ("__builtin_"),
36236 attrs);
36237 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
36238 set the TYPE_ATTRIBUTES. */
36239 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
36240
36241 set_builtin_decl (code, decl, false);
36242 }
36243 }
36244 }
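
/* Illustrative sketch (editor's addition, not part of the original source):
   with -fgnu-tm, a 16-byte vector store performed inside a transaction is
   lowered to the builtin registered above for BUILT_IN_TM_STORE_M128,
   roughly:

     #include <xmmintrin.h>

     void
     store_in_txn (__m128 *p, __m128 v)
     {
       __transaction_atomic { *p = v; }
     }

   The store inside the transaction is emitted as a call to
   __builtin__ITM_WM128 (p, v); the library name passed to
   add_builtin_function ("_ITM_WM128") names the corresponding libitm entry
   point, so the call resolves there at link time.  */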
36245
36246 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
36247 not in the current target ISA, so that the user can compile particular
36248 modules with target-specific options that differ from the command-line
36249 options. */
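/* For illustration (editor's addition, not part of the original source): a
   translation unit compiled without -mavx2 on the command line can still use
   one of the AVX2 gather builtins defined below by enabling the ISA on a
   single function, roughly:

     #include <immintrin.h>

     __attribute__ ((target ("avx2")))
     static __m256d
     gather4 (const double *base, __m128i idx)
     {
       return _mm256_i32gather_pd (base, idx, 8);
     }

   The intrinsic expands to __builtin_ia32_gathersiv4df, one of the builtins
   registered by this function.  */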
36250 static void
36251 ix86_init_mmx_sse_builtins (void)
36252 {
36253 const struct builtin_description * d;
36254 enum ix86_builtin_func_type ftype;
36255 size_t i;
36256
36257 /* Add all special builtins with a variable number of operands. */
36258 for (i = 0, d = bdesc_special_args;
36259 i < ARRAY_SIZE (bdesc_special_args);
36260 i++, d++)
36261 {
36262 if (d->name == 0)
36263 continue;
36264
36265 ftype = (enum ix86_builtin_func_type) d->flag;
36266 def_builtin (d->mask, d->name, ftype, d->code);
36267 }
36268
36269 /* Add all builtins with a variable number of operands. */
36270 for (i = 0, d = bdesc_args;
36271 i < ARRAY_SIZE (bdesc_args);
36272 i++, d++)
36273 {
36274 if (d->name == 0)
36275 continue;
36276
36277 ftype = (enum ix86_builtin_func_type) d->flag;
36278 def_builtin_const (d->mask, d->name, ftype, d->code);
36279 }
36280
36281 /* Add all builtins with rounding. */
36282 for (i = 0, d = bdesc_round_args;
36283 i < ARRAY_SIZE (bdesc_round_args);
36284 i++, d++)
36285 {
36286 if (d->name == 0)
36287 continue;
36288
36289 ftype = (enum ix86_builtin_func_type) d->flag;
36290 def_builtin_const (d->mask, d->name, ftype, d->code);
36291 }
36292
36293 /* pcmpestr[im] insns. */
36294 for (i = 0, d = bdesc_pcmpestr;
36295 i < ARRAY_SIZE (bdesc_pcmpestr);
36296 i++, d++)
36297 {
36298 if (d->code == IX86_BUILTIN_PCMPESTRM128)
36299 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
36300 else
36301 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
36302 def_builtin_const (d->mask, d->name, ftype, d->code);
36303 }
36304
36305 /* pcmpistr[im] insns. */
36306 for (i = 0, d = bdesc_pcmpistr;
36307 i < ARRAY_SIZE (bdesc_pcmpistr);
36308 i++, d++)
36309 {
36310 if (d->code == IX86_BUILTIN_PCMPISTRM128)
36311 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
36312 else
36313 ftype = INT_FTYPE_V16QI_V16QI_INT;
36314 def_builtin_const (d->mask, d->name, ftype, d->code);
36315 }
36316
36317 /* comi/ucomi insns. */
36318 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36319 {
36320 if (d->mask == OPTION_MASK_ISA_SSE2)
36321 ftype = INT_FTYPE_V2DF_V2DF;
36322 else
36323 ftype = INT_FTYPE_V4SF_V4SF;
36324 def_builtin_const (d->mask, d->name, ftype, d->code);
36325 }
36326
36327 /* SSE */
36328 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
36329 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
36330 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
36331 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
36332
36333 /* SSE or 3DNow!A */
36334 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
36335 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
36336 IX86_BUILTIN_MASKMOVQ);
36337
36338 /* SSE2 */
36339 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
36340 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
36341
36342 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
36343 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
36344 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
36345 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
36346
36347 /* SSE3. */
36348 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
36349 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
36350 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
36351 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
36352
36353 /* AES */
36354 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
36355 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
36356 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
36357 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
36358 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
36359 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
36360 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
36361 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
36362 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
36363 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
36364 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
36365 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
36366
36367 /* PCLMUL */
36368 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
36369 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
36370
36371 /* RDRND */
36372 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
36373 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
36374 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
36375 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
36376 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
36377 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
36378 IX86_BUILTIN_RDRAND64_STEP);
36379
36380 /* AVX2 */
36381 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
36382 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
36383 IX86_BUILTIN_GATHERSIV2DF);
36384
36385 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
36386 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
36387 IX86_BUILTIN_GATHERSIV4DF);
36388
36389 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
36390 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
36391 IX86_BUILTIN_GATHERDIV2DF);
36392
36393 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
36394 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
36395 IX86_BUILTIN_GATHERDIV4DF);
36396
36397 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
36398 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
36399 IX86_BUILTIN_GATHERSIV4SF);
36400
36401 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
36402 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
36403 IX86_BUILTIN_GATHERSIV8SF);
36404
36405 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
36406 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
36407 IX86_BUILTIN_GATHERDIV4SF);
36408
36409 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
36410 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
36411 IX86_BUILTIN_GATHERDIV8SF);
36412
36413 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
36414 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
36415 IX86_BUILTIN_GATHERSIV2DI);
36416
36417 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
36418 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
36419 IX86_BUILTIN_GATHERSIV4DI);
36420
36421 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
36422 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
36423 IX86_BUILTIN_GATHERDIV2DI);
36424
36425 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
36426 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
36427 IX86_BUILTIN_GATHERDIV4DI);
36428
36429 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
36430 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
36431 IX86_BUILTIN_GATHERSIV4SI);
36432
36433 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
36434 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
36435 IX86_BUILTIN_GATHERSIV8SI);
36436
36437 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
36438 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
36439 IX86_BUILTIN_GATHERDIV4SI);
36440
36441 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
36442 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
36443 IX86_BUILTIN_GATHERDIV8SI);
36444
36445 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
36446 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
36447 IX86_BUILTIN_GATHERALTSIV4DF);
36448
36449 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
36450 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
36451 IX86_BUILTIN_GATHERALTDIV8SF);
36452
36453 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
36454 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
36455 IX86_BUILTIN_GATHERALTSIV4DI);
36456
36457 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
36458 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
36459 IX86_BUILTIN_GATHERALTDIV8SI);
36460
36461 /* AVX512F */
36462 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
36463 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
36464 IX86_BUILTIN_GATHER3SIV16SF);
36465
36466 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
36467 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
36468 IX86_BUILTIN_GATHER3SIV8DF);
36469
36470 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
36471 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
36472 IX86_BUILTIN_GATHER3DIV16SF);
36473
36474 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
36475 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
36476 IX86_BUILTIN_GATHER3DIV8DF);
36477
36478 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
36479 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
36480 IX86_BUILTIN_GATHER3SIV16SI);
36481
36482 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
36483 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
36484 IX86_BUILTIN_GATHER3SIV8DI);
36485
36486 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
36487 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
36488 IX86_BUILTIN_GATHER3DIV16SI);
36489
36490 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
36491 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
36492 IX86_BUILTIN_GATHER3DIV8DI);
36493
36494 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
36495 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
36496 IX86_BUILTIN_GATHER3ALTSIV8DF);
36497
36498 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
36499 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
36500 IX86_BUILTIN_GATHER3ALTDIV16SF);
36501
36502 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
36503 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
36504 IX86_BUILTIN_GATHER3ALTSIV8DI);
36505
36506 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
36507 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
36508 IX86_BUILTIN_GATHER3ALTDIV16SI);
36509
36510 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
36511 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
36512 IX86_BUILTIN_SCATTERSIV16SF);
36513
36514 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
36515 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
36516 IX86_BUILTIN_SCATTERSIV8DF);
36517
36518 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
36519 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
36520 IX86_BUILTIN_SCATTERDIV16SF);
36521
36522 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
36523 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
36524 IX86_BUILTIN_SCATTERDIV8DF);
36525
36526 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
36527 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
36528 IX86_BUILTIN_SCATTERSIV16SI);
36529
36530 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
36531 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
36532 IX86_BUILTIN_SCATTERSIV8DI);
36533
36534 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
36535 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
36536 IX86_BUILTIN_SCATTERDIV16SI);
36537
36538 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
36539 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
36540 IX86_BUILTIN_SCATTERDIV8DI);
36541
36542 /* AVX512VL */
36543 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
36544 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
36545 IX86_BUILTIN_GATHER3SIV2DF);
36546
36547 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
36548 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
36549 IX86_BUILTIN_GATHER3SIV4DF);
36550
36551 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
36552 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
36553 IX86_BUILTIN_GATHER3DIV2DF);
36554
36555 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
36556 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
36557 IX86_BUILTIN_GATHER3DIV4DF);
36558
36559 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
36560 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
36561 IX86_BUILTIN_GATHER3SIV4SF);
36562
36563 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
36564 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
36565 IX86_BUILTIN_GATHER3SIV8SF);
36566
36567 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
36568 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
36569 IX86_BUILTIN_GATHER3DIV4SF);
36570
36571 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
36572 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
36573 IX86_BUILTIN_GATHER3DIV8SF);
36574
36575 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
36576 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
36577 IX86_BUILTIN_GATHER3SIV2DI);
36578
36579 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
36580 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
36581 IX86_BUILTIN_GATHER3SIV4DI);
36582
36583 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
36584 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
36585 IX86_BUILTIN_GATHER3DIV2DI);
36586
36587 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
36588 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
36589 IX86_BUILTIN_GATHER3DIV4DI);
36590
36591 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
36592 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
36593 IX86_BUILTIN_GATHER3SIV4SI);
36594
36595 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
36596 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
36597 IX86_BUILTIN_GATHER3SIV8SI);
36598
36599 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
36600 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
36601 IX86_BUILTIN_GATHER3DIV4SI);
36602
36603 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
36604 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
36605 IX86_BUILTIN_GATHER3DIV8SI);
36606
36607 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
36608 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
36609 IX86_BUILTIN_GATHER3ALTSIV4DF);
36610
36611 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
36612 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
36613 IX86_BUILTIN_GATHER3ALTDIV8SF);
36614
36615 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
36616 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
36617 IX86_BUILTIN_GATHER3ALTSIV4DI);
36618
36619 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
36620 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
36621 IX86_BUILTIN_GATHER3ALTDIV8SI);
36622
36623 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
36624 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
36625 IX86_BUILTIN_SCATTERSIV8SF);
36626
36627 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
36628 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
36629 IX86_BUILTIN_SCATTERSIV4SF);
36630
36631 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
36632 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
36633 IX86_BUILTIN_SCATTERSIV4DF);
36634
36635 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
36636 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
36637 IX86_BUILTIN_SCATTERSIV2DF);
36638
36639 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
36640 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
36641 IX86_BUILTIN_SCATTERDIV8SF);
36642
36643 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
36644 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
36645 IX86_BUILTIN_SCATTERDIV4SF);
36646
36647 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
36648 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
36649 IX86_BUILTIN_SCATTERDIV4DF);
36650
36651 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
36652 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
36653 IX86_BUILTIN_SCATTERDIV2DF);
36654
36655 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
36656 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
36657 IX86_BUILTIN_SCATTERSIV8SI);
36658
36659 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
36660 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
36661 IX86_BUILTIN_SCATTERSIV4SI);
36662
36663 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
36664 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
36665 IX86_BUILTIN_SCATTERSIV4DI);
36666
36667 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
36668 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
36669 IX86_BUILTIN_SCATTERSIV2DI);
36670
36671 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
36672 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
36673 IX86_BUILTIN_SCATTERDIV8SI);
36674
36675 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
36676 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
36677 IX86_BUILTIN_SCATTERDIV4SI);
36678
36679 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
36680 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
36681 IX86_BUILTIN_SCATTERDIV4DI);
36682
36683 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
36684 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
36685 IX86_BUILTIN_SCATTERDIV2DI);
36686 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
36687 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
36688 IX86_BUILTIN_SCATTERALTSIV8DF);
36689
36690 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
36691 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
36692 IX86_BUILTIN_SCATTERALTDIV16SF);
36693
36694 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
36695 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
36696 IX86_BUILTIN_SCATTERALTSIV8DI);
36697
36698 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
36699 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
36700 IX86_BUILTIN_SCATTERALTDIV16SI);
36701
36702 /* AVX512PF */
36703 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
36704 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
36705 IX86_BUILTIN_GATHERPFDPD);
36706 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
36707 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
36708 IX86_BUILTIN_GATHERPFDPS);
36709 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
36710 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
36711 IX86_BUILTIN_GATHERPFQPD);
36712 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
36713 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
36714 IX86_BUILTIN_GATHERPFQPS);
36715 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
36716 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
36717 IX86_BUILTIN_SCATTERPFDPD);
36718 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
36719 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
36720 IX86_BUILTIN_SCATTERPFDPS);
36721 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
36722 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
36723 IX86_BUILTIN_SCATTERPFQPD);
36724 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
36725 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
36726 IX86_BUILTIN_SCATTERPFQPS);
36727
36728 /* SHA */
36729 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
36730 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
36731 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
36732 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
36733 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
36734 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
36735 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
36736 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
36737 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
36738 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
36739 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
36740 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
36741 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
36742 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
36743
36744 /* RTM. */
36745 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
36746 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
36747
36748 /* MMX access to the vec_init patterns. */
36749 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
36750 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
36751
36752 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
36753 V4HI_FTYPE_HI_HI_HI_HI,
36754 IX86_BUILTIN_VEC_INIT_V4HI);
36755
36756 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
36757 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
36758 IX86_BUILTIN_VEC_INIT_V8QI);
36759
36760 /* Access to the vec_extract patterns. */
36761 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
36762 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
36763 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
36764 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
36765 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
36766 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
36767 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
36768 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
36769 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
36770 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
36771
36772 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
36773 "__builtin_ia32_vec_ext_v4hi",
36774 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
36775
36776 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
36777 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
36778
36779 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
36780 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
36781
36782 /* Access to the vec_set patterns. */
36783 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
36784 "__builtin_ia32_vec_set_v2di",
36785 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
36786
36787 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
36788 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
36789
36790 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
36791 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
36792
36793 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
36794 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
36795
36796 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
36797 "__builtin_ia32_vec_set_v4hi",
36798 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
36799
36800 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
36801 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
36802
36803 /* RDSEED */
36804 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
36805 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
36806 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
36807 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
36808 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
36809 "__builtin_ia32_rdseed_di_step",
36810 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
36811
36812 /* ADCX */
36813 def_builtin (0, "__builtin_ia32_addcarryx_u32",
36814 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
36815 def_builtin (OPTION_MASK_ISA_64BIT,
36816 "__builtin_ia32_addcarryx_u64",
36817 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
36818 IX86_BUILTIN_ADDCARRYX64);
36819
36820 /* SBB */
36821 def_builtin (0, "__builtin_ia32_sbb_u32",
36822 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
36823 def_builtin (OPTION_MASK_ISA_64BIT,
36824 "__builtin_ia32_sbb_u64",
36825 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
36826 IX86_BUILTIN_SBB64);
36827
36828 /* Read/write FLAGS. */
36829 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
36830 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
36831 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
36832 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
36833 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
36834 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
36835 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
36836 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
36837
36838 /* CLFLUSHOPT. */
36839 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
36840 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
36841
36842 /* CLWB. */
36843 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
36844 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
36845
36846 /* MONITORX and MWAITX. */
36847 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
36848 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
36849 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
36850 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
36851
36852 /* CLZERO. */
36853 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
36854 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
36855
36856 /* Add FMA4 multi-arg argument instructions */
36857 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36858 {
36859 if (d->name == 0)
36860 continue;
36861
36862 ftype = (enum ix86_builtin_func_type) d->flag;
36863 def_builtin_const (d->mask, d->name, ftype, d->code);
36864 }
36865 }
36866
36867 static void
36868 ix86_init_mpx_builtins ()
36869 {
36870 const struct builtin_description * d;
36871 enum ix86_builtin_func_type ftype;
36872 tree decl;
36873 size_t i;
36874
36875 for (i = 0, d = bdesc_mpx;
36876 i < ARRAY_SIZE (bdesc_mpx);
36877 i++, d++)
36878 {
36879 if (d->name == 0)
36880 continue;
36881
36882 ftype = (enum ix86_builtin_func_type) d->flag;
36883 decl = def_builtin (d->mask, d->name, ftype, d->code);
36884
36885 /* Without the leaf and nothrow flags on MPX builtins,
36886 abnormal edges may follow their calls when setjmp
36887 is present in the function. Since we may have a lot
36888 of MPX builtin calls, this causes lots of useless
36889 edges and enormous PHI nodes. To avoid this we mark
36890 MPX builtins as leaf and nothrow. */
36891 if (decl)
36892 {
36893 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36894 NULL_TREE);
36895 TREE_NOTHROW (decl) = 1;
36896 }
36897 else
36898 {
36899 ix86_builtins_isa[(int)d->code].leaf_p = true;
36900 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36901 }
36902 }
36903
36904 for (i = 0, d = bdesc_mpx_const;
36905 i < ARRAY_SIZE (bdesc_mpx_const);
36906 i++, d++)
36907 {
36908 if (d->name == 0)
36909 continue;
36910
36911 ftype = (enum ix86_builtin_func_type) d->flag;
36912 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
36913
36914 if (decl)
36915 {
36916 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36917 NULL_TREE);
36918 TREE_NOTHROW (decl) = 1;
36919 }
36920 else
36921 {
36922 ix86_builtins_isa[(int)d->code].leaf_p = true;
36923 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36924 }
36925 }
36926 }
36927
36928 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36929 to return a pointer to VERSION_DECL if the outcome of the expression
36930 formed by PREDICATE_CHAIN is true. This function will be called during
36931 version dispatch to decide which function version to execute. It returns
36932 the basic block at the end, to which more conditions can be added. */
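/* As an illustrative sketch (not literal GIMPLE; the names below are
   hypothetical), for a version guarded by two predicates the code
   appended to NEW_BB has roughly this shape:

       c1 = __builtin_cpu_is ("haswell");
       c2 = __builtin_cpu_supports ("avx2");
       c3 = MIN_EXPR <c2, c1>;
       if (c3 > 0)
         return (void *) &foo.arch_haswell_avx2;
       ... fall through to the condition added for the next version ...  */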
36933
36934 static basic_block
36935 add_condition_to_bb (tree function_decl, tree version_decl,
36936 tree predicate_chain, basic_block new_bb)
36937 {
36938 gimple *return_stmt;
36939 tree convert_expr, result_var;
36940 gimple *convert_stmt;
36941 gimple *call_cond_stmt;
36942 gimple *if_else_stmt;
36943
36944 basic_block bb1, bb2, bb3;
36945 edge e12, e23;
36946
36947 tree cond_var, and_expr_var = NULL_TREE;
36948 gimple_seq gseq;
36949
36950 tree predicate_decl, predicate_arg;
36951
36952 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36953
36954 gcc_assert (new_bb != NULL);
36955 gseq = bb_seq (new_bb);
36956
36957
36958 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36959 build_fold_addr_expr (version_decl));
36960 result_var = create_tmp_var (ptr_type_node);
36961 convert_stmt = gimple_build_assign (result_var, convert_expr);
36962 return_stmt = gimple_build_return (result_var);
36963
36964 if (predicate_chain == NULL_TREE)
36965 {
36966 gimple_seq_add_stmt (&gseq, convert_stmt);
36967 gimple_seq_add_stmt (&gseq, return_stmt);
36968 set_bb_seq (new_bb, gseq);
36969 gimple_set_bb (convert_stmt, new_bb);
36970 gimple_set_bb (return_stmt, new_bb);
36971 pop_cfun ();
36972 return new_bb;
36973 }
36974
36975 while (predicate_chain != NULL)
36976 {
36977 cond_var = create_tmp_var (integer_type_node);
36978 predicate_decl = TREE_PURPOSE (predicate_chain);
36979 predicate_arg = TREE_VALUE (predicate_chain);
36980 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36981 gimple_call_set_lhs (call_cond_stmt, cond_var);
36982
36983 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36984 gimple_set_bb (call_cond_stmt, new_bb);
36985 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36986
36987 predicate_chain = TREE_CHAIN (predicate_chain);
36988
36989 if (and_expr_var == NULL)
36990 and_expr_var = cond_var;
36991 else
36992 {
36993 gimple *assign_stmt;
36994 /* Use MIN_EXPR to check whether any of the integers is zero:
36995 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
36996 assign_stmt = gimple_build_assign (and_expr_var,
36997 build2 (MIN_EXPR, integer_type_node,
36998 cond_var, and_expr_var));
36999
37000 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
37001 gimple_set_bb (assign_stmt, new_bb);
37002 gimple_seq_add_stmt (&gseq, assign_stmt);
37003 }
37004 }
37005
37006 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
37007 integer_zero_node,
37008 NULL_TREE, NULL_TREE);
37009 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37010 gimple_set_bb (if_else_stmt, new_bb);
37011 gimple_seq_add_stmt (&gseq, if_else_stmt);
37012
37013 gimple_seq_add_stmt (&gseq, convert_stmt);
37014 gimple_seq_add_stmt (&gseq, return_stmt);
37015 set_bb_seq (new_bb, gseq);
37016
37017 bb1 = new_bb;
37018 e12 = split_block (bb1, if_else_stmt);
37019 bb2 = e12->dest;
37020 e12->flags &= ~EDGE_FALLTHRU;
37021 e12->flags |= EDGE_TRUE_VALUE;
37022
37023 e23 = split_block (bb2, return_stmt);
37024
37025 gimple_set_bb (convert_stmt, bb2);
37026 gimple_set_bb (return_stmt, bb2);
37027
37028 bb3 = e23->dest;
37029 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37030
37031 remove_edge (e23);
37032 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37033
37034 pop_cfun ();
37035
37036 return bb3;
37037 }
37038
37039 /* This parses the attribute arguments to target in DECL and determines
37040 the right builtin to use to match the platform specification.
37041 It returns the priority value for this version decl. If PREDICATE_LIST
37042 is not NULL, it stores the list of cpu features that need to be checked
37043 before dispatching this function. */
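/* An illustrative example (the exact mapping is defined by the tables and
   switch below): a version declared with the hypothetical attribute
   target ("arch=haswell,avx2") yields a PREDICATE_LIST that tests
   __builtin_cpu_is ("haswell") and __builtin_cpu_supports ("avx2"), and
   the returned priority is P_PROC_AVX2, the highest priority implied by
   the architecture and the listed features.  */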
37044
37045 static unsigned int
37046 get_builtin_code_for_version (tree decl, tree *predicate_list)
37047 {
37048 tree attrs;
37049 struct cl_target_option cur_target;
37050 tree target_node;
37051 struct cl_target_option *new_target;
37052 const char *arg_str = NULL;
37053 const char *attrs_str = NULL;
37054 char *tok_str = NULL;
37055 char *token;
37056
37057 /* Priority of i386 features, greater value is higher priority. This is
37058 used to decide the order in which function dispatch must happen. For
37059 instance, a version specialized for SSE4.2 should be checked for dispatch
37060 before a version for SSE3, as SSE4.2 implies SSE3. */
37061 enum feature_priority
37062 {
37063 P_ZERO = 0,
37064 P_MMX,
37065 P_SSE,
37066 P_SSE2,
37067 P_SSE3,
37068 P_SSSE3,
37069 P_PROC_SSSE3,
37070 P_SSE4_A,
37071 P_PROC_SSE4_A,
37072 P_SSE4_1,
37073 P_SSE4_2,
37074 P_PROC_SSE4_2,
37075 P_POPCNT,
37076 P_AES,
37077 P_PCLMUL,
37078 P_AVX,
37079 P_PROC_AVX,
37080 P_BMI,
37081 P_PROC_BMI,
37082 P_FMA4,
37083 P_XOP,
37084 P_PROC_XOP,
37085 P_FMA,
37086 P_PROC_FMA,
37087 P_BMI2,
37088 P_AVX2,
37089 P_PROC_AVX2,
37090 P_AVX512F,
37091 P_PROC_AVX512F
37092 };
37093
37094 enum feature_priority priority = P_ZERO;
37095
37096 /* These are the target attribute strings for which a dispatcher is
37097 available, from fold_builtin_cpu. */
37098
37099 static struct _feature_list
37100 {
37101 const char *const name;
37102 const enum feature_priority priority;
37103 }
37104 const feature_list[] =
37105 {
37106 {"mmx", P_MMX},
37107 {"sse", P_SSE},
37108 {"sse2", P_SSE2},
37109 {"sse3", P_SSE3},
37110 {"sse4a", P_SSE4_A},
37111 {"ssse3", P_SSSE3},
37112 {"sse4.1", P_SSE4_1},
37113 {"sse4.2", P_SSE4_2},
37114 {"popcnt", P_POPCNT},
37115 {"aes", P_AES},
37116 {"pclmul", P_PCLMUL},
37117 {"avx", P_AVX},
37118 {"bmi", P_BMI},
37119 {"fma4", P_FMA4},
37120 {"xop", P_XOP},
37121 {"fma", P_FMA},
37122 {"bmi2", P_BMI2},
37123 {"avx2", P_AVX2},
37124 {"avx512f", P_AVX512F}
37125 };
37126
37127
37128 static unsigned int NUM_FEATURES
37129 = sizeof (feature_list) / sizeof (struct _feature_list);
37130
37131 unsigned int i;
37132
37133 tree predicate_chain = NULL_TREE;
37134 tree predicate_decl, predicate_arg;
37135
37136 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
37137 gcc_assert (attrs != NULL);
37138
37139 attrs = TREE_VALUE (TREE_VALUE (attrs));
37140
37141 gcc_assert (TREE_CODE (attrs) == STRING_CST);
37142 attrs_str = TREE_STRING_POINTER (attrs);
37143
37144 /* Return priority zero for default function. */
37145 if (strcmp (attrs_str, "default") == 0)
37146 return 0;
37147
37148 /* Handle arch= if specified. For priority, set it to be 1 more than
37149 the best instruction set the processor can handle. For instance, if
37150 there is a version for atom and a version for ssse3 (the highest ISA
37151 priority for atom), the atom version must be checked for dispatch
37152 before the ssse3 version. */
37153 if (strstr (attrs_str, "arch=") != NULL)
37154 {
37155 cl_target_option_save (&cur_target, &global_options);
37156 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
37157 &global_options_set);
37158
37159 gcc_assert (target_node);
37160 new_target = TREE_TARGET_OPTION (target_node);
37161 gcc_assert (new_target);
37162
37163 if (new_target->arch_specified && new_target->arch > 0)
37164 {
37165 switch (new_target->arch)
37166 {
37167 case PROCESSOR_CORE2:
37168 arg_str = "core2";
37169 priority = P_PROC_SSSE3;
37170 break;
37171 case PROCESSOR_NEHALEM:
37172 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
37173 arg_str = "westmere";
37174 else
37175 /* We translate "arch=corei7" and "arch=nehalem" to
37176 "corei7" so that they are mapped to the M_INTEL_COREI7
37177 cpu type, covering all M_INTEL_COREI7_XXX subtypes. */
37178 arg_str = "corei7";
37179 priority = P_PROC_SSE4_2;
37180 break;
37181 case PROCESSOR_SANDYBRIDGE:
37182 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
37183 arg_str = "ivybridge";
37184 else
37185 arg_str = "sandybridge";
37186 priority = P_PROC_AVX;
37187 break;
37188 case PROCESSOR_HASWELL:
37189 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
37190 arg_str = "skylake-avx512";
37191 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
37192 arg_str = "skylake";
37193 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
37194 arg_str = "broadwell";
37195 else
37196 arg_str = "haswell";
37197 priority = P_PROC_AVX2;
37198 break;
37199 case PROCESSOR_BONNELL:
37200 arg_str = "bonnell";
37201 priority = P_PROC_SSSE3;
37202 break;
37203 case PROCESSOR_KNL:
37204 arg_str = "knl";
37205 priority = P_PROC_AVX512F;
37206 break;
37207 case PROCESSOR_SILVERMONT:
37208 arg_str = "silvermont";
37209 priority = P_PROC_SSE4_2;
37210 break;
37211 case PROCESSOR_AMDFAM10:
37212 arg_str = "amdfam10h";
37213 priority = P_PROC_SSE4_A;
37214 break;
37215 case PROCESSOR_BTVER1:
37216 arg_str = "btver1";
37217 priority = P_PROC_SSE4_A;
37218 break;
37219 case PROCESSOR_BTVER2:
37220 arg_str = "btver2";
37221 priority = P_PROC_BMI;
37222 break;
37223 case PROCESSOR_BDVER1:
37224 arg_str = "bdver1";
37225 priority = P_PROC_XOP;
37226 break;
37227 case PROCESSOR_BDVER2:
37228 arg_str = "bdver2";
37229 priority = P_PROC_FMA;
37230 break;
37231 case PROCESSOR_BDVER3:
37232 arg_str = "bdver3";
37233 priority = P_PROC_FMA;
37234 break;
37235 case PROCESSOR_BDVER4:
37236 arg_str = "bdver4";
37237 priority = P_PROC_AVX2;
37238 break;
37239 case PROCESSOR_ZNVER1:
37240 arg_str = "znver1";
37241 priority = P_PROC_AVX2;
37242 break;
37243 }
37244 }
37245
37246 cl_target_option_restore (&global_options, &cur_target);
37247
37248 if (predicate_list && arg_str == NULL)
37249 {
37250 error_at (DECL_SOURCE_LOCATION (decl),
37251 "No dispatcher found for the versioning attributes");
37252 return 0;
37253 }
37254
37255 if (predicate_list)
37256 {
37257 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
37258 /* For a C string literal the length includes the trailing NULL. */
37259 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37260 predicate_chain = tree_cons (predicate_decl, predicate_arg,
37261 predicate_chain);
37262 }
37263 }
37264
37265 /* Process feature name. */
37266 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
37267 strcpy (tok_str, attrs_str);
37268 token = strtok (tok_str, ",");
37269 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
37270
37271 while (token != NULL)
37272 {
37273 /* Do not process "arch=" */
37274 if (strncmp (token, "arch=", 5) == 0)
37275 {
37276 token = strtok (NULL, ",");
37277 continue;
37278 }
37279 for (i = 0; i < NUM_FEATURES; ++i)
37280 {
37281 if (strcmp (token, feature_list[i].name) == 0)
37282 {
37283 if (predicate_list)
37284 {
37285 predicate_arg = build_string_literal (
37286 strlen (feature_list[i].name) + 1,
37287 feature_list[i].name);
37288 predicate_chain = tree_cons (predicate_decl, predicate_arg,
37289 predicate_chain);
37290 }
37291 /* Find the maximum priority feature. */
37292 if (feature_list[i].priority > priority)
37293 priority = feature_list[i].priority;
37294
37295 break;
37296 }
37297 }
37298 if (predicate_list && i == NUM_FEATURES)
37299 {
37300 error_at (DECL_SOURCE_LOCATION (decl),
37301 "No dispatcher found for %s", token);
37302 return 0;
37303 }
37304 token = strtok (NULL, ",");
37305 }
37306 free (tok_str);
37307
37308 if (predicate_list && predicate_chain == NULL_TREE)
37309 {
37310 error_at (DECL_SOURCE_LOCATION (decl),
37311 "No dispatcher found for the versioning attributes : %s",
37312 attrs_str);
37313 return 0;
37314 }
37315 else if (predicate_list)
37316 {
37317 predicate_chain = nreverse (predicate_chain);
37318 *predicate_list = predicate_chain;
37319 }
37320
37321 return priority;
37322 }
37323
37324 /* This compares the priority of target features in function DECL1
37325 and DECL2. It returns positive value if DECL1 is higher priority,
37326 negative value if DECL2 is higher priority and 0 if they are the
37327 same. */
37328
37329 static int
37330 ix86_compare_version_priority (tree decl1, tree decl2)
37331 {
37332 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
37333 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
37334
37335 return (int)priority1 - (int)priority2;
37336 }
37337
37338 /* V1 and V2 point to function versions with different priorities
37339 based on the target ISA. This function compares their priorities. */
37340
37341 static int
37342 feature_compare (const void *v1, const void *v2)
37343 {
37344 typedef struct _function_version_info
37345 {
37346 tree version_decl;
37347 tree predicate_chain;
37348 unsigned int dispatch_priority;
37349 } function_version_info;
37350
37351 const function_version_info c1 = *(const function_version_info *)v1;
37352 const function_version_info c2 = *(const function_version_info *)v2;
37353 return (c2.dispatch_priority - c1.dispatch_priority);
37354 }
37355
37356 /* This function generates the dispatch function for
37357 multi-versioned functions. DISPATCH_DECL is the function which will
37358 contain the dispatch logic. FNDECLS are the function choices for
37359 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
37360 in DISPATCH_DECL in which the dispatch code is generated. */
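/* A hedged source-level illustration of what gets dispatched (function
   multi-versioning, typically written in C++):

       __attribute__ ((target ("default"))) int foo () { return 0; }
       __attribute__ ((target ("sse4.2")))  int foo () { return 1; }
       __attribute__ ((target ("avx2")))    int foo () { return 2; }

   The resolver body generated here tests the versions in descending
   priority order (avx2 before sse4.2) and falls back to the default
   version last.  */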
37361
37362 static int
37363 dispatch_function_versions (tree dispatch_decl,
37364 void *fndecls_p,
37365 basic_block *empty_bb)
37366 {
37367 tree default_decl;
37368 gimple *ifunc_cpu_init_stmt;
37369 gimple_seq gseq;
37370 int ix;
37371 tree ele;
37372 vec<tree> *fndecls;
37373 unsigned int num_versions = 0;
37374 unsigned int actual_versions = 0;
37375 unsigned int i;
37376
37377 struct _function_version_info
37378 {
37379 tree version_decl;
37380 tree predicate_chain;
37381 unsigned int dispatch_priority;
37382 }*function_version_info;
37383
37384 gcc_assert (dispatch_decl != NULL
37385 && fndecls_p != NULL
37386 && empty_bb != NULL);
37387
37388 /* fndecls_p is actually a vector.  */
37389 fndecls = static_cast<vec<tree> *> (fndecls_p);
37390
37391 /* At least one more version other than the default. */
37392 num_versions = fndecls->length ();
37393 gcc_assert (num_versions >= 2);
37394
37395 function_version_info = (struct _function_version_info *)
37396 XNEWVEC (struct _function_version_info, (num_versions - 1));
37397
37398 /* The first version in the vector is the default decl. */
37399 default_decl = (*fndecls)[0];
37400
37401 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
37402
37403 gseq = bb_seq (*empty_bb);
37404 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
37405 constructors, so explicitly call __builtin_cpu_init here. */
37406 ifunc_cpu_init_stmt = gimple_build_call_vec (
37407 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
37408 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
37409 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
37410 set_bb_seq (*empty_bb, gseq);
37411
37412 pop_cfun ();
37413
37414
37415 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37416 {
37417 tree version_decl = ele;
37418 tree predicate_chain = NULL_TREE;
37419 unsigned int priority;
37420 /* Get attribute string, parse it and find the right predicate decl.
37421 The predicate function could be a lengthy combination of many
37422 features, like arch-type and various isa-variants. */
37423 priority = get_builtin_code_for_version (version_decl,
37424 &predicate_chain);
37425
37426 if (predicate_chain == NULL_TREE)
37427 continue;
37428
37429 function_version_info [actual_versions].version_decl = version_decl;
37430 function_version_info [actual_versions].predicate_chain
37431 = predicate_chain;
37432 function_version_info [actual_versions].dispatch_priority = priority;
37433 actual_versions++;
37434 }
37435
37436 /* Sort the versions according to descending order of dispatch priority. The
37437 priority is based on the ISA. This is not a perfect solution. There
37438 could still be ambiguity. If more than one function version is suitable
37439 to execute, which one should be dispatched? In future, allow the user
37440 to specify a dispatch priority next to the version. */
37441 qsort (function_version_info, actual_versions,
37442 sizeof (struct _function_version_info), feature_compare);
37443
37444 for (i = 0; i < actual_versions; ++i)
37445 *empty_bb = add_condition_to_bb (dispatch_decl,
37446 function_version_info[i].version_decl,
37447 function_version_info[i].predicate_chain,
37448 *empty_bb);
37449
37450 /* Dispatch the default version at the end. */
37451 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
37452 NULL, *empty_bb);
37453
37454 free (function_version_info);
37455 return 0;
37456 }
37457
37458 /* Comparator function to be used in qsort routine to sort attribute
37459 specification strings to "target". */
37460
37461 static int
37462 attr_strcmp (const void *v1, const void *v2)
37463 {
37464 const char *c1 = *(char *const*)v1;
37465 const char *c2 = *(char *const*)v2;
37466 return strcmp (c1, c2);
37467 }
37468
37469 /* ARGLIST is the argument to target attribute. This function tokenizes
37470 the comma separated arguments, sorts them and returns a string which
37471 is a unique identifier for the comma separated arguments. It also
37472 replaces non-identifier characters "=,-" with "_". */
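/* For instance (illustrative): the argument list of
   target ("avx2,arch=haswell") becomes "arch_haswell_avx2" -- '=' and '-'
   are rewritten to '_', the comma-separated tokens are sorted, and the
   sorted tokens are joined with '_'.  */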
37473
37474 static char *
37475 sorted_attr_string (tree arglist)
37476 {
37477 tree arg;
37478 size_t str_len_sum = 0;
37479 char **args = NULL;
37480 char *attr_str, *ret_str;
37481 char *attr = NULL;
37482 unsigned int argnum = 1;
37483 unsigned int i;
37484
37485 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
37486 {
37487 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
37488 size_t len = strlen (str);
37489 str_len_sum += len + 1;
37490 if (arg != arglist)
37491 argnum++;
37492 for (i = 0; i < strlen (str); i++)
37493 if (str[i] == ',')
37494 argnum++;
37495 }
37496
37497 attr_str = XNEWVEC (char, str_len_sum);
37498 str_len_sum = 0;
37499 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
37500 {
37501 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
37502 size_t len = strlen (str);
37503 memcpy (attr_str + str_len_sum, str, len);
37504 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
37505 str_len_sum += len + 1;
37506 }
37507
37508 /* Replace "=,-" with "_". */
37509 for (i = 0; i < strlen (attr_str); i++)
37510 if (attr_str[i] == '=' || attr_str[i]== '-')
37511 attr_str[i] = '_';
37512
37513 if (argnum == 1)
37514 return attr_str;
37515
37516 args = XNEWVEC (char *, argnum);
37517
37518 i = 0;
37519 attr = strtok (attr_str, ",");
37520 while (attr != NULL)
37521 {
37522 args[i] = attr;
37523 i++;
37524 attr = strtok (NULL, ",");
37525 }
37526
37527 qsort (args, argnum, sizeof (char *), attr_strcmp);
37528
37529 ret_str = XNEWVEC (char, str_len_sum);
37530 str_len_sum = 0;
37531 for (i = 0; i < argnum; i++)
37532 {
37533 size_t len = strlen (args[i]);
37534 memcpy (ret_str + str_len_sum, args[i], len);
37535 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
37536 str_len_sum += len + 1;
37537 }
37538
37539 XDELETEVEC (args);
37540 XDELETEVEC (attr_str);
37541 return ret_str;
37542 }
37543
37544 /* This function changes the assembler name for functions that are
37545 versions. If DECL is a function version and has a "target"
37546 attribute, it appends the attribute string to its assembler name. */
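/* For example (illustrative; a plain C-style name is used for brevity,
   real C++ versions start from the mangled name): a version of foo
   declared with target ("arch=haswell,avx2") gets the assembler name
   "foo.arch_haswell_avx2", i.e. the original name, a '.', and the sorted
   attribute string produced by sorted_attr_string above.  */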
37547
37548 static tree
37549 ix86_mangle_function_version_assembler_name (tree decl, tree id)
37550 {
37551 tree version_attr;
37552 const char *orig_name, *version_string;
37553 char *attr_str, *assembler_name;
37554
37555 if (DECL_DECLARED_INLINE_P (decl)
37556 && lookup_attribute ("gnu_inline",
37557 DECL_ATTRIBUTES (decl)))
37558 error_at (DECL_SOURCE_LOCATION (decl),
37559 "Function versions cannot be marked as gnu_inline,"
37560 " bodies have to be generated");
37561
37562 if (DECL_VIRTUAL_P (decl)
37563 || DECL_VINDEX (decl))
37564 sorry ("Virtual function multiversioning not supported");
37565
37566 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
37567
37568 /* The target attribute string cannot be NULL. */
37569 gcc_assert (version_attr != NULL_TREE);
37570
37571 orig_name = IDENTIFIER_POINTER (id);
37572 version_string
37573 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
37574
37575 if (strcmp (version_string, "default") == 0)
37576 return id;
37577
37578 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
37579 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
37580
37581 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
37582
37583 /* Allow assembler name to be modified if already set. */
37584 if (DECL_ASSEMBLER_NAME_SET_P (decl))
37585 SET_DECL_RTL (decl, NULL);
37586
37587 tree ret = get_identifier (assembler_name);
37588 XDELETEVEC (attr_str);
37589 XDELETEVEC (assembler_name);
37590 return ret;
37591 }
37592
37593 /* This function returns true if FN1 and FN2 are versions of the same function,
37594 that is, the target strings of the function decls are different. This assumes
37595 that FN1 and FN2 have the same signature. */
37596
37597 static bool
37598 ix86_function_versions (tree fn1, tree fn2)
37599 {
37600 tree attr1, attr2;
37601 char *target1, *target2;
37602 bool result;
37603
37604 if (TREE_CODE (fn1) != FUNCTION_DECL
37605 || TREE_CODE (fn2) != FUNCTION_DECL)
37606 return false;
37607
37608 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
37609 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
37610
37611 /* At least one function decl should have the target attribute specified. */
37612 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
37613 return false;
37614
37615 /* Diagnose missing target attribute if one of the decls is already
37616 multi-versioned. */
37617 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
37618 {
37619 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
37620 {
37621 if (attr2 != NULL_TREE)
37622 {
37623 std::swap (fn1, fn2);
37624 attr1 = attr2;
37625 }
37626 error_at (DECL_SOURCE_LOCATION (fn2),
37627 "missing %<target%> attribute for multi-versioned %D",
37628 fn2);
37629 inform (DECL_SOURCE_LOCATION (fn1),
37630 "previous declaration of %D", fn1);
37631 /* Prevent diagnosing of the same error multiple times. */
37632 DECL_ATTRIBUTES (fn2)
37633 = tree_cons (get_identifier ("target"),
37634 copy_node (TREE_VALUE (attr1)),
37635 DECL_ATTRIBUTES (fn2));
37636 }
37637 return false;
37638 }
37639
37640 target1 = sorted_attr_string (TREE_VALUE (attr1));
37641 target2 = sorted_attr_string (TREE_VALUE (attr2));
37642
37643 /* The sorted target strings must be different for fn1 and fn2
37644 to be versions. */
37645 if (strcmp (target1, target2) == 0)
37646 result = false;
37647 else
37648 result = true;
37649
37650 XDELETEVEC (target1);
37651 XDELETEVEC (target2);
37652
37653 return result;
37654 }
37655
37656 static tree
37657 ix86_mangle_decl_assembler_name (tree decl, tree id)
37658 {
37659 /* For function version, add the target suffix to the assembler name. */
37660 if (TREE_CODE (decl) == FUNCTION_DECL
37661 && DECL_FUNCTION_VERSIONED (decl))
37662 id = ix86_mangle_function_version_assembler_name (decl, id);
37663 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
37664 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
37665 #endif
37666
37667 return id;
37668 }
37669
37670 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
37671 is true, append the full path name of the source file. */
37672
37673 static char *
37674 make_name (tree decl, const char *suffix, bool make_unique)
37675 {
37676 char *global_var_name;
37677 int name_len;
37678 const char *name;
37679 const char *unique_name = NULL;
37680
37681 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
37682
37683 /* Get a unique name that can be used globally without any chances
37684 of collision at link time. */
37685 if (make_unique)
37686 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
37687
37688 name_len = strlen (name) + strlen (suffix) + 2;
37689
37690 if (make_unique)
37691 name_len += strlen (unique_name) + 1;
37692 global_var_name = XNEWVEC (char, name_len);
37693
37694 /* Use '.' to concatenate names as it is demangler friendly. */
37695 if (make_unique)
37696 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
37697 suffix);
37698 else
37699 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
37700
37701 return global_var_name;
37702 }
37703
37704 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
37705
37706 /* Make a dispatcher declaration for the multi-versioned function DECL.
37707 Calls to DECL function will be replaced with calls to the dispatcher
37708 by the front-end. Return the decl created. */
37709
37710 static tree
37711 make_dispatcher_decl (const tree decl)
37712 {
37713 tree func_decl;
37714 char *func_name;
37715 tree fn_type, func_type;
37716 bool is_uniq = false;
37717
37718 if (TREE_PUBLIC (decl) == 0)
37719 is_uniq = true;
37720
37721 func_name = make_name (decl, "ifunc", is_uniq);
37722
37723 fn_type = TREE_TYPE (decl);
37724 func_type = build_function_type (TREE_TYPE (fn_type),
37725 TYPE_ARG_TYPES (fn_type));
37726
37727 func_decl = build_fn_decl (func_name, func_type);
37728 XDELETEVEC (func_name);
37729 TREE_USED (func_decl) = 1;
37730 DECL_CONTEXT (func_decl) = NULL_TREE;
37731 DECL_INITIAL (func_decl) = error_mark_node;
37732 DECL_ARTIFICIAL (func_decl) = 1;
37733 /* Mark this func as external; the resolver will flip it again if
37734 it gets generated. */
37735 DECL_EXTERNAL (func_decl) = 1;
37736 /* IFUNCs have to be externally visible, so make this decl public. */
37737 TREE_PUBLIC (func_decl) = 1;
37738
37739 return func_decl;
37740 }
37741
37742 #endif
37743
37744 /* Returns true if DECL is multi-versioned and is the default function,
37745 that is, it is not tagged with target-specific options. */
37746
37747 static bool
37748 is_function_default_version (const tree decl)
37749 {
37750 if (TREE_CODE (decl) != FUNCTION_DECL
37751 || !DECL_FUNCTION_VERSIONED (decl))
37752 return false;
37753 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
37754 gcc_assert (attr);
37755 attr = TREE_VALUE (TREE_VALUE (attr));
37756 return (TREE_CODE (attr) == STRING_CST
37757 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
37758 }
37759
37760 /* Make a dispatcher declaration for the multi-versioned function DECL.
37761 Calls to DECL function will be replaced with calls to the dispatcher
37762 by the front-end. Returns the decl of the dispatcher function. */
37763
37764 static tree
37765 ix86_get_function_versions_dispatcher (void *decl)
37766 {
37767 tree fn = (tree) decl;
37768 struct cgraph_node *node = NULL;
37769 struct cgraph_node *default_node = NULL;
37770 struct cgraph_function_version_info *node_v = NULL;
37771 struct cgraph_function_version_info *first_v = NULL;
37772
37773 tree dispatch_decl = NULL;
37774
37775 struct cgraph_function_version_info *default_version_info = NULL;
37776
37777 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37778
37779 node = cgraph_node::get (fn);
37780 gcc_assert (node != NULL);
37781
37782 node_v = node->function_version ();
37783 gcc_assert (node_v != NULL);
37784
37785 if (node_v->dispatcher_resolver != NULL)
37786 return node_v->dispatcher_resolver;
37787
37788 /* Find the default version and make it the first node. */
37789 first_v = node_v;
37790 /* Go to the beginning of the chain. */
37791 while (first_v->prev != NULL)
37792 first_v = first_v->prev;
37793 default_version_info = first_v;
37794 while (default_version_info != NULL)
37795 {
37796 if (is_function_default_version
37797 (default_version_info->this_node->decl))
37798 break;
37799 default_version_info = default_version_info->next;
37800 }
37801
37802 /* If there is no default node, just return NULL. */
37803 if (default_version_info == NULL)
37804 return NULL;
37805
37806 /* Make default info the first node. */
37807 if (first_v != default_version_info)
37808 {
37809 default_version_info->prev->next = default_version_info->next;
37810 if (default_version_info->next)
37811 default_version_info->next->prev = default_version_info->prev;
37812 first_v->prev = default_version_info;
37813 default_version_info->next = first_v;
37814 default_version_info->prev = NULL;
37815 }
37816
37817 default_node = default_version_info->this_node;
37818
37819 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
37820 if (targetm.has_ifunc_p ())
37821 {
37822 struct cgraph_function_version_info *it_v = NULL;
37823 struct cgraph_node *dispatcher_node = NULL;
37824 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37825
37826 /* Right now, the dispatching is done via ifunc. */
37827 dispatch_decl = make_dispatcher_decl (default_node->decl);
37828
37829 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37830 gcc_assert (dispatcher_node != NULL);
37831 dispatcher_node->dispatcher_function = 1;
37832 dispatcher_version_info
37833 = dispatcher_node->insert_new_function_version ();
37834 dispatcher_version_info->next = default_version_info;
37835 dispatcher_node->definition = 1;
37836
37837 /* Set the dispatcher for all the versions. */
37838 it_v = default_version_info;
37839 while (it_v != NULL)
37840 {
37841 it_v->dispatcher_resolver = dispatch_decl;
37842 it_v = it_v->next;
37843 }
37844 }
37845 else
37846 #endif
37847 {
37848 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37849 "multiversioning needs ifunc which is not supported "
37850 "on this target");
37851 }
37852
37853 return dispatch_decl;
37854 }
37855
37856 /* Make the resolver function decl to dispatch the versions of
37857 a multi-versioned function, DEFAULT_DECL. Create an
37858 empty basic block in the resolver and store the pointer in
37859 EMPTY_BB. Return the decl of the resolver function. */
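/* Illustrative sketch of the result for a hypothetical versioned function
   foo: the resolver is named "foo.resolver" (with an extra file-unique
   component when foo is not public), returns a (void *), and DISPATCH_DECL
   is tagged with __attribute__ ((ifunc ("foo.resolver"))) so that the
   dynamic loader runs the resolver to select a version at load time.  */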
37860
37861 static tree
37862 make_resolver_func (const tree default_decl,
37863 const tree dispatch_decl,
37864 basic_block *empty_bb)
37865 {
37866 char *resolver_name;
37867 tree decl, type, decl_name, t;
37868 bool is_uniq = false;
37869
37870 /* IFUNCs have to be globally visible. So, if the default_decl is
37871 not, then the name of the IFUNC should be made unique. */
37872 if (TREE_PUBLIC (default_decl) == 0)
37873 is_uniq = true;
37874
37875 /* Append the filename to the resolver function if the versions are
37876 not externally visible. This is because the resolver function has
37877 to be externally visible for the loader to find it. So, appending
37878 the filename will prevent conflicts with a resolver function from
37879 another module which is based on the same version name. */
37880 resolver_name = make_name (default_decl, "resolver", is_uniq);
37881
37882 /* The resolver function should return a (void *). */
37883 type = build_function_type_list (ptr_type_node, NULL_TREE);
37884
37885 decl = build_fn_decl (resolver_name, type);
37886 decl_name = get_identifier (resolver_name);
37887 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37888
37889 DECL_NAME (decl) = decl_name;
37890 TREE_USED (decl) = 1;
37891 DECL_ARTIFICIAL (decl) = 1;
37892 DECL_IGNORED_P (decl) = 0;
37893 /* IFUNC resolvers have to be externally visible. */
37894 TREE_PUBLIC (decl) = 1;
37895 DECL_UNINLINABLE (decl) = 1;
37896
37897 /* Resolver is not external, body is generated. */
37898 DECL_EXTERNAL (decl) = 0;
37899 DECL_EXTERNAL (dispatch_decl) = 0;
37900
37901 DECL_CONTEXT (decl) = NULL_TREE;
37902 DECL_INITIAL (decl) = make_node (BLOCK);
37903 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37904
37905 if (DECL_COMDAT_GROUP (default_decl)
37906 || TREE_PUBLIC (default_decl))
37907 {
37908 /* In this case, each translation unit with a call to this
37909 versioned function will put out a resolver. Ensure it
37910 is comdat to keep just one copy. */
37911 DECL_COMDAT (decl) = 1;
37912 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
37913 }
37914 /* Build result decl and add to function_decl. */
37915 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37916 DECL_ARTIFICIAL (t) = 1;
37917 DECL_IGNORED_P (t) = 1;
37918 DECL_RESULT (decl) = t;
37919
37920 gimplify_function_tree (decl);
37921 push_cfun (DECL_STRUCT_FUNCTION (decl));
37922 *empty_bb = init_lowered_empty_function (decl, false, 0);
37923
37924 cgraph_node::add_new_function (decl, true);
37925 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37926
37927 pop_cfun ();
37928
37929 gcc_assert (dispatch_decl != NULL);
37930 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37931 DECL_ATTRIBUTES (dispatch_decl)
37932 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37933
37934 /* Create the alias for dispatch to resolver here. */
37935 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37936 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37937 XDELETEVEC (resolver_name);
37938 return decl;
37939 }
37940
37941 /* Generate the dispatching code body to dispatch multi-versioned function
37942 DECL. The target hook is called to process the "target" attributes and
37943 provide the code to dispatch the right function at run-time. NODE points
37944 to the dispatcher decl whose body will be created. */
37945
37946 static tree
37947 ix86_generate_version_dispatcher_body (void *node_p)
37948 {
37949 tree resolver_decl;
37950 basic_block empty_bb;
37951 tree default_ver_decl;
37952 struct cgraph_node *versn;
37953 struct cgraph_node *node;
37954
37955 struct cgraph_function_version_info *node_version_info = NULL;
37956 struct cgraph_function_version_info *versn_info = NULL;
37957
37958 node = (cgraph_node *)node_p;
37959
37960 node_version_info = node->function_version ();
37961 gcc_assert (node->dispatcher_function
37962 && node_version_info != NULL);
37963
37964 if (node_version_info->dispatcher_resolver)
37965 return node_version_info->dispatcher_resolver;
37966
37967 /* The first version in the chain corresponds to the default version. */
37968 default_ver_decl = node_version_info->next->this_node->decl;
37969
37970 /* node is going to be an alias, so remove the finalized bit. */
37971 node->definition = false;
37972
37973 resolver_decl = make_resolver_func (default_ver_decl,
37974 node->decl, &empty_bb);
37975
37976 node_version_info->dispatcher_resolver = resolver_decl;
37977
37978 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37979
37980 auto_vec<tree, 2> fn_ver_vec;
37981
37982 for (versn_info = node_version_info->next; versn_info;
37983 versn_info = versn_info->next)
37984 {
37985 versn = versn_info->this_node;
37986 /* Check for virtual functions here again, as by this time it should
37987 have been determined if this function needs a vtable index or
37988 not. This happens for methods in derived classes that override
37989 virtual methods in base classes but are not explicitly marked as
37990 virtual. */
37991 if (DECL_VINDEX (versn->decl))
37992 sorry ("Virtual function multiversioning not supported");
37993
37994 fn_ver_vec.safe_push (versn->decl);
37995 }
37996
37997 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37998 cgraph_edge::rebuild_edges ();
37999 pop_cfun ();
38000 return resolver_decl;
38001 }
38002 /* This builds the processor_model struct type defined in
38003 libgcc/config/i386/cpuinfo.c */
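/* A sketch of the layout being rebuilt here, assuming the definition in
   libgcc/config/i386/cpuinfo.c:

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       };  */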
38004
38005 static tree
38006 build_processor_model_struct (void)
38007 {
38008 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
38009 "__cpu_features"};
38010 tree field = NULL_TREE, field_chain = NULL_TREE;
38011 int i;
38012 tree type = make_node (RECORD_TYPE);
38013
38014 /* The first 3 fields are unsigned int. */
38015 for (i = 0; i < 3; ++i)
38016 {
38017 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
38018 get_identifier (field_name[i]), unsigned_type_node);
38019 if (field_chain != NULL_TREE)
38020 DECL_CHAIN (field) = field_chain;
38021 field_chain = field;
38022 }
38023
38024 /* The last field is an array of unsigned integers of size one. */
38025 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
38026 get_identifier (field_name[3]),
38027 build_array_type (unsigned_type_node,
38028 build_index_type (size_one_node)));
38029 if (field_chain != NULL_TREE)
38030 DECL_CHAIN (field) = field_chain;
38031 field_chain = field;
38032
38033 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
38034 return type;
38035 }
38036
38037 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
38038
38039 static tree
38040 make_var_decl (tree type, const char *name)
38041 {
38042 tree new_decl;
38043
38044 new_decl = build_decl (UNKNOWN_LOCATION,
38045 VAR_DECL,
38046 get_identifier(name),
38047 type);
38048
38049 DECL_EXTERNAL (new_decl) = 1;
38050 TREE_STATIC (new_decl) = 1;
38051 TREE_PUBLIC (new_decl) = 1;
38052 DECL_INITIAL (new_decl) = 0;
38053 DECL_ARTIFICIAL (new_decl) = 0;
38054 DECL_PRESERVE_P (new_decl) = 1;
38055
38056 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
38057 assemble_variable (new_decl, 0, 0, 0);
38058
38059 return new_decl;
38060 }
38061
38062 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
38063 into an integer defined in libgcc/config/i386/cpuinfo.c */
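/* A hedged usage sketch: calls such as

       if (__builtin_cpu_is ("intel")) ...
       if (__builtin_cpu_supports ("avx2")) ...

   are folded here into direct accesses to the external __cpu_model
   variable, roughly __cpu_model.__cpu_vendor == M_INTEL for the first and
   __cpu_model.__cpu_features[0] & (1 << F_AVX2) for the second; the string
   argument must be a compile-time string constant.  */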
38064
38065 static tree
38066 fold_builtin_cpu (tree fndecl, tree *args)
38067 {
38068 unsigned int i;
38069 enum ix86_builtins fn_code = (enum ix86_builtins)
38070 DECL_FUNCTION_CODE (fndecl);
38071 tree param_string_cst = NULL;
38072
38073 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
38074 enum processor_features
38075 {
38076 F_CMOV = 0,
38077 F_MMX,
38078 F_POPCNT,
38079 F_SSE,
38080 F_SSE2,
38081 F_SSE3,
38082 F_SSSE3,
38083 F_SSE4_1,
38084 F_SSE4_2,
38085 F_AVX,
38086 F_AVX2,
38087 F_SSE4_A,
38088 F_FMA4,
38089 F_XOP,
38090 F_FMA,
38091 F_AVX512F,
38092 F_BMI,
38093 F_BMI2,
38094 F_AES,
38095 F_PCLMUL,
38096 F_AVX512VL,
38097 F_AVX512BW,
38098 F_AVX512DQ,
38099 F_AVX512CD,
38100 F_AVX512ER,
38101 F_AVX512PF,
38102 F_AVX512VBMI,
38103 F_AVX512IFMA,
38104 F_MAX
38105 };
38106
38107 /* These are the values for vendor types and cpu types and subtypes
38108 in cpuinfo.c. Cpu types and subtypes should have the corresponding
38109 start value subtracted from them. */
38110 enum processor_model
38111 {
38112 M_INTEL = 1,
38113 M_AMD,
38114 M_CPU_TYPE_START,
38115 M_INTEL_BONNELL,
38116 M_INTEL_CORE2,
38117 M_INTEL_COREI7,
38118 M_AMDFAM10H,
38119 M_AMDFAM15H,
38120 M_INTEL_SILVERMONT,
38121 M_INTEL_KNL,
38122 M_AMD_BTVER1,
38123 M_AMD_BTVER2,
38124 M_CPU_SUBTYPE_START,
38125 M_INTEL_COREI7_NEHALEM,
38126 M_INTEL_COREI7_WESTMERE,
38127 M_INTEL_COREI7_SANDYBRIDGE,
38128 M_AMDFAM10H_BARCELONA,
38129 M_AMDFAM10H_SHANGHAI,
38130 M_AMDFAM10H_ISTANBUL,
38131 M_AMDFAM15H_BDVER1,
38132 M_AMDFAM15H_BDVER2,
38133 M_AMDFAM15H_BDVER3,
38134 M_AMDFAM15H_BDVER4,
38135 M_AMDFAM17H_ZNVER1,
38136 M_INTEL_COREI7_IVYBRIDGE,
38137 M_INTEL_COREI7_HASWELL,
38138 M_INTEL_COREI7_BROADWELL,
38139 M_INTEL_COREI7_SKYLAKE,
38140 M_INTEL_COREI7_SKYLAKE_AVX512
38141 };
38142
38143 static struct _arch_names_table
38144 {
38145 const char *const name;
38146 const enum processor_model model;
38147 }
38148 const arch_names_table[] =
38149 {
38150 {"amd", M_AMD},
38151 {"intel", M_INTEL},
38152 {"atom", M_INTEL_BONNELL},
38153 {"slm", M_INTEL_SILVERMONT},
38154 {"core2", M_INTEL_CORE2},
38155 {"corei7", M_INTEL_COREI7},
38156 {"nehalem", M_INTEL_COREI7_NEHALEM},
38157 {"westmere", M_INTEL_COREI7_WESTMERE},
38158 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
38159 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
38160 {"haswell", M_INTEL_COREI7_HASWELL},
38161 {"broadwell", M_INTEL_COREI7_BROADWELL},
38162 {"skylake", M_INTEL_COREI7_SKYLAKE},
38163 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
38164 {"bonnell", M_INTEL_BONNELL},
38165 {"silvermont", M_INTEL_SILVERMONT},
38166 {"knl", M_INTEL_KNL},
38167 {"amdfam10h", M_AMDFAM10H},
38168 {"barcelona", M_AMDFAM10H_BARCELONA},
38169 {"shanghai", M_AMDFAM10H_SHANGHAI},
38170 {"istanbul", M_AMDFAM10H_ISTANBUL},
38171 {"btver1", M_AMD_BTVER1},
38172 {"amdfam15h", M_AMDFAM15H},
38173 {"bdver1", M_AMDFAM15H_BDVER1},
38174 {"bdver2", M_AMDFAM15H_BDVER2},
38175 {"bdver3", M_AMDFAM15H_BDVER3},
38176 {"bdver4", M_AMDFAM15H_BDVER4},
38177 {"btver2", M_AMD_BTVER2},
38178 {"znver1", M_AMDFAM17H_ZNVER1},
38179 };
38180
38181 static struct _isa_names_table
38182 {
38183 const char *const name;
38184 const enum processor_features feature;
38185 }
38186 const isa_names_table[] =
38187 {
38188 {"cmov", F_CMOV},
38189 {"mmx", F_MMX},
38190 {"popcnt", F_POPCNT},
38191 {"sse", F_SSE},
38192 {"sse2", F_SSE2},
38193 {"sse3", F_SSE3},
38194 {"ssse3", F_SSSE3},
38195 {"sse4a", F_SSE4_A},
38196 {"sse4.1", F_SSE4_1},
38197 {"sse4.2", F_SSE4_2},
38198 {"avx", F_AVX},
38199 {"fma4", F_FMA4},
38200 {"xop", F_XOP},
38201 {"fma", F_FMA},
38202 {"avx2", F_AVX2},
38203 {"avx512f", F_AVX512F},
38204 {"bmi", F_BMI},
38205 {"bmi2", F_BMI2},
38206 {"aes", F_AES},
38207 {"pclmul", F_PCLMUL},
38208 {"avx512vl",F_AVX512VL},
38209 {"avx512bw",F_AVX512BW},
38210 {"avx512dq",F_AVX512DQ},
38211 {"avx512cd",F_AVX512CD},
38212 {"avx512er",F_AVX512ER},
38213 {"avx512pf",F_AVX512PF},
38214 {"avx512vbmi",F_AVX512VBMI},
38215 {"avx512ifma",F_AVX512IFMA},
38216 };
38217
38218 tree __processor_model_type = build_processor_model_struct ();
38219 tree __cpu_model_var = make_var_decl (__processor_model_type,
38220 "__cpu_model");
38221
38222
38223 varpool_node::add (__cpu_model_var);
38224
38225 gcc_assert ((args != NULL) && (*args != NULL));
38226
38227 param_string_cst = *args;
38228 while (param_string_cst
38229 && TREE_CODE (param_string_cst) != STRING_CST)
38230 {
38231 /* *args must be an expr that can contain other EXPRs leading to a
38232 STRING_CST.  */
38233 if (!EXPR_P (param_string_cst))
38234 {
38235 error ("Parameter to builtin must be a string constant or literal");
38236 return integer_zero_node;
38237 }
38238 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
38239 }
38240
38241 gcc_assert (param_string_cst);
38242
38243 if (fn_code == IX86_BUILTIN_CPU_IS)
38244 {
38245 tree ref;
38246 tree field;
38247 tree final;
38248
38249 unsigned int field_val = 0;
38250 unsigned int NUM_ARCH_NAMES
38251 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
38252
38253 for (i = 0; i < NUM_ARCH_NAMES; i++)
38254 if (strcmp (arch_names_table[i].name,
38255 TREE_STRING_POINTER (param_string_cst)) == 0)
38256 break;
38257
38258 if (i == NUM_ARCH_NAMES)
38259 {
38260 error ("Parameter to builtin not valid: %s",
38261 TREE_STRING_POINTER (param_string_cst));
38262 return integer_zero_node;
38263 }
38264
38265 field = TYPE_FIELDS (__processor_model_type);
38266 field_val = arch_names_table[i].model;
38267
38268 /* CPU types are stored in the next field. */
38269 if (field_val > M_CPU_TYPE_START
38270 && field_val < M_CPU_SUBTYPE_START)
38271 {
38272 field = DECL_CHAIN (field);
38273 field_val -= M_CPU_TYPE_START;
38274 }
38275
38276 /* CPU subtypes are stored in the next field. */
38277 if (field_val > M_CPU_SUBTYPE_START)
38278 {
38279 field = DECL_CHAIN (DECL_CHAIN (field));
38280 field_val -= M_CPU_SUBTYPE_START;
38281 }
38282
38283 /* Get the appropriate field in __cpu_model. */
38284 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
38285 field, NULL_TREE);
38286
38287 /* Check the value. */
38288 final = build2 (EQ_EXPR, unsigned_type_node, ref,
38289 build_int_cstu (unsigned_type_node, field_val));
38290 return build1 (CONVERT_EXPR, integer_type_node, final);
38291 }
38292 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
38293 {
38294 tree ref;
38295 tree array_elt;
38296 tree field;
38297 tree final;
38298
38299 unsigned int field_val = 0;
38300 unsigned int NUM_ISA_NAMES
38301 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
38302
38303 for (i = 0; i < NUM_ISA_NAMES; i++)
38304 if (strcmp (isa_names_table[i].name,
38305 TREE_STRING_POINTER (param_string_cst)) == 0)
38306 break;
38307
38308 if (i == NUM_ISA_NAMES)
38309 {
38310 error ("Parameter to builtin not valid: %s",
38311 TREE_STRING_POINTER (param_string_cst));
38312 return integer_zero_node;
38313 }
38314
38315 field = TYPE_FIELDS (__processor_model_type);
38316 /* Get the last field, which is __cpu_features. */
38317 while (DECL_CHAIN (field))
38318 field = DECL_CHAIN (field);
38319
38320 /* Get the appropriate field: __cpu_model.__cpu_features */
38321 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
38322 field, NULL_TREE);
38323
38324 /* Access the 0th element of __cpu_features array. */
38325 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
38326 integer_zero_node, NULL_TREE, NULL_TREE);
38327
38328 field_val = (1 << isa_names_table[i].feature);
38329 /* Return __cpu_model.__cpu_features[0] & field_val */
38330 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
38331 build_int_cstu (unsigned_type_node, field_val));
38332 return build1 (CONVERT_EXPR, integer_type_node, final);
38333 }
38334 gcc_unreachable ();
38335 }
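
/* For reference, a sketch of what the folds above produce (struct field
   names follow libgcc's cpuinfo.c; illustrative only):

     __builtin_cpu_is ("westmere")
       => (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("avx2")
       => (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */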
38336
38337 static tree
38338 ix86_fold_builtin (tree fndecl, int n_args,
38339 tree *args, bool ignore ATTRIBUTE_UNUSED)
38340 {
38341 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38342 {
38343 enum ix86_builtins fn_code = (enum ix86_builtins)
38344 DECL_FUNCTION_CODE (fndecl);
38345 switch (fn_code)
38346 {
38347 case IX86_BUILTIN_CPU_IS:
38348 case IX86_BUILTIN_CPU_SUPPORTS:
38349 gcc_assert (n_args == 1);
38350 return fold_builtin_cpu (fndecl, args);
38351
38352 case IX86_BUILTIN_NANQ:
38353 case IX86_BUILTIN_NANSQ:
38354 {
38355 tree type = TREE_TYPE (TREE_TYPE (fndecl));
38356 const char *str = c_getstr (*args);
38357 int quiet = fn_code == IX86_BUILTIN_NANQ;
38358 REAL_VALUE_TYPE real;
38359
38360 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
38361 return build_real (type, real);
38362 return NULL_TREE;
38363 }
38364
38365 default:
38366 break;
38367 }
38368 }
38369
38370 #ifdef SUBTARGET_FOLD_BUILTIN
38371 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
38372 #endif
38373
38374 return NULL_TREE;
38375 }
38376
38377 /* Make builtins to detect cpu type and features supported. NAME is
38378 the builtin name, CODE is the builtin code, and FTYPE is the function
38379 type of the builtin. */
38380
38381 static void
38382 make_cpu_type_builtin (const char* name, int code,
38383 enum ix86_builtin_func_type ftype, bool is_const)
38384 {
38385 tree decl;
38386 tree type;
38387
38388 type = ix86_get_builtin_func_type (ftype);
38389 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
38390 NULL, NULL_TREE);
38391 gcc_assert (decl != NULL_TREE);
38392 ix86_builtins[(int) code] = decl;
38393 TREE_READONLY (decl) = is_const;
38394 }
38395
38396 /* Make builtins to get CPU type and features supported. The created
38397 builtins are:
38398
38399 __builtin_cpu_init (), to detect cpu type and features,
38400 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
38401 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
38402 */
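
/* A minimal usage sketch (user code, not part of GCC itself):

     if (__builtin_cpu_supports ("avx2"))
       ... use the AVX2 code path ...
     else if (__builtin_cpu_is ("bdver2"))
       ... use a tuned fallback ...

   __builtin_cpu_init () must have run before the other two are used; user
   code typically calls it explicitly in a constructor if the checks can
   happen before main.  */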
38403
38404 static void
38405 ix86_init_platform_type_builtins (void)
38406 {
38407 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
38408 INT_FTYPE_VOID, false);
38409 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
38410 INT_FTYPE_PCCHAR, true);
38411 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
38412 INT_FTYPE_PCCHAR, true);
38413 }
38414
38415 /* Internal method for ix86_init_builtins. */
38416
38417 static void
38418 ix86_init_builtins_va_builtins_abi (void)
38419 {
38420 tree ms_va_ref, sysv_va_ref;
38421 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
38422 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
38423 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
38424 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
38425
38426 if (!TARGET_64BIT)
38427 return;
38428 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
38429 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
38430 ms_va_ref = build_reference_type (ms_va_list_type_node);
38431 sysv_va_ref =
38432 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
38433
38434 fnvoid_va_end_ms =
38435 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
38436 fnvoid_va_start_ms =
38437 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
38438 fnvoid_va_end_sysv =
38439 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
38440 fnvoid_va_start_sysv =
38441 build_varargs_function_type_list (void_type_node, sysv_va_ref,
38442 NULL_TREE);
38443 fnvoid_va_copy_ms =
38444 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
38445 NULL_TREE);
38446 fnvoid_va_copy_sysv =
38447 build_function_type_list (void_type_node, sysv_va_ref,
38448 sysv_va_ref, NULL_TREE);
38449
38450 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
38451 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
38452 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
38453 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
38454 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
38455 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
38456 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
38457 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
38458 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
38459 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
38460 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
38461 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
38462 }
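
/* Descriptive note on the registrations above: these builtins are what back
   va_start/va_end/va_copy in 64-bit functions declared with the ms_abi or
   sysv_abi attribute, so each calling convention gets the va_list handling
   it expects.  */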
38463
38464 static void
38465 ix86_init_builtin_types (void)
38466 {
38467 tree float128_type_node, float80_type_node, const_string_type_node;
38468
38469 /* The __float80 type. */
38470 float80_type_node = long_double_type_node;
38471 if (TYPE_MODE (float80_type_node) != XFmode)
38472 {
38473 /* The __float80 type. */
38474 float80_type_node = make_node (REAL_TYPE);
38475
38476 TYPE_PRECISION (float80_type_node) = 80;
38477 layout_type (float80_type_node);
38478 }
38479 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
38480
38481 /* The __float128 type. */
38482 float128_type_node = make_node (REAL_TYPE);
38483 TYPE_PRECISION (float128_type_node) = 128;
38484 layout_type (float128_type_node);
38485 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
38486
38487 const_string_type_node
38488 = build_pointer_type (build_qualified_type
38489 (char_type_node, TYPE_QUAL_CONST));
38490
38491 /* This macro is built by i386-builtin-types.awk. */
38492 DEFINE_BUILTIN_PRIMITIVE_TYPES;
38493 }
38494
38495 static void
38496 ix86_init_builtins (void)
38497 {
38498 tree ftype, decl;
38499
38500 ix86_init_builtin_types ();
38501
38502 /* Builtins to get CPU type and features. */
38503 ix86_init_platform_type_builtins ();
38504
38505 /* TFmode support builtins. */
38506 def_builtin_const (0, "__builtin_infq",
38507 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
38508 def_builtin_const (0, "__builtin_huge_valq",
38509 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
38510
38511 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
38512 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
38513 BUILT_IN_MD, "nanq", NULL_TREE);
38514 TREE_READONLY (decl) = 1;
38515 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
38516
38517 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
38518 BUILT_IN_MD, "nansq", NULL_TREE);
38519 TREE_READONLY (decl) = 1;
38520 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
38521
38522 /* We will expand them to a normal call if SSE isn't available, since
38523 they are used by libgcc.  */
38524 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
38525 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
38526 BUILT_IN_MD, "__fabstf2", NULL_TREE);
38527 TREE_READONLY (decl) = 1;
38528 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
38529
38530 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
38531 decl = add_builtin_function ("__builtin_copysignq", ftype,
38532 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
38533 "__copysigntf3", NULL_TREE);
38534 TREE_READONLY (decl) = 1;
38535 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
38536
38537 ix86_init_tm_builtins ();
38538 ix86_init_mmx_sse_builtins ();
38539 ix86_init_mpx_builtins ();
38540
38541 if (TARGET_LP64)
38542 ix86_init_builtins_va_builtins_abi ();
38543
38544 #ifdef SUBTARGET_INIT_BUILTINS
38545 SUBTARGET_INIT_BUILTINS;
38546 #endif
38547 }
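
/* Illustrative use of the TFmode builtins registered above (user code,
   assuming a target with __float128 support):

     __float128 inf = __builtin_infq ();
     __float128 nan = __builtin_nanq ("");
     __float128 mag = __builtin_fabsq (nan);

   As noted above, __builtin_fabsq and __builtin_copysignq fall back to the
   libgcc routines __fabstf2 and __copysigntf3 when SSE is unavailable.  */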
38548
38549 /* Return the ix86 builtin for CODE. */
38550
38551 static tree
38552 ix86_builtin_decl (unsigned code, bool)
38553 {
38554 if (code >= IX86_BUILTIN_MAX)
38555 return error_mark_node;
38556
38557 return ix86_builtins[code];
38558 }
38559
38560 /* Errors in the source file can cause expand_expr to return const0_rtx
38561 where we expect a vector. To avoid crashing, use one of the vector
38562 clear instructions. */
38563 static rtx
38564 safe_vector_operand (rtx x, machine_mode mode)
38565 {
38566 if (x == const0_rtx)
38567 x = CONST0_RTX (mode);
38568 return x;
38569 }
38570
38571 /* Fix up modeless constants to fit the required mode.  */
38572 static rtx
38573 fixup_modeless_constant (rtx x, machine_mode mode)
38574 {
38575 if (GET_MODE (x) == VOIDmode)
38576 x = convert_to_mode (mode, x, 1);
38577 return x;
38578 }
38579
38580 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
38581
38582 static rtx
38583 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
38584 {
38585 rtx pat;
38586 tree arg0 = CALL_EXPR_ARG (exp, 0);
38587 tree arg1 = CALL_EXPR_ARG (exp, 1);
38588 rtx op0 = expand_normal (arg0);
38589 rtx op1 = expand_normal (arg1);
38590 machine_mode tmode = insn_data[icode].operand[0].mode;
38591 machine_mode mode0 = insn_data[icode].operand[1].mode;
38592 machine_mode mode1 = insn_data[icode].operand[2].mode;
38593
38594 if (VECTOR_MODE_P (mode0))
38595 op0 = safe_vector_operand (op0, mode0);
38596 if (VECTOR_MODE_P (mode1))
38597 op1 = safe_vector_operand (op1, mode1);
38598
38599 if (optimize || !target
38600 || GET_MODE (target) != tmode
38601 || !insn_data[icode].operand[0].predicate (target, tmode))
38602 target = gen_reg_rtx (tmode);
38603
38604 if (GET_MODE (op1) == SImode && mode1 == TImode)
38605 {
38606 rtx x = gen_reg_rtx (V4SImode);
38607 emit_insn (gen_sse2_loadd (x, op1));
38608 op1 = gen_lowpart (TImode, x);
38609 }
38610
38611 if (!insn_data[icode].operand[1].predicate (op0, mode0))
38612 op0 = copy_to_mode_reg (mode0, op0);
38613 if (!insn_data[icode].operand[2].predicate (op1, mode1))
38614 op1 = copy_to_mode_reg (mode1, op1);
38615
38616 pat = GEN_FCN (icode) (target, op0, op1);
38617 if (! pat)
38618 return 0;
38619
38620 emit_insn (pat);
38621
38622 return target;
38623 }
38624
38625 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
38626
38627 static rtx
38628 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
38629 enum ix86_builtin_func_type m_type,
38630 enum rtx_code sub_code)
38631 {
38632 rtx pat;
38633 int i;
38634 int nargs;
38635 bool comparison_p = false;
38636 bool tf_p = false;
38637 bool last_arg_constant = false;
38638 int num_memory = 0;
38639 struct {
38640 rtx op;
38641 machine_mode mode;
38642 } args[4];
38643
38644 machine_mode tmode = insn_data[icode].operand[0].mode;
38645
38646 switch (m_type)
38647 {
38648 case MULTI_ARG_4_DF2_DI_I:
38649 case MULTI_ARG_4_DF2_DI_I1:
38650 case MULTI_ARG_4_SF2_SI_I:
38651 case MULTI_ARG_4_SF2_SI_I1:
38652 nargs = 4;
38653 last_arg_constant = true;
38654 break;
38655
38656 case MULTI_ARG_3_SF:
38657 case MULTI_ARG_3_DF:
38658 case MULTI_ARG_3_SF2:
38659 case MULTI_ARG_3_DF2:
38660 case MULTI_ARG_3_DI:
38661 case MULTI_ARG_3_SI:
38662 case MULTI_ARG_3_SI_DI:
38663 case MULTI_ARG_3_HI:
38664 case MULTI_ARG_3_HI_SI:
38665 case MULTI_ARG_3_QI:
38666 case MULTI_ARG_3_DI2:
38667 case MULTI_ARG_3_SI2:
38668 case MULTI_ARG_3_HI2:
38669 case MULTI_ARG_3_QI2:
38670 nargs = 3;
38671 break;
38672
38673 case MULTI_ARG_2_SF:
38674 case MULTI_ARG_2_DF:
38675 case MULTI_ARG_2_DI:
38676 case MULTI_ARG_2_SI:
38677 case MULTI_ARG_2_HI:
38678 case MULTI_ARG_2_QI:
38679 nargs = 2;
38680 break;
38681
38682 case MULTI_ARG_2_DI_IMM:
38683 case MULTI_ARG_2_SI_IMM:
38684 case MULTI_ARG_2_HI_IMM:
38685 case MULTI_ARG_2_QI_IMM:
38686 nargs = 2;
38687 last_arg_constant = true;
38688 break;
38689
38690 case MULTI_ARG_1_SF:
38691 case MULTI_ARG_1_DF:
38692 case MULTI_ARG_1_SF2:
38693 case MULTI_ARG_1_DF2:
38694 case MULTI_ARG_1_DI:
38695 case MULTI_ARG_1_SI:
38696 case MULTI_ARG_1_HI:
38697 case MULTI_ARG_1_QI:
38698 case MULTI_ARG_1_SI_DI:
38699 case MULTI_ARG_1_HI_DI:
38700 case MULTI_ARG_1_HI_SI:
38701 case MULTI_ARG_1_QI_DI:
38702 case MULTI_ARG_1_QI_SI:
38703 case MULTI_ARG_1_QI_HI:
38704 nargs = 1;
38705 break;
38706
38707 case MULTI_ARG_2_DI_CMP:
38708 case MULTI_ARG_2_SI_CMP:
38709 case MULTI_ARG_2_HI_CMP:
38710 case MULTI_ARG_2_QI_CMP:
38711 nargs = 2;
38712 comparison_p = true;
38713 break;
38714
38715 case MULTI_ARG_2_SF_TF:
38716 case MULTI_ARG_2_DF_TF:
38717 case MULTI_ARG_2_DI_TF:
38718 case MULTI_ARG_2_SI_TF:
38719 case MULTI_ARG_2_HI_TF:
38720 case MULTI_ARG_2_QI_TF:
38721 nargs = 2;
38722 tf_p = true;
38723 break;
38724
38725 default:
38726 gcc_unreachable ();
38727 }
38728
38729 if (optimize || !target
38730 || GET_MODE (target) != tmode
38731 || !insn_data[icode].operand[0].predicate (target, tmode))
38732 target = gen_reg_rtx (tmode);
38733
38734 gcc_assert (nargs <= 4);
38735
38736 for (i = 0; i < nargs; i++)
38737 {
38738 tree arg = CALL_EXPR_ARG (exp, i);
38739 rtx op = expand_normal (arg);
38740 int adjust = (comparison_p) ? 1 : 0;
38741 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
38742
38743 if (last_arg_constant && i == nargs - 1)
38744 {
38745 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
38746 {
38747 enum insn_code new_icode = icode;
38748 switch (icode)
38749 {
38750 case CODE_FOR_xop_vpermil2v2df3:
38751 case CODE_FOR_xop_vpermil2v4sf3:
38752 case CODE_FOR_xop_vpermil2v4df3:
38753 case CODE_FOR_xop_vpermil2v8sf3:
38754 error ("the last argument must be a 2-bit immediate");
38755 return gen_reg_rtx (tmode);
38756 case CODE_FOR_xop_rotlv2di3:
38757 new_icode = CODE_FOR_rotlv2di3;
38758 goto xop_rotl;
38759 case CODE_FOR_xop_rotlv4si3:
38760 new_icode = CODE_FOR_rotlv4si3;
38761 goto xop_rotl;
38762 case CODE_FOR_xop_rotlv8hi3:
38763 new_icode = CODE_FOR_rotlv8hi3;
38764 goto xop_rotl;
38765 case CODE_FOR_xop_rotlv16qi3:
38766 new_icode = CODE_FOR_rotlv16qi3;
38767 xop_rotl:
38768 if (CONST_INT_P (op))
38769 {
38770 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
38771 op = GEN_INT (INTVAL (op) & mask);
38772 gcc_checking_assert
38773 (insn_data[icode].operand[i + 1].predicate (op, mode));
38774 }
38775 else
38776 {
38777 gcc_checking_assert
38778 (nargs == 2
38779 && insn_data[new_icode].operand[0].mode == tmode
38780 && insn_data[new_icode].operand[1].mode == tmode
38781 && insn_data[new_icode].operand[2].mode == mode
38782 && insn_data[new_icode].operand[0].predicate
38783 == insn_data[icode].operand[0].predicate
38784 && insn_data[new_icode].operand[1].predicate
38785 == insn_data[icode].operand[1].predicate);
38786 icode = new_icode;
38787 goto non_constant;
38788 }
38789 break;
38790 default:
38791 gcc_unreachable ();
38792 }
38793 }
38794 }
38795 else
38796 {
38797 non_constant:
38798 if (VECTOR_MODE_P (mode))
38799 op = safe_vector_operand (op, mode);
38800
38801 /* If we aren't optimizing, only allow one memory operand to be
38802 generated. */
38803 if (memory_operand (op, mode))
38804 num_memory++;
38805
38806 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
38807
38808 if (optimize
38809 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
38810 || num_memory > 1)
38811 op = force_reg (mode, op);
38812 }
38813
38814 args[i].op = op;
38815 args[i].mode = mode;
38816 }
38817
38818 switch (nargs)
38819 {
38820 case 1:
38821 pat = GEN_FCN (icode) (target, args[0].op);
38822 break;
38823
38824 case 2:
38825 if (tf_p)
38826 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38827 GEN_INT ((int)sub_code));
38828 else if (! comparison_p)
38829 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38830 else
38831 {
38832 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
38833 args[0].op,
38834 args[1].op);
38835
38836 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
38837 }
38838 break;
38839
38840 case 3:
38841 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38842 break;
38843
38844 case 4:
38845 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
38846 break;
38847
38848 default:
38849 gcc_unreachable ();
38850 }
38851
38852 if (! pat)
38853 return 0;
38854
38855 emit_insn (pat);
38856 return target;
38857 }
38858
38859 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
38860 insns with vec_merge. */
38861
38862 static rtx
38863 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
38864 rtx target)
38865 {
38866 rtx pat;
38867 tree arg0 = CALL_EXPR_ARG (exp, 0);
38868 rtx op1, op0 = expand_normal (arg0);
38869 machine_mode tmode = insn_data[icode].operand[0].mode;
38870 machine_mode mode0 = insn_data[icode].operand[1].mode;
38871
38872 if (optimize || !target
38873 || GET_MODE (target) != tmode
38874 || !insn_data[icode].operand[0].predicate (target, tmode))
38875 target = gen_reg_rtx (tmode);
38876
38877 if (VECTOR_MODE_P (mode0))
38878 op0 = safe_vector_operand (op0, mode0);
38879
38880 if ((optimize && !register_operand (op0, mode0))
38881 || !insn_data[icode].operand[1].predicate (op0, mode0))
38882 op0 = copy_to_mode_reg (mode0, op0);
38883
38884 op1 = op0;
38885 if (!insn_data[icode].operand[2].predicate (op1, mode0))
38886 op1 = copy_to_mode_reg (mode0, op1);
38887
38888 pat = GEN_FCN (icode) (target, op0, op1);
38889 if (! pat)
38890 return 0;
38891 emit_insn (pat);
38892 return target;
38893 }
38894
38895 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
38896
38897 static rtx
38898 ix86_expand_sse_compare (const struct builtin_description *d,
38899 tree exp, rtx target, bool swap)
38900 {
38901 rtx pat;
38902 tree arg0 = CALL_EXPR_ARG (exp, 0);
38903 tree arg1 = CALL_EXPR_ARG (exp, 1);
38904 rtx op0 = expand_normal (arg0);
38905 rtx op1 = expand_normal (arg1);
38906 rtx op2;
38907 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38908 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38909 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38910 enum rtx_code comparison = d->comparison;
38911
38912 if (VECTOR_MODE_P (mode0))
38913 op0 = safe_vector_operand (op0, mode0);
38914 if (VECTOR_MODE_P (mode1))
38915 op1 = safe_vector_operand (op1, mode1);
38916
38917 /* Swap operands if we have a comparison that isn't available in
38918 hardware. */
38919 if (swap)
38920 std::swap (op0, op1);
38921
38922 if (optimize || !target
38923 || GET_MODE (target) != tmode
38924 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38925 target = gen_reg_rtx (tmode);
38926
38927 if ((optimize && !register_operand (op0, mode0))
38928 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
38929 op0 = copy_to_mode_reg (mode0, op0);
38930 if ((optimize && !register_operand (op1, mode1))
38931 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
38932 op1 = copy_to_mode_reg (mode1, op1);
38933
38934 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
38935 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38936 if (! pat)
38937 return 0;
38938 emit_insn (pat);
38939 return target;
38940 }
38941
38942 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
38943
38944 static rtx
38945 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
38946 rtx target)
38947 {
38948 rtx pat;
38949 tree arg0 = CALL_EXPR_ARG (exp, 0);
38950 tree arg1 = CALL_EXPR_ARG (exp, 1);
38951 rtx op0 = expand_normal (arg0);
38952 rtx op1 = expand_normal (arg1);
38953 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38954 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38955 enum rtx_code comparison = d->comparison;
38956
38957 if (VECTOR_MODE_P (mode0))
38958 op0 = safe_vector_operand (op0, mode0);
38959 if (VECTOR_MODE_P (mode1))
38960 op1 = safe_vector_operand (op1, mode1);
38961
38962 /* Swap operands if we have a comparison that isn't available in
38963 hardware. */
38964 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38965 std::swap (op0, op1);
38966
38967 target = gen_reg_rtx (SImode);
38968 emit_move_insn (target, const0_rtx);
38969 target = gen_rtx_SUBREG (QImode, target, 0);
38970
38971 if ((optimize && !register_operand (op0, mode0))
38972 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38973 op0 = copy_to_mode_reg (mode0, op0);
38974 if ((optimize && !register_operand (op1, mode1))
38975 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38976 op1 = copy_to_mode_reg (mode1, op1);
38977
38978 pat = GEN_FCN (d->icode) (op0, op1);
38979 if (! pat)
38980 return 0;
38981 emit_insn (pat);
38982 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38983 gen_rtx_fmt_ee (comparison, QImode,
38984 SET_DEST (pat),
38985 const0_rtx)));
38986
38987 return SUBREG_REG (target);
38988 }
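
/* Illustrative summary of the comi expansion above: the emitted RTL is
   roughly

     target:SI = 0;
     <comi insn setting FLAGS_REG from op0, op1>
     (strict_low_part (subreg:QI target)) = (comparison FLAGS_REG 0);

   and the full SImode register is returned, so the builtin yields 0 or 1.  */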
38989
38990 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
38991
38992 static rtx
38993 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38994 rtx target)
38995 {
38996 rtx pat;
38997 tree arg0 = CALL_EXPR_ARG (exp, 0);
38998 rtx op1, op0 = expand_normal (arg0);
38999 machine_mode tmode = insn_data[d->icode].operand[0].mode;
39000 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
39001
39002 if (optimize || target == 0
39003 || GET_MODE (target) != tmode
39004 || !insn_data[d->icode].operand[0].predicate (target, tmode))
39005 target = gen_reg_rtx (tmode);
39006
39007 if (VECTOR_MODE_P (mode0))
39008 op0 = safe_vector_operand (op0, mode0);
39009
39010 if ((optimize && !register_operand (op0, mode0))
39011 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
39012 op0 = copy_to_mode_reg (mode0, op0);
39013
39014 op1 = GEN_INT (d->comparison);
39015
39016 pat = GEN_FCN (d->icode) (target, op0, op1);
39017 if (! pat)
39018 return 0;
39019 emit_insn (pat);
39020 return target;
39021 }
39022
39023 static rtx
39024 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
39025 tree exp, rtx target)
39026 {
39027 rtx pat;
39028 tree arg0 = CALL_EXPR_ARG (exp, 0);
39029 tree arg1 = CALL_EXPR_ARG (exp, 1);
39030 rtx op0 = expand_normal (arg0);
39031 rtx op1 = expand_normal (arg1);
39032 rtx op2;
39033 machine_mode tmode = insn_data[d->icode].operand[0].mode;
39034 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
39035 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
39036
39037 if (optimize || target == 0
39038 || GET_MODE (target) != tmode
39039 || !insn_data[d->icode].operand[0].predicate (target, tmode))
39040 target = gen_reg_rtx (tmode);
39041
39042 op0 = safe_vector_operand (op0, mode0);
39043 op1 = safe_vector_operand (op1, mode1);
39044
39045 if ((optimize && !register_operand (op0, mode0))
39046 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
39047 op0 = copy_to_mode_reg (mode0, op0);
39048 if ((optimize && !register_operand (op1, mode1))
39049 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
39050 op1 = copy_to_mode_reg (mode1, op1);
39051
39052 op2 = GEN_INT (d->comparison);
39053
39054 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
39055 if (! pat)
39056 return 0;
39057 emit_insn (pat);
39058 return target;
39059 }
39060
39061 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
39062
39063 static rtx
39064 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
39065 rtx target)
39066 {
39067 rtx pat;
39068 tree arg0 = CALL_EXPR_ARG (exp, 0);
39069 tree arg1 = CALL_EXPR_ARG (exp, 1);
39070 rtx op0 = expand_normal (arg0);
39071 rtx op1 = expand_normal (arg1);
39072 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
39073 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
39074 enum rtx_code comparison = d->comparison;
39075
39076 if (VECTOR_MODE_P (mode0))
39077 op0 = safe_vector_operand (op0, mode0);
39078 if (VECTOR_MODE_P (mode1))
39079 op1 = safe_vector_operand (op1, mode1);
39080
39081 target = gen_reg_rtx (SImode);
39082 emit_move_insn (target, const0_rtx);
39083 target = gen_rtx_SUBREG (QImode, target, 0);
39084
39085 if ((optimize && !register_operand (op0, mode0))
39086 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
39087 op0 = copy_to_mode_reg (mode0, op0);
39088 if ((optimize && !register_operand (op1, mode1))
39089 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
39090 op1 = copy_to_mode_reg (mode1, op1);
39091
39092 pat = GEN_FCN (d->icode) (op0, op1);
39093 if (! pat)
39094 return 0;
39095 emit_insn (pat);
39096 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39097 gen_rtx_fmt_ee (comparison, QImode,
39098 SET_DEST (pat),
39099 const0_rtx)));
39100
39101 return SUBREG_REG (target);
39102 }
39103
39104 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
39105
39106 static rtx
39107 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
39108 tree exp, rtx target)
39109 {
39110 rtx pat;
39111 tree arg0 = CALL_EXPR_ARG (exp, 0);
39112 tree arg1 = CALL_EXPR_ARG (exp, 1);
39113 tree arg2 = CALL_EXPR_ARG (exp, 2);
39114 tree arg3 = CALL_EXPR_ARG (exp, 3);
39115 tree arg4 = CALL_EXPR_ARG (exp, 4);
39116 rtx scratch0, scratch1;
39117 rtx op0 = expand_normal (arg0);
39118 rtx op1 = expand_normal (arg1);
39119 rtx op2 = expand_normal (arg2);
39120 rtx op3 = expand_normal (arg3);
39121 rtx op4 = expand_normal (arg4);
39122 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
39123
39124 tmode0 = insn_data[d->icode].operand[0].mode;
39125 tmode1 = insn_data[d->icode].operand[1].mode;
39126 modev2 = insn_data[d->icode].operand[2].mode;
39127 modei3 = insn_data[d->icode].operand[3].mode;
39128 modev4 = insn_data[d->icode].operand[4].mode;
39129 modei5 = insn_data[d->icode].operand[5].mode;
39130 modeimm = insn_data[d->icode].operand[6].mode;
39131
39132 if (VECTOR_MODE_P (modev2))
39133 op0 = safe_vector_operand (op0, modev2);
39134 if (VECTOR_MODE_P (modev4))
39135 op2 = safe_vector_operand (op2, modev4);
39136
39137 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
39138 op0 = copy_to_mode_reg (modev2, op0);
39139 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
39140 op1 = copy_to_mode_reg (modei3, op1);
39141 if ((optimize && !register_operand (op2, modev4))
39142 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
39143 op2 = copy_to_mode_reg (modev4, op2);
39144 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
39145 op3 = copy_to_mode_reg (modei5, op3);
39146
39147 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
39148 {
39149 error ("the fifth argument must be an 8-bit immediate");
39150 return const0_rtx;
39151 }
39152
39153 if (d->code == IX86_BUILTIN_PCMPESTRI128)
39154 {
39155 if (optimize || !target
39156 || GET_MODE (target) != tmode0
39157 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
39158 target = gen_reg_rtx (tmode0);
39159
39160 scratch1 = gen_reg_rtx (tmode1);
39161
39162 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
39163 }
39164 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
39165 {
39166 if (optimize || !target
39167 || GET_MODE (target) != tmode1
39168 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
39169 target = gen_reg_rtx (tmode1);
39170
39171 scratch0 = gen_reg_rtx (tmode0);
39172
39173 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
39174 }
39175 else
39176 {
39177 gcc_assert (d->flag);
39178
39179 scratch0 = gen_reg_rtx (tmode0);
39180 scratch1 = gen_reg_rtx (tmode1);
39181
39182 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
39183 }
39184
39185 if (! pat)
39186 return 0;
39187
39188 emit_insn (pat);
39189
39190 if (d->flag)
39191 {
39192 target = gen_reg_rtx (SImode);
39193 emit_move_insn (target, const0_rtx);
39194 target = gen_rtx_SUBREG (QImode, target, 0);
39195
39196 emit_insn
39197 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39198 gen_rtx_fmt_ee (EQ, QImode,
39199 gen_rtx_REG ((machine_mode) d->flag,
39200 FLAGS_REG),
39201 const0_rtx)));
39202 return SUBREG_REG (target);
39203 }
39204 else
39205 return target;
39206 }
39207
39208
39209 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
39210
39211 static rtx
39212 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
39213 tree exp, rtx target)
39214 {
39215 rtx pat;
39216 tree arg0 = CALL_EXPR_ARG (exp, 0);
39217 tree arg1 = CALL_EXPR_ARG (exp, 1);
39218 tree arg2 = CALL_EXPR_ARG (exp, 2);
39219 rtx scratch0, scratch1;
39220 rtx op0 = expand_normal (arg0);
39221 rtx op1 = expand_normal (arg1);
39222 rtx op2 = expand_normal (arg2);
39223 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
39224
39225 tmode0 = insn_data[d->icode].operand[0].mode;
39226 tmode1 = insn_data[d->icode].operand[1].mode;
39227 modev2 = insn_data[d->icode].operand[2].mode;
39228 modev3 = insn_data[d->icode].operand[3].mode;
39229 modeimm = insn_data[d->icode].operand[4].mode;
39230
39231 if (VECTOR_MODE_P (modev2))
39232 op0 = safe_vector_operand (op0, modev2);
39233 if (VECTOR_MODE_P (modev3))
39234 op1 = safe_vector_operand (op1, modev3);
39235
39236 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
39237 op0 = copy_to_mode_reg (modev2, op0);
39238 if ((optimize && !register_operand (op1, modev3))
39239 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
39240 op1 = copy_to_mode_reg (modev3, op1);
39241
39242 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
39243 {
39244 error ("the third argument must be an 8-bit immediate");
39245 return const0_rtx;
39246 }
39247
39248 if (d->code == IX86_BUILTIN_PCMPISTRI128)
39249 {
39250 if (optimize || !target
39251 || GET_MODE (target) != tmode0
39252 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
39253 target = gen_reg_rtx (tmode0);
39254
39255 scratch1 = gen_reg_rtx (tmode1);
39256
39257 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
39258 }
39259 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
39260 {
39261 if (optimize || !target
39262 || GET_MODE (target) != tmode1
39263 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
39264 target = gen_reg_rtx (tmode1);
39265
39266 scratch0 = gen_reg_rtx (tmode0);
39267
39268 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
39269 }
39270 else
39271 {
39272 gcc_assert (d->flag);
39273
39274 scratch0 = gen_reg_rtx (tmode0);
39275 scratch1 = gen_reg_rtx (tmode1);
39276
39277 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
39278 }
39279
39280 if (! pat)
39281 return 0;
39282
39283 emit_insn (pat);
39284
39285 if (d->flag)
39286 {
39287 target = gen_reg_rtx (SImode);
39288 emit_move_insn (target, const0_rtx);
39289 target = gen_rtx_SUBREG (QImode, target, 0);
39290
39291 emit_insn
39292 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39293 gen_rtx_fmt_ee (EQ, QImode,
39294 gen_rtx_REG ((machine_mode) d->flag,
39295 FLAGS_REG),
39296 const0_rtx)));
39297 return SUBREG_REG (target);
39298 }
39299 else
39300 return target;
39301 }
39302
39303 /* Subroutine of ix86_expand_builtin to take care of insns with
39304 variable number of operands. */
39305
39306 static rtx
39307 ix86_expand_args_builtin (const struct builtin_description *d,
39308 tree exp, rtx target)
39309 {
39310 rtx pat, real_target;
39311 unsigned int i, nargs;
39312 unsigned int nargs_constant = 0;
39313 unsigned int mask_pos = 0;
39314 int num_memory = 0;
39315 struct
39316 {
39317 rtx op;
39318 machine_mode mode;
39319 } args[6];
39320 bool last_arg_count = false;
39321 enum insn_code icode = d->icode;
39322 const struct insn_data_d *insn_p = &insn_data[icode];
39323 machine_mode tmode = insn_p->operand[0].mode;
39324 machine_mode rmode = VOIDmode;
39325 bool swap = false;
39326 enum rtx_code comparison = d->comparison;
39327
39328 switch ((enum ix86_builtin_func_type) d->flag)
39329 {
39330 case V2DF_FTYPE_V2DF_ROUND:
39331 case V4DF_FTYPE_V4DF_ROUND:
39332 case V8DF_FTYPE_V8DF_ROUND:
39333 case V4SF_FTYPE_V4SF_ROUND:
39334 case V8SF_FTYPE_V8SF_ROUND:
39335 case V16SF_FTYPE_V16SF_ROUND:
39336 case V4SI_FTYPE_V4SF_ROUND:
39337 case V8SI_FTYPE_V8SF_ROUND:
39338 case V16SI_FTYPE_V16SF_ROUND:
39339 return ix86_expand_sse_round (d, exp, target);
39340 case V4SI_FTYPE_V2DF_V2DF_ROUND:
39341 case V8SI_FTYPE_V4DF_V4DF_ROUND:
39342 case V16SI_FTYPE_V8DF_V8DF_ROUND:
39343 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
39344 case INT_FTYPE_V8SF_V8SF_PTEST:
39345 case INT_FTYPE_V4DI_V4DI_PTEST:
39346 case INT_FTYPE_V4DF_V4DF_PTEST:
39347 case INT_FTYPE_V4SF_V4SF_PTEST:
39348 case INT_FTYPE_V2DI_V2DI_PTEST:
39349 case INT_FTYPE_V2DF_V2DF_PTEST:
39350 return ix86_expand_sse_ptest (d, exp, target);
39351 case FLOAT128_FTYPE_FLOAT128:
39352 case FLOAT_FTYPE_FLOAT:
39353 case INT_FTYPE_INT:
39354 case UINT64_FTYPE_INT:
39355 case UINT16_FTYPE_UINT16:
39356 case INT64_FTYPE_INT64:
39357 case INT64_FTYPE_V4SF:
39358 case INT64_FTYPE_V2DF:
39359 case INT_FTYPE_V16QI:
39360 case INT_FTYPE_V8QI:
39361 case INT_FTYPE_V8SF:
39362 case INT_FTYPE_V4DF:
39363 case INT_FTYPE_V4SF:
39364 case INT_FTYPE_V2DF:
39365 case INT_FTYPE_V32QI:
39366 case V16QI_FTYPE_V16QI:
39367 case V8SI_FTYPE_V8SF:
39368 case V8SI_FTYPE_V4SI:
39369 case V8HI_FTYPE_V8HI:
39370 case V8HI_FTYPE_V16QI:
39371 case V8QI_FTYPE_V8QI:
39372 case V8SF_FTYPE_V8SF:
39373 case V8SF_FTYPE_V8SI:
39374 case V8SF_FTYPE_V4SF:
39375 case V8SF_FTYPE_V8HI:
39376 case V4SI_FTYPE_V4SI:
39377 case V4SI_FTYPE_V16QI:
39378 case V4SI_FTYPE_V4SF:
39379 case V4SI_FTYPE_V8SI:
39380 case V4SI_FTYPE_V8HI:
39381 case V4SI_FTYPE_V4DF:
39382 case V4SI_FTYPE_V2DF:
39383 case V4HI_FTYPE_V4HI:
39384 case V4DF_FTYPE_V4DF:
39385 case V4DF_FTYPE_V4SI:
39386 case V4DF_FTYPE_V4SF:
39387 case V4DF_FTYPE_V2DF:
39388 case V4SF_FTYPE_V4SF:
39389 case V4SF_FTYPE_V4SI:
39390 case V4SF_FTYPE_V8SF:
39391 case V4SF_FTYPE_V4DF:
39392 case V4SF_FTYPE_V8HI:
39393 case V4SF_FTYPE_V2DF:
39394 case V2DI_FTYPE_V2DI:
39395 case V2DI_FTYPE_V16QI:
39396 case V2DI_FTYPE_V8HI:
39397 case V2DI_FTYPE_V4SI:
39398 case V2DF_FTYPE_V2DF:
39399 case V2DF_FTYPE_V4SI:
39400 case V2DF_FTYPE_V4DF:
39401 case V2DF_FTYPE_V4SF:
39402 case V2DF_FTYPE_V2SI:
39403 case V2SI_FTYPE_V2SI:
39404 case V2SI_FTYPE_V4SF:
39405 case V2SI_FTYPE_V2SF:
39406 case V2SI_FTYPE_V2DF:
39407 case V2SF_FTYPE_V2SF:
39408 case V2SF_FTYPE_V2SI:
39409 case V32QI_FTYPE_V32QI:
39410 case V32QI_FTYPE_V16QI:
39411 case V16HI_FTYPE_V16HI:
39412 case V16HI_FTYPE_V8HI:
39413 case V8SI_FTYPE_V8SI:
39414 case V16HI_FTYPE_V16QI:
39415 case V8SI_FTYPE_V16QI:
39416 case V4DI_FTYPE_V16QI:
39417 case V8SI_FTYPE_V8HI:
39418 case V4DI_FTYPE_V8HI:
39419 case V4DI_FTYPE_V4SI:
39420 case V4DI_FTYPE_V2DI:
39421 case UHI_FTYPE_UHI:
39422 case UHI_FTYPE_V16QI:
39423 case USI_FTYPE_V32QI:
39424 case UDI_FTYPE_V64QI:
39425 case V16QI_FTYPE_UHI:
39426 case V32QI_FTYPE_USI:
39427 case V64QI_FTYPE_UDI:
39428 case V8HI_FTYPE_UQI:
39429 case V16HI_FTYPE_UHI:
39430 case V32HI_FTYPE_USI:
39431 case V4SI_FTYPE_UQI:
39432 case V8SI_FTYPE_UQI:
39433 case V4SI_FTYPE_UHI:
39434 case V8SI_FTYPE_UHI:
39435 case UQI_FTYPE_V8HI:
39436 case UHI_FTYPE_V16HI:
39437 case USI_FTYPE_V32HI:
39438 case UQI_FTYPE_V4SI:
39439 case UQI_FTYPE_V8SI:
39440 case UHI_FTYPE_V16SI:
39441 case UQI_FTYPE_V2DI:
39442 case UQI_FTYPE_V4DI:
39443 case UQI_FTYPE_V8DI:
39444 case V16SI_FTYPE_UHI:
39445 case V2DI_FTYPE_UQI:
39446 case V4DI_FTYPE_UQI:
39447 case V16SI_FTYPE_INT:
39448 case V16SF_FTYPE_V8SF:
39449 case V16SI_FTYPE_V8SI:
39450 case V16SF_FTYPE_V4SF:
39451 case V16SI_FTYPE_V4SI:
39452 case V16SI_FTYPE_V16SF:
39453 case V16SF_FTYPE_V16SF:
39454 case V8DI_FTYPE_UQI:
39455 case V8DF_FTYPE_V4DF:
39456 case V8DF_FTYPE_V2DF:
39457 case V8DF_FTYPE_V8DF:
39458 nargs = 1;
39459 break;
39460 case V4SF_FTYPE_V4SF_VEC_MERGE:
39461 case V2DF_FTYPE_V2DF_VEC_MERGE:
39462 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
39463 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
39464 case V16QI_FTYPE_V16QI_V16QI:
39465 case V16QI_FTYPE_V8HI_V8HI:
39466 case V16SF_FTYPE_V16SF_V16SF:
39467 case V8QI_FTYPE_V8QI_V8QI:
39468 case V8QI_FTYPE_V4HI_V4HI:
39469 case V8HI_FTYPE_V8HI_V8HI:
39470 case V8HI_FTYPE_V16QI_V16QI:
39471 case V8HI_FTYPE_V4SI_V4SI:
39472 case V8SF_FTYPE_V8SF_V8SF:
39473 case V8SF_FTYPE_V8SF_V8SI:
39474 case V8DF_FTYPE_V8DF_V8DF:
39475 case V4SI_FTYPE_V4SI_V4SI:
39476 case V4SI_FTYPE_V8HI_V8HI:
39477 case V4SI_FTYPE_V2DF_V2DF:
39478 case V4HI_FTYPE_V4HI_V4HI:
39479 case V4HI_FTYPE_V8QI_V8QI:
39480 case V4HI_FTYPE_V2SI_V2SI:
39481 case V4DF_FTYPE_V4DF_V4DF:
39482 case V4DF_FTYPE_V4DF_V4DI:
39483 case V4SF_FTYPE_V4SF_V4SF:
39484 case V4SF_FTYPE_V4SF_V4SI:
39485 case V4SF_FTYPE_V4SF_V2SI:
39486 case V4SF_FTYPE_V4SF_V2DF:
39487 case V4SF_FTYPE_V4SF_UINT:
39488 case V4SF_FTYPE_V4SF_DI:
39489 case V4SF_FTYPE_V4SF_SI:
39490 case V2DI_FTYPE_V2DI_V2DI:
39491 case V2DI_FTYPE_V16QI_V16QI:
39492 case V2DI_FTYPE_V4SI_V4SI:
39493 case V2DI_FTYPE_V2DI_V16QI:
39494 case V2SI_FTYPE_V2SI_V2SI:
39495 case V2SI_FTYPE_V4HI_V4HI:
39496 case V2SI_FTYPE_V2SF_V2SF:
39497 case V2DF_FTYPE_V2DF_V2DF:
39498 case V2DF_FTYPE_V2DF_V4SF:
39499 case V2DF_FTYPE_V2DF_V2DI:
39500 case V2DF_FTYPE_V2DF_DI:
39501 case V2DF_FTYPE_V2DF_SI:
39502 case V2DF_FTYPE_V2DF_UINT:
39503 case V2SF_FTYPE_V2SF_V2SF:
39504 case V1DI_FTYPE_V1DI_V1DI:
39505 case V1DI_FTYPE_V8QI_V8QI:
39506 case V1DI_FTYPE_V2SI_V2SI:
39507 case V32QI_FTYPE_V16HI_V16HI:
39508 case V16HI_FTYPE_V8SI_V8SI:
39509 case V32QI_FTYPE_V32QI_V32QI:
39510 case V16HI_FTYPE_V32QI_V32QI:
39511 case V16HI_FTYPE_V16HI_V16HI:
39512 case V8SI_FTYPE_V4DF_V4DF:
39513 case V8SI_FTYPE_V8SI_V8SI:
39514 case V8SI_FTYPE_V16HI_V16HI:
39515 case V4DI_FTYPE_V4DI_V4DI:
39516 case V4DI_FTYPE_V8SI_V8SI:
39517 case V8DI_FTYPE_V64QI_V64QI:
39518 if (comparison == UNKNOWN)
39519 return ix86_expand_binop_builtin (icode, exp, target);
39520 nargs = 2;
39521 break;
39522 case V4SF_FTYPE_V4SF_V4SF_SWAP:
39523 case V2DF_FTYPE_V2DF_V2DF_SWAP:
39524 gcc_assert (comparison != UNKNOWN);
39525 nargs = 2;
39526 swap = true;
39527 break;
39528 case V16HI_FTYPE_V16HI_V8HI_COUNT:
39529 case V16HI_FTYPE_V16HI_SI_COUNT:
39530 case V8SI_FTYPE_V8SI_V4SI_COUNT:
39531 case V8SI_FTYPE_V8SI_SI_COUNT:
39532 case V4DI_FTYPE_V4DI_V2DI_COUNT:
39533 case V4DI_FTYPE_V4DI_INT_COUNT:
39534 case V8HI_FTYPE_V8HI_V8HI_COUNT:
39535 case V8HI_FTYPE_V8HI_SI_COUNT:
39536 case V4SI_FTYPE_V4SI_V4SI_COUNT:
39537 case V4SI_FTYPE_V4SI_SI_COUNT:
39538 case V4HI_FTYPE_V4HI_V4HI_COUNT:
39539 case V4HI_FTYPE_V4HI_SI_COUNT:
39540 case V2DI_FTYPE_V2DI_V2DI_COUNT:
39541 case V2DI_FTYPE_V2DI_SI_COUNT:
39542 case V2SI_FTYPE_V2SI_V2SI_COUNT:
39543 case V2SI_FTYPE_V2SI_SI_COUNT:
39544 case V1DI_FTYPE_V1DI_V1DI_COUNT:
39545 case V1DI_FTYPE_V1DI_SI_COUNT:
39546 nargs = 2;
39547 last_arg_count = true;
39548 break;
39549 case UINT64_FTYPE_UINT64_UINT64:
39550 case UINT_FTYPE_UINT_UINT:
39551 case UINT_FTYPE_UINT_USHORT:
39552 case UINT_FTYPE_UINT_UCHAR:
39553 case UINT16_FTYPE_UINT16_INT:
39554 case UINT8_FTYPE_UINT8_INT:
39555 case UHI_FTYPE_UHI_UHI:
39556 case USI_FTYPE_USI_USI:
39557 case UDI_FTYPE_UDI_UDI:
39558 case V16SI_FTYPE_V8DF_V8DF:
39559 nargs = 2;
39560 break;
39561 case V2DI_FTYPE_V2DI_INT_CONVERT:
39562 nargs = 2;
39563 rmode = V1TImode;
39564 nargs_constant = 1;
39565 break;
39566 case V4DI_FTYPE_V4DI_INT_CONVERT:
39567 nargs = 2;
39568 rmode = V2TImode;
39569 nargs_constant = 1;
39570 break;
39571 case V8DI_FTYPE_V8DI_INT_CONVERT:
39572 nargs = 2;
39573 rmode = V4TImode;
39574 nargs_constant = 1;
39575 break;
39576 case V8HI_FTYPE_V8HI_INT:
39577 case V8HI_FTYPE_V8SF_INT:
39578 case V16HI_FTYPE_V16SF_INT:
39579 case V8HI_FTYPE_V4SF_INT:
39580 case V8SF_FTYPE_V8SF_INT:
39581 case V4SF_FTYPE_V16SF_INT:
39582 case V16SF_FTYPE_V16SF_INT:
39583 case V4SI_FTYPE_V4SI_INT:
39584 case V4SI_FTYPE_V8SI_INT:
39585 case V4HI_FTYPE_V4HI_INT:
39586 case V4DF_FTYPE_V4DF_INT:
39587 case V4DF_FTYPE_V8DF_INT:
39588 case V4SF_FTYPE_V4SF_INT:
39589 case V4SF_FTYPE_V8SF_INT:
39590 case V2DI_FTYPE_V2DI_INT:
39591 case V2DF_FTYPE_V2DF_INT:
39592 case V2DF_FTYPE_V4DF_INT:
39593 case V16HI_FTYPE_V16HI_INT:
39594 case V8SI_FTYPE_V8SI_INT:
39595 case V16SI_FTYPE_V16SI_INT:
39596 case V4SI_FTYPE_V16SI_INT:
39597 case V4DI_FTYPE_V4DI_INT:
39598 case V2DI_FTYPE_V4DI_INT:
39599 case V4DI_FTYPE_V8DI_INT:
39600 case QI_FTYPE_V4SF_INT:
39601 case QI_FTYPE_V2DF_INT:
39602 nargs = 2;
39603 nargs_constant = 1;
39604 break;
39605 case V16QI_FTYPE_V16QI_V16QI_V16QI:
39606 case V8SF_FTYPE_V8SF_V8SF_V8SF:
39607 case V4DF_FTYPE_V4DF_V4DF_V4DF:
39608 case V4SF_FTYPE_V4SF_V4SF_V4SF:
39609 case V2DF_FTYPE_V2DF_V2DF_V2DF:
39610 case V32QI_FTYPE_V32QI_V32QI_V32QI:
39611 case UHI_FTYPE_V16SI_V16SI_UHI:
39612 case UQI_FTYPE_V8DI_V8DI_UQI:
39613 case V16HI_FTYPE_V16SI_V16HI_UHI:
39614 case V16QI_FTYPE_V16SI_V16QI_UHI:
39615 case V16QI_FTYPE_V8DI_V16QI_UQI:
39616 case V16SF_FTYPE_V16SF_V16SF_UHI:
39617 case V16SF_FTYPE_V4SF_V16SF_UHI:
39618 case V16SI_FTYPE_SI_V16SI_UHI:
39619 case V16SI_FTYPE_V16HI_V16SI_UHI:
39620 case V16SI_FTYPE_V16QI_V16SI_UHI:
39621 case V8SF_FTYPE_V4SF_V8SF_UQI:
39622 case V4DF_FTYPE_V2DF_V4DF_UQI:
39623 case V8SI_FTYPE_V4SI_V8SI_UQI:
39624 case V8SI_FTYPE_SI_V8SI_UQI:
39625 case V4SI_FTYPE_V4SI_V4SI_UQI:
39626 case V4SI_FTYPE_SI_V4SI_UQI:
39627 case V4DI_FTYPE_V2DI_V4DI_UQI:
39628 case V4DI_FTYPE_DI_V4DI_UQI:
39629 case V2DI_FTYPE_V2DI_V2DI_UQI:
39630 case V2DI_FTYPE_DI_V2DI_UQI:
39631 case V64QI_FTYPE_V64QI_V64QI_UDI:
39632 case V64QI_FTYPE_V16QI_V64QI_UDI:
39633 case V64QI_FTYPE_QI_V64QI_UDI:
39634 case V32QI_FTYPE_V32QI_V32QI_USI:
39635 case V32QI_FTYPE_V16QI_V32QI_USI:
39636 case V32QI_FTYPE_QI_V32QI_USI:
39637 case V16QI_FTYPE_V16QI_V16QI_UHI:
39638 case V16QI_FTYPE_QI_V16QI_UHI:
39639 case V32HI_FTYPE_V8HI_V32HI_USI:
39640 case V32HI_FTYPE_HI_V32HI_USI:
39641 case V16HI_FTYPE_V8HI_V16HI_UHI:
39642 case V16HI_FTYPE_HI_V16HI_UHI:
39643 case V8HI_FTYPE_V8HI_V8HI_UQI:
39644 case V8HI_FTYPE_HI_V8HI_UQI:
39645 case V8SF_FTYPE_V8HI_V8SF_UQI:
39646 case V4SF_FTYPE_V8HI_V4SF_UQI:
39647 case V8SI_FTYPE_V8SF_V8SI_UQI:
39648 case V4SI_FTYPE_V4SF_V4SI_UQI:
39649 case V4DI_FTYPE_V4SF_V4DI_UQI:
39650 case V2DI_FTYPE_V4SF_V2DI_UQI:
39651 case V4SF_FTYPE_V4DI_V4SF_UQI:
39652 case V4SF_FTYPE_V2DI_V4SF_UQI:
39653 case V4DF_FTYPE_V4DI_V4DF_UQI:
39654 case V2DF_FTYPE_V2DI_V2DF_UQI:
39655 case V16QI_FTYPE_V8HI_V16QI_UQI:
39656 case V16QI_FTYPE_V16HI_V16QI_UHI:
39657 case V16QI_FTYPE_V4SI_V16QI_UQI:
39658 case V16QI_FTYPE_V8SI_V16QI_UQI:
39659 case V8HI_FTYPE_V4SI_V8HI_UQI:
39660 case V8HI_FTYPE_V8SI_V8HI_UQI:
39661 case V16QI_FTYPE_V2DI_V16QI_UQI:
39662 case V16QI_FTYPE_V4DI_V16QI_UQI:
39663 case V8HI_FTYPE_V2DI_V8HI_UQI:
39664 case V8HI_FTYPE_V4DI_V8HI_UQI:
39665 case V4SI_FTYPE_V2DI_V4SI_UQI:
39666 case V4SI_FTYPE_V4DI_V4SI_UQI:
39667 case V32QI_FTYPE_V32HI_V32QI_USI:
39668 case UHI_FTYPE_V16QI_V16QI_UHI:
39669 case USI_FTYPE_V32QI_V32QI_USI:
39670 case UDI_FTYPE_V64QI_V64QI_UDI:
39671 case UQI_FTYPE_V8HI_V8HI_UQI:
39672 case UHI_FTYPE_V16HI_V16HI_UHI:
39673 case USI_FTYPE_V32HI_V32HI_USI:
39674 case UQI_FTYPE_V4SI_V4SI_UQI:
39675 case UQI_FTYPE_V8SI_V8SI_UQI:
39676 case UQI_FTYPE_V2DI_V2DI_UQI:
39677 case UQI_FTYPE_V4DI_V4DI_UQI:
39678 case V4SF_FTYPE_V2DF_V4SF_UQI:
39679 case V4SF_FTYPE_V4DF_V4SF_UQI:
39680 case V16SI_FTYPE_V16SI_V16SI_UHI:
39681 case V16SI_FTYPE_V4SI_V16SI_UHI:
39682 case V2DI_FTYPE_V4SI_V2DI_UQI:
39683 case V2DI_FTYPE_V8HI_V2DI_UQI:
39684 case V2DI_FTYPE_V16QI_V2DI_UQI:
39685 case V4DI_FTYPE_V4DI_V4DI_UQI:
39686 case V4DI_FTYPE_V4SI_V4DI_UQI:
39687 case V4DI_FTYPE_V8HI_V4DI_UQI:
39688 case V4DI_FTYPE_V16QI_V4DI_UQI:
39689 case V4DI_FTYPE_V4DF_V4DI_UQI:
39690 case V2DI_FTYPE_V2DF_V2DI_UQI:
39691 case V4SI_FTYPE_V4DF_V4SI_UQI:
39692 case V4SI_FTYPE_V2DF_V4SI_UQI:
39693 case V4SI_FTYPE_V8HI_V4SI_UQI:
39694 case V4SI_FTYPE_V16QI_V4SI_UQI:
39695 case V4DI_FTYPE_V4DI_V4DI_V4DI:
39696 case V8DF_FTYPE_V2DF_V8DF_UQI:
39697 case V8DF_FTYPE_V4DF_V8DF_UQI:
39698 case V8DF_FTYPE_V8DF_V8DF_UQI:
39699 case V8SF_FTYPE_V8SF_V8SF_UQI:
39700 case V8SF_FTYPE_V8SI_V8SF_UQI:
39701 case V4DF_FTYPE_V4DF_V4DF_UQI:
39702 case V4SF_FTYPE_V4SF_V4SF_UQI:
39703 case V2DF_FTYPE_V2DF_V2DF_UQI:
39704 case V2DF_FTYPE_V4SF_V2DF_UQI:
39705 case V2DF_FTYPE_V4SI_V2DF_UQI:
39706 case V4SF_FTYPE_V4SI_V4SF_UQI:
39707 case V4DF_FTYPE_V4SF_V4DF_UQI:
39708 case V4DF_FTYPE_V4SI_V4DF_UQI:
39709 case V8SI_FTYPE_V8SI_V8SI_UQI:
39710 case V8SI_FTYPE_V8HI_V8SI_UQI:
39711 case V8SI_FTYPE_V16QI_V8SI_UQI:
39712 case V8DF_FTYPE_V8SI_V8DF_UQI:
39713 case V8DI_FTYPE_DI_V8DI_UQI:
39714 case V16SF_FTYPE_V8SF_V16SF_UHI:
39715 case V16SI_FTYPE_V8SI_V16SI_UHI:
39716 case V16HI_FTYPE_V16HI_V16HI_UHI:
39717 case V8HI_FTYPE_V16QI_V8HI_UQI:
39718 case V16HI_FTYPE_V16QI_V16HI_UHI:
39719 case V32HI_FTYPE_V32HI_V32HI_USI:
39720 case V32HI_FTYPE_V32QI_V32HI_USI:
39721 case V8DI_FTYPE_V16QI_V8DI_UQI:
39722 case V8DI_FTYPE_V2DI_V8DI_UQI:
39723 case V8DI_FTYPE_V4DI_V8DI_UQI:
39724 case V8DI_FTYPE_V8DI_V8DI_UQI:
39725 case V8DI_FTYPE_V8HI_V8DI_UQI:
39726 case V8DI_FTYPE_V8SI_V8DI_UQI:
39727 case V8HI_FTYPE_V8DI_V8HI_UQI:
39728 case V8SI_FTYPE_V8DI_V8SI_UQI:
39729 case V4SI_FTYPE_V4SI_V4SI_V4SI:
39730 nargs = 3;
39731 break;
39732 case V32QI_FTYPE_V32QI_V32QI_INT:
39733 case V16HI_FTYPE_V16HI_V16HI_INT:
39734 case V16QI_FTYPE_V16QI_V16QI_INT:
39735 case V4DI_FTYPE_V4DI_V4DI_INT:
39736 case V8HI_FTYPE_V8HI_V8HI_INT:
39737 case V8SI_FTYPE_V8SI_V8SI_INT:
39738 case V8SI_FTYPE_V8SI_V4SI_INT:
39739 case V8SF_FTYPE_V8SF_V8SF_INT:
39740 case V8SF_FTYPE_V8SF_V4SF_INT:
39741 case V4SI_FTYPE_V4SI_V4SI_INT:
39742 case V4DF_FTYPE_V4DF_V4DF_INT:
39743 case V16SF_FTYPE_V16SF_V16SF_INT:
39744 case V16SF_FTYPE_V16SF_V4SF_INT:
39745 case V16SI_FTYPE_V16SI_V4SI_INT:
39746 case V4DF_FTYPE_V4DF_V2DF_INT:
39747 case V4SF_FTYPE_V4SF_V4SF_INT:
39748 case V2DI_FTYPE_V2DI_V2DI_INT:
39749 case V4DI_FTYPE_V4DI_V2DI_INT:
39750 case V2DF_FTYPE_V2DF_V2DF_INT:
39751 case UQI_FTYPE_V8DI_V8UDI_INT:
39752 case UQI_FTYPE_V8DF_V8DF_INT:
39753 case UQI_FTYPE_V2DF_V2DF_INT:
39754 case UQI_FTYPE_V4SF_V4SF_INT:
39755 case UHI_FTYPE_V16SI_V16SI_INT:
39756 case UHI_FTYPE_V16SF_V16SF_INT:
39757 nargs = 3;
39758 nargs_constant = 1;
39759 break;
39760 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
39761 nargs = 3;
39762 rmode = V4DImode;
39763 nargs_constant = 1;
39764 break;
39765 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
39766 nargs = 3;
39767 rmode = V2DImode;
39768 nargs_constant = 1;
39769 break;
39770 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
39771 nargs = 3;
39772 rmode = DImode;
39773 nargs_constant = 1;
39774 break;
39775 case V2DI_FTYPE_V2DI_UINT_UINT:
39776 nargs = 3;
39777 nargs_constant = 2;
39778 break;
39779 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
39780 nargs = 3;
39781 rmode = V8DImode;
39782 nargs_constant = 1;
39783 break;
39784 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
39785 nargs = 5;
39786 rmode = V8DImode;
39787 mask_pos = 2;
39788 nargs_constant = 1;
39789 break;
39790 case QI_FTYPE_V8DF_INT_UQI:
39791 case QI_FTYPE_V4DF_INT_UQI:
39792 case QI_FTYPE_V2DF_INT_UQI:
39793 case HI_FTYPE_V16SF_INT_UHI:
39794 case QI_FTYPE_V8SF_INT_UQI:
39795 case QI_FTYPE_V4SF_INT_UQI:
39796 nargs = 3;
39797 mask_pos = 1;
39798 nargs_constant = 1;
39799 break;
39800 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
39801 nargs = 5;
39802 rmode = V4DImode;
39803 mask_pos = 2;
39804 nargs_constant = 1;
39805 break;
39806 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
39807 nargs = 5;
39808 rmode = V2DImode;
39809 mask_pos = 2;
39810 nargs_constant = 1;
39811 break;
39812 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
39813 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
39814 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
39815 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
39816 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
39817 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
39818 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
39819 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
39820 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
39821 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
39822 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
39823 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
39824 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
39825 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
39826 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
39827 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
39828 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
39829 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
39830 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
39831 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
39832 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
39833 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
39834 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
39835 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
39836 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
39837 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
39838 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
39839 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
39840 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
39841 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
39842 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
39843 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
39844 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
39845 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
39846 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
39847 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
39848 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
39849 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
39850 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
39851 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
39852 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
39853 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
39854 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
39855 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
39856 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
39857 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
39858 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
39859 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
39860 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
39861 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
39862 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
39863 nargs = 4;
39864 break;
39865 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
39866 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
39867 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
39868 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
39869 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
39870 nargs = 4;
39871 nargs_constant = 1;
39872 break;
39873 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
39874 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
39875 case QI_FTYPE_V4DF_V4DF_INT_UQI:
39876 case QI_FTYPE_V8SF_V8SF_INT_UQI:
39877 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
39878 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
39879 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
39880 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
39881 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
39882 case USI_FTYPE_V32QI_V32QI_INT_USI:
39883 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
39884 case USI_FTYPE_V32HI_V32HI_INT_USI:
39885 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
39886 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
39887 nargs = 4;
39888 mask_pos = 1;
39889 nargs_constant = 1;
39890 break;
39891 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
39892 nargs = 4;
39893 nargs_constant = 2;
39894 break;
39895 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
39896 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
39897 nargs = 4;
39898 break;
39899 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
39900 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
39901 mask_pos = 1;
39902 nargs = 4;
39903 nargs_constant = 1;
39904 break;
39905 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
39906 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
39907 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
39908 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
39909 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
39910 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
39911 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
39912 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
39913 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
39914 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
39915 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
39916 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
39917 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
39918 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
39919 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
39920 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
39921 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
39922 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
39923 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
39924 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
39925 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
39926 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
39927 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
39928 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
39929 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
39930 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
39931 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
39932 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
39933 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
39934 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
39935 nargs = 4;
39936 mask_pos = 2;
39937 nargs_constant = 1;
39938 break;
39939 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
39940 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
39941 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
39942 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
39943 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
39944 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
39945 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
39946 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
39947 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
39948 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
39949 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
39950 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
39951 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
39952 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
39953 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
39954 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
39955 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
39956 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
39957 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
39958 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39959 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39960 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39961 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39962 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39963 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39964 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39965 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39966 nargs = 5;
39967 mask_pos = 2;
39968 nargs_constant = 1;
39969 break;
39970 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39971 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39972 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39973 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39974 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39975 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39976 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39977 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39978 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39979 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39980 nargs = 5;
39982 mask_pos = 1;
39983 nargs_constant = 1;
39984 break;
39985
39986 default:
39987 gcc_unreachable ();
39988 }
39989
39990 gcc_assert (nargs <= ARRAY_SIZE (args));
39991
39992 if (comparison != UNKNOWN)
39993 {
39994 gcc_assert (nargs == 2);
39995 return ix86_expand_sse_compare (d, exp, target, swap);
39996 }
39997
39998 if (rmode == VOIDmode || rmode == tmode)
39999 {
40000 if (optimize
40001 || target == 0
40002 || GET_MODE (target) != tmode
40003 || !insn_p->operand[0].predicate (target, tmode))
40004 target = gen_reg_rtx (tmode);
40005 real_target = target;
40006 }
40007 else
40008 {
40009 real_target = gen_reg_rtx (tmode);
40010 target = lowpart_subreg (rmode, real_target, tmode);
40011 }
40012
40013 for (i = 0; i < nargs; i++)
40014 {
40015 tree arg = CALL_EXPR_ARG (exp, i);
40016 rtx op = expand_normal (arg);
40017 machine_mode mode = insn_p->operand[i + 1].mode;
40018 bool match = insn_p->operand[i + 1].predicate (op, mode);
40019
40020 if (last_arg_count && (i + 1) == nargs)
40021 {
40022 /* SIMD shift insns take either an 8-bit immediate or
40023 register as count. But builtin functions take int as
40024 count. If count doesn't match, we put it in register. */
40025 if (!match)
40026 {
40027 op = lowpart_subreg (SImode, op, GET_MODE (op));
40028 if (!insn_p->operand[i + 1].predicate (op, mode))
40029 op = copy_to_reg (op);
40030 }
40031 }
40032 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
40033 || (!mask_pos && (nargs - i) <= nargs_constant))
40034 {
40035 if (!match)
40036 switch (icode)
40037 {
40038 case CODE_FOR_avx_vinsertf128v4di:
40039 case CODE_FOR_avx_vextractf128v4di:
40040 error ("the last argument must be a 1-bit immediate");
40041 return const0_rtx;
40042
40043 case CODE_FOR_avx512f_cmpv8di3_mask:
40044 case CODE_FOR_avx512f_cmpv16si3_mask:
40045 case CODE_FOR_avx512f_ucmpv8di3_mask:
40046 case CODE_FOR_avx512f_ucmpv16si3_mask:
40047 case CODE_FOR_avx512vl_cmpv4di3_mask:
40048 case CODE_FOR_avx512vl_cmpv8si3_mask:
40049 case CODE_FOR_avx512vl_ucmpv4di3_mask:
40050 case CODE_FOR_avx512vl_ucmpv8si3_mask:
40051 case CODE_FOR_avx512vl_cmpv2di3_mask:
40052 case CODE_FOR_avx512vl_cmpv4si3_mask:
40053 case CODE_FOR_avx512vl_ucmpv2di3_mask:
40054 case CODE_FOR_avx512vl_ucmpv4si3_mask:
40055 error ("the last argument must be a 3-bit immediate");
40056 return const0_rtx;
40057
40058 case CODE_FOR_sse4_1_roundsd:
40059 case CODE_FOR_sse4_1_roundss:
40060
40061 case CODE_FOR_sse4_1_roundpd:
40062 case CODE_FOR_sse4_1_roundps:
40063 case CODE_FOR_avx_roundpd256:
40064 case CODE_FOR_avx_roundps256:
40065
40066 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
40067 case CODE_FOR_sse4_1_roundps_sfix:
40068 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
40069 case CODE_FOR_avx_roundps_sfix256:
40070
40071 case CODE_FOR_sse4_1_blendps:
40072 case CODE_FOR_avx_blendpd256:
40073 case CODE_FOR_avx_vpermilv4df:
40074 case CODE_FOR_avx_vpermilv4df_mask:
40075 case CODE_FOR_avx512f_getmantv8df_mask:
40076 case CODE_FOR_avx512f_getmantv16sf_mask:
40077 case CODE_FOR_avx512vl_getmantv8sf_mask:
40078 case CODE_FOR_avx512vl_getmantv4df_mask:
40079 case CODE_FOR_avx512vl_getmantv4sf_mask:
40080 case CODE_FOR_avx512vl_getmantv2df_mask:
40081 case CODE_FOR_avx512dq_rangepv8df_mask_round:
40082 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
40083 case CODE_FOR_avx512dq_rangepv4df_mask:
40084 case CODE_FOR_avx512dq_rangepv8sf_mask:
40085 case CODE_FOR_avx512dq_rangepv2df_mask:
40086 case CODE_FOR_avx512dq_rangepv4sf_mask:
40087 case CODE_FOR_avx_shufpd256_mask:
40088 error ("the last argument must be a 4-bit immediate");
40089 return const0_rtx;
40090
40091 case CODE_FOR_sha1rnds4:
40092 case CODE_FOR_sse4_1_blendpd:
40093 case CODE_FOR_avx_vpermilv2df:
40094 case CODE_FOR_avx_vpermilv2df_mask:
40095 case CODE_FOR_xop_vpermil2v2df3:
40096 case CODE_FOR_xop_vpermil2v4sf3:
40097 case CODE_FOR_xop_vpermil2v4df3:
40098 case CODE_FOR_xop_vpermil2v8sf3:
40099 case CODE_FOR_avx512f_vinsertf32x4_mask:
40100 case CODE_FOR_avx512f_vinserti32x4_mask:
40101 case CODE_FOR_avx512f_vextractf32x4_mask:
40102 case CODE_FOR_avx512f_vextracti32x4_mask:
40103 case CODE_FOR_sse2_shufpd:
40104 case CODE_FOR_sse2_shufpd_mask:
40105 case CODE_FOR_avx512dq_shuf_f64x2_mask:
40106 case CODE_FOR_avx512dq_shuf_i64x2_mask:
40107 case CODE_FOR_avx512vl_shuf_i32x4_mask:
40108 case CODE_FOR_avx512vl_shuf_f32x4_mask:
40109 error ("the last argument must be a 2-bit immediate");
40110 return const0_rtx;
40111
40112 case CODE_FOR_avx_vextractf128v4df:
40113 case CODE_FOR_avx_vextractf128v8sf:
40114 case CODE_FOR_avx_vextractf128v8si:
40115 case CODE_FOR_avx_vinsertf128v4df:
40116 case CODE_FOR_avx_vinsertf128v8sf:
40117 case CODE_FOR_avx_vinsertf128v8si:
40118 case CODE_FOR_avx512f_vinsertf64x4_mask:
40119 case CODE_FOR_avx512f_vinserti64x4_mask:
40120 case CODE_FOR_avx512f_vextractf64x4_mask:
40121 case CODE_FOR_avx512f_vextracti64x4_mask:
40122 case CODE_FOR_avx512dq_vinsertf32x8_mask:
40123 case CODE_FOR_avx512dq_vinserti32x8_mask:
40124 case CODE_FOR_avx512vl_vinsertv4df:
40125 case CODE_FOR_avx512vl_vinsertv4di:
40126 case CODE_FOR_avx512vl_vinsertv8sf:
40127 case CODE_FOR_avx512vl_vinsertv8si:
40128 error ("the last argument must be a 1-bit immediate");
40129 return const0_rtx;
40130
40131 case CODE_FOR_avx_vmcmpv2df3:
40132 case CODE_FOR_avx_vmcmpv4sf3:
40133 case CODE_FOR_avx_cmpv2df3:
40134 case CODE_FOR_avx_cmpv4sf3:
40135 case CODE_FOR_avx_cmpv4df3:
40136 case CODE_FOR_avx_cmpv8sf3:
40137 case CODE_FOR_avx512f_cmpv8df3_mask:
40138 case CODE_FOR_avx512f_cmpv16sf3_mask:
40139 case CODE_FOR_avx512f_vmcmpv2df3_mask:
40140 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
40141 error ("the last argument must be a 5-bit immediate");
40142 return const0_rtx;
40143
40144 default:
40145 switch (nargs_constant)
40146 {
40147 case 2:
40148 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
40149 || (!mask_pos && (nargs - i) == nargs_constant))
40150 {
40151 error ("the next to last argument must be an 8-bit immediate");
40152 break;
40153 }
40154 /* FALLTHRU */
40155 case 1:
40156 error ("the last argument must be an 8-bit immediate");
40157 break;
40158 default:
40159 gcc_unreachable ();
40160 }
40161 return const0_rtx;
40162 }
40163 }
40164 else
40165 {
40166 if (VECTOR_MODE_P (mode))
40167 op = safe_vector_operand (op, mode);
40168
40169 /* If we aren't optimizing, only allow one memory operand to
40170 be generated. */
40171 if (memory_operand (op, mode))
40172 num_memory++;
40173
40174 op = fixup_modeless_constant (op, mode);
40175
40176 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40177 {
40178 if (optimize || !match || num_memory > 1)
40179 op = copy_to_mode_reg (mode, op);
40180 }
40181 else
40182 {
40183 op = copy_to_reg (op);
40184 op = lowpart_subreg (mode, op, GET_MODE (op));
40185 }
40186 }
40187
40188 args[i].op = op;
40189 args[i].mode = mode;
40190 }
40191
40192 switch (nargs)
40193 {
40194 case 1:
40195 pat = GEN_FCN (icode) (real_target, args[0].op);
40196 break;
40197 case 2:
40198 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
40199 break;
40200 case 3:
40201 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
40202 args[2].op);
40203 break;
40204 case 4:
40205 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
40206 args[2].op, args[3].op);
40207 break;
40208 case 5:
40209 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
40210 args[2].op, args[3].op, args[4].op);
40211 break;
40212 case 6:
40213 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
40214 args[2].op, args[3].op, args[4].op,
40215 args[5].op);
40216 break;
40217 default:
40218 gcc_unreachable ();
40219 }
40220
40221 if (! pat)
40222 return 0;
40223
40224 emit_insn (pat);
40225 return target;
40226 }
40227
40228 /* Transform a pattern of the following layout:
40229 (parallel [
40230 (set (A B))
40231 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
40232 ])
40233 into:
40234 (set (A B))
40235
40236 Or:
40237 (parallel [ A B
40238 ...
40239 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
40240 ...
40241 ])
40242 into:
40243 (parallel [ A B ... ]) */
40244
40245 static rtx
40246 ix86_erase_embedded_rounding (rtx pat)
40247 {
40248 if (GET_CODE (pat) == INSN)
40249 pat = PATTERN (pat);
40250
40251 gcc_assert (GET_CODE (pat) == PARALLEL);
40252
40253 if (XVECLEN (pat, 0) == 2)
40254 {
40255 rtx p0 = XVECEXP (pat, 0, 0);
40256 rtx p1 = XVECEXP (pat, 0, 1);
40257
40258 gcc_assert (GET_CODE (p0) == SET
40259 && GET_CODE (p1) == UNSPEC
40260 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
40261
40262 return p0;
40263 }
40264 else
40265 {
40266 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
40267 int i = 0;
40268 int j = 0;
40269
40270 for (; i < XVECLEN (pat, 0); ++i)
40271 {
40272 rtx elem = XVECEXP (pat, 0, i);
40273 if (GET_CODE (elem) != UNSPEC
40274 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
40275 res [j++] = elem;
40276 }
40277
40278 /* No more than 1 occurrence was removed. */
40279 gcc_assert (j >= XVECLEN (pat, 0) - 1);
40280
40281 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
40282 }
40283 }
40284
40285 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
40286 with rounding. */
40287 static rtx
40288 ix86_expand_sse_comi_round (const struct builtin_description *d,
40289 tree exp, rtx target)
40290 {
40291 rtx pat, set_dst;
40292 tree arg0 = CALL_EXPR_ARG (exp, 0);
40293 tree arg1 = CALL_EXPR_ARG (exp, 1);
40294 tree arg2 = CALL_EXPR_ARG (exp, 2);
40295 tree arg3 = CALL_EXPR_ARG (exp, 3);
40296 rtx op0 = expand_normal (arg0);
40297 rtx op1 = expand_normal (arg1);
40298 rtx op2 = expand_normal (arg2);
40299 rtx op3 = expand_normal (arg3);
40300 enum insn_code icode = d->icode;
40301 const struct insn_data_d *insn_p = &insn_data[icode];
40302 machine_mode mode0 = insn_p->operand[0].mode;
40303 machine_mode mode1 = insn_p->operand[1].mode;
40304 enum rtx_code comparison = UNEQ;
40305 bool need_ucomi = false;
40306
40307 /* See avxintrin.h for values. */
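/* COMI_COMPARISONS maps each comparison-predicate immediate (the third
   builtin argument) to the RTX code applied to the comi/ucomi result;
   NEED_UCOMI_VALUES marks the predicates that must use the ucomi variant
   of the instruction.  */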
40308 enum rtx_code comi_comparisons[32] =
40309 {
40310 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
40311 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
40312 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
40313 };
40314 bool need_ucomi_values[32] =
40315 {
40316 true, false, false, true, true, false, false, true,
40317 true, false, false, true, true, false, false, true,
40318 false, true, true, false, false, true, true, false,
40319 false, true, true, false, false, true, true, false
40320 };
40321
40322 if (!CONST_INT_P (op2))
40323 {
40324 error ("the third argument must be a comparison constant");
40325 return const0_rtx;
40326 }
40327 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
40328 {
40329 error ("incorrect comparison mode");
40330 return const0_rtx;
40331 }
40332
40333 if (!insn_p->operand[2].predicate (op3, SImode))
40334 {
40335 error ("incorrect rounding operand");
40336 return const0_rtx;
40337 }
40338
40339 comparison = comi_comparisons[INTVAL (op2)];
40340 need_ucomi = need_ucomi_values[INTVAL (op2)];
40341
40342 if (VECTOR_MODE_P (mode0))
40343 op0 = safe_vector_operand (op0, mode0);
40344 if (VECTOR_MODE_P (mode1))
40345 op1 = safe_vector_operand (op1, mode1);
40346
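/* Materialize the boolean comparison result in the low byte of a zeroed
   SImode register; the enclosing SImode register is what is returned.  */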
40347 target = gen_reg_rtx (SImode);
40348 emit_move_insn (target, const0_rtx);
40349 target = gen_rtx_SUBREG (QImode, target, 0);
40350
40351 if ((optimize && !register_operand (op0, mode0))
40352 || !insn_p->operand[0].predicate (op0, mode0))
40353 op0 = copy_to_mode_reg (mode0, op0);
40354 if ((optimize && !register_operand (op1, mode1))
40355 || !insn_p->operand[1].predicate (op1, mode1))
40356 op1 = copy_to_mode_reg (mode1, op1);
40357
40358 if (need_ucomi)
40359 icode = icode == CODE_FOR_sse_comi_round
40360 ? CODE_FOR_sse_ucomi_round
40361 : CODE_FOR_sse2_ucomi_round;
40362
40363 pat = GEN_FCN (icode) (op0, op1, op3);
40364 if (! pat)
40365 return 0;
40366
40367 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
40368 if (INTVAL (op3) == NO_ROUND)
40369 {
40370 pat = ix86_erase_embedded_rounding (pat);
40371 if (! pat)
40372 return 0;
40373
40374 set_dst = SET_DEST (pat);
40375 }
40376 else
40377 {
40378 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
40379 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
40380 }
40381
40382 emit_insn (pat);
40383 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
40384 gen_rtx_fmt_ee (comparison, QImode,
40385 set_dst,
40386 const0_rtx)));
40387
40388 return SUBREG_REG (target);
40389 }
40390
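/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding (or SAE) operands.  The last argument of such builtins is the
   rounding immediate; when it is NO_ROUND the embedded-rounding unspec is
   stripped from the generated pattern.  */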
40391 static rtx
40392 ix86_expand_round_builtin (const struct builtin_description *d,
40393 tree exp, rtx target)
40394 {
40395 rtx pat;
40396 unsigned int i, nargs;
40397 struct
40398 {
40399 rtx op;
40400 machine_mode mode;
40401 } args[6];
40402 enum insn_code icode = d->icode;
40403 const struct insn_data_d *insn_p = &insn_data[icode];
40404 machine_mode tmode = insn_p->operand[0].mode;
40405 unsigned int nargs_constant = 0;
40406 unsigned int redundant_embed_rnd = 0;
40407
40408 switch ((enum ix86_builtin_func_type) d->flag)
40409 {
40410 case UINT64_FTYPE_V2DF_INT:
40411 case UINT64_FTYPE_V4SF_INT:
40412 case UINT_FTYPE_V2DF_INT:
40413 case UINT_FTYPE_V4SF_INT:
40414 case INT64_FTYPE_V2DF_INT:
40415 case INT64_FTYPE_V4SF_INT:
40416 case INT_FTYPE_V2DF_INT:
40417 case INT_FTYPE_V4SF_INT:
40418 nargs = 2;
40419 break;
40420 case V4SF_FTYPE_V4SF_UINT_INT:
40421 case V4SF_FTYPE_V4SF_UINT64_INT:
40422 case V2DF_FTYPE_V2DF_UINT64_INT:
40423 case V4SF_FTYPE_V4SF_INT_INT:
40424 case V4SF_FTYPE_V4SF_INT64_INT:
40425 case V2DF_FTYPE_V2DF_INT64_INT:
40426 case V4SF_FTYPE_V4SF_V4SF_INT:
40427 case V2DF_FTYPE_V2DF_V2DF_INT:
40428 case V4SF_FTYPE_V4SF_V2DF_INT:
40429 case V2DF_FTYPE_V2DF_V4SF_INT:
40430 nargs = 3;
40431 break;
40432 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
40433 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
40434 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
40435 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
40436 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
40437 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
40438 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
40439 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
40440 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
40441 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
40442 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
40443 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
40444 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
40445 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
40446 nargs = 4;
40447 break;
40448 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
40449 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
40450 nargs_constant = 2;
40451 nargs = 4;
40452 break;
40453 case INT_FTYPE_V4SF_V4SF_INT_INT:
40454 case INT_FTYPE_V2DF_V2DF_INT_INT:
40455 return ix86_expand_sse_comi_round (d, exp, target);
40456 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
40457 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
40458 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
40459 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
40460 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
40461 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
40462 nargs = 5;
40463 break;
40464 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
40465 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
40466 nargs_constant = 4;
40467 nargs = 5;
40468 break;
40469 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
40470 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
40471 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
40472 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
40473 nargs_constant = 3;
40474 nargs = 5;
40475 break;
40476 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
40477 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
40478 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
40479 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
40480 nargs = 6;
40481 nargs_constant = 4;
40482 break;
40483 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
40484 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
40485 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
40486 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
40487 nargs = 6;
40488 nargs_constant = 3;
40489 break;
40490 default:
40491 gcc_unreachable ();
40492 }
40493 gcc_assert (nargs <= ARRAY_SIZE (args));
40494
40495 if (optimize
40496 || target == 0
40497 || GET_MODE (target) != tmode
40498 || !insn_p->operand[0].predicate (target, tmode))
40499 target = gen_reg_rtx (tmode);
40500
40501 for (i = 0; i < nargs; i++)
40502 {
40503 tree arg = CALL_EXPR_ARG (exp, i);
40504 rtx op = expand_normal (arg);
40505 machine_mode mode = insn_p->operand[i + 1].mode;
40506 bool match = insn_p->operand[i + 1].predicate (op, mode);
40507
40508 if (i == nargs - nargs_constant)
40509 {
40510 if (!match)
40511 {
40512 switch (icode)
40513 {
40514 case CODE_FOR_avx512f_getmantv8df_mask_round:
40515 case CODE_FOR_avx512f_getmantv16sf_mask_round:
40516 case CODE_FOR_avx512f_vgetmantv2df_round:
40517 case CODE_FOR_avx512f_vgetmantv4sf_round:
40518 error ("the immediate argument must be a 4-bit immediate");
40519 return const0_rtx;
40520 case CODE_FOR_avx512f_cmpv8df3_mask_round:
40521 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
40522 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
40523 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
40524 error ("the immediate argument must be a 5-bit immediate");
40525 return const0_rtx;
40526 default:
40527 error ("the immediate argument must be an 8-bit immediate");
40528 return const0_rtx;
40529 }
40530 }
40531 }
40532 else if (i == nargs - 1)
40533 {
40534 if (!insn_p->operand[nargs].predicate (op, SImode))
40535 {
40536 error ("incorrect rounding operand");
40537 return const0_rtx;
40538 }
40539
40540 /* If there is no rounding, use the normal version of the pattern. */
40541 if (INTVAL (op) == NO_ROUND)
40542 redundant_embed_rnd = 1;
40543 }
40544 else
40545 {
40546 if (VECTOR_MODE_P (mode))
40547 op = safe_vector_operand (op, mode);
40548
40549 op = fixup_modeless_constant (op, mode);
40550
40551 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40552 {
40553 if (optimize || !match)
40554 op = copy_to_mode_reg (mode, op);
40555 }
40556 else
40557 {
40558 op = copy_to_reg (op);
40559 op = lowpart_subreg (mode, op, GET_MODE (op));
40560 }
40561 }
40562
40563 args[i].op = op;
40564 args[i].mode = mode;
40565 }
40566
40567 switch (nargs)
40568 {
40569 case 1:
40570 pat = GEN_FCN (icode) (target, args[0].op);
40571 break;
40572 case 2:
40573 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40574 break;
40575 case 3:
40576 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
40577 args[2].op);
40578 break;
40579 case 4:
40580 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
40581 args[2].op, args[3].op);
40582 break;
40583 case 5:
40584 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
40585 args[2].op, args[3].op, args[4].op);
40586 break;
40587 case 6:
40588 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
40589 args[2].op, args[3].op, args[4].op,
40590 args[5].op);
40591 break;
40592 default:
40593 gcc_unreachable ();
40594 }
40595
40596 if (!pat)
40597 return 0;
40598
40599 if (redundant_embed_rnd)
40600 pat = ix86_erase_embedded_rounding (pat);
40601
40602 emit_insn (pat);
40603 return target;
40604 }
40605
40606 /* Subroutine of ix86_expand_builtin to take care of special insns
40607 with variable number of operands. */
40608
40609 static rtx
40610 ix86_expand_special_args_builtin (const struct builtin_description *d,
40611 tree exp, rtx target)
40612 {
40613 tree arg;
40614 rtx pat, op;
40615 unsigned int i, nargs, arg_adjust, memory;
40616 bool aligned_mem = false;
40617 struct
40618 {
40619 rtx op;
40620 machine_mode mode;
40621 } args[3];
40622 enum insn_code icode = d->icode;
40623 bool last_arg_constant = false;
40624 const struct insn_data_d *insn_p = &insn_data[icode];
40625 machine_mode tmode = insn_p->operand[0].mode;
40626 enum { load, store } klass;
40627
40628 switch ((enum ix86_builtin_func_type) d->flag)
40629 {
40630 case VOID_FTYPE_VOID:
40631 emit_insn (GEN_FCN (icode) (target));
40632 return 0;
40633 case VOID_FTYPE_UINT64:
40634 case VOID_FTYPE_UNSIGNED:
40635 nargs = 0;
40636 klass = store;
40637 memory = 0;
40638 break;
40639
40640 case INT_FTYPE_VOID:
40641 case USHORT_FTYPE_VOID:
40642 case UINT64_FTYPE_VOID:
40643 case UNSIGNED_FTYPE_VOID:
40644 nargs = 0;
40645 klass = load;
40646 memory = 0;
40647 break;
40648 case UINT64_FTYPE_PUNSIGNED:
40649 case V2DI_FTYPE_PV2DI:
40650 case V4DI_FTYPE_PV4DI:
40651 case V32QI_FTYPE_PCCHAR:
40652 case V16QI_FTYPE_PCCHAR:
40653 case V8SF_FTYPE_PCV4SF:
40654 case V8SF_FTYPE_PCFLOAT:
40655 case V4SF_FTYPE_PCFLOAT:
40656 case V4DF_FTYPE_PCV2DF:
40657 case V4DF_FTYPE_PCDOUBLE:
40658 case V2DF_FTYPE_PCDOUBLE:
40659 case VOID_FTYPE_PVOID:
40660 case V8DI_FTYPE_PV8DI:
40661 nargs = 1;
40662 klass = load;
40663 memory = 0;
40664 switch (icode)
40665 {
40666 case CODE_FOR_sse4_1_movntdqa:
40667 case CODE_FOR_avx2_movntdqa:
40668 case CODE_FOR_avx512f_movntdqa:
40669 aligned_mem = true;
40670 break;
40671 default:
40672 break;
40673 }
40674 break;
40675 case VOID_FTYPE_PV2SF_V4SF:
40676 case VOID_FTYPE_PV8DI_V8DI:
40677 case VOID_FTYPE_PV4DI_V4DI:
40678 case VOID_FTYPE_PV2DI_V2DI:
40679 case VOID_FTYPE_PCHAR_V32QI:
40680 case VOID_FTYPE_PCHAR_V16QI:
40681 case VOID_FTYPE_PFLOAT_V16SF:
40682 case VOID_FTYPE_PFLOAT_V8SF:
40683 case VOID_FTYPE_PFLOAT_V4SF:
40684 case VOID_FTYPE_PDOUBLE_V8DF:
40685 case VOID_FTYPE_PDOUBLE_V4DF:
40686 case VOID_FTYPE_PDOUBLE_V2DF:
40687 case VOID_FTYPE_PLONGLONG_LONGLONG:
40688 case VOID_FTYPE_PULONGLONG_ULONGLONG:
40689 case VOID_FTYPE_PINT_INT:
40690 nargs = 1;
40691 klass = store;
40692 /* Reserve memory operand for target. */
40693 memory = ARRAY_SIZE (args);
40694 switch (icode)
40695 {
40696 /* These builtins and instructions require the memory
40697 to be properly aligned. */
40698 case CODE_FOR_avx_movntv4di:
40699 case CODE_FOR_sse2_movntv2di:
40700 case CODE_FOR_avx_movntv8sf:
40701 case CODE_FOR_sse_movntv4sf:
40702 case CODE_FOR_sse4a_vmmovntv4sf:
40703 case CODE_FOR_avx_movntv4df:
40704 case CODE_FOR_sse2_movntv2df:
40705 case CODE_FOR_sse4a_vmmovntv2df:
40706 case CODE_FOR_sse2_movntidi:
40707 case CODE_FOR_sse_movntq:
40708 case CODE_FOR_sse2_movntisi:
40709 case CODE_FOR_avx512f_movntv16sf:
40710 case CODE_FOR_avx512f_movntv8df:
40711 case CODE_FOR_avx512f_movntv8di:
40712 aligned_mem = true;
40713 break;
40714 default:
40715 break;
40716 }
40717 break;
40718 case V4SF_FTYPE_V4SF_PCV2SF:
40719 case V2DF_FTYPE_V2DF_PCDOUBLE:
40720 nargs = 2;
40721 klass = load;
40722 memory = 1;
40723 break;
40724 case V8SF_FTYPE_PCV8SF_V8SI:
40725 case V4DF_FTYPE_PCV4DF_V4DI:
40726 case V4SF_FTYPE_PCV4SF_V4SI:
40727 case V2DF_FTYPE_PCV2DF_V2DI:
40728 case V8SI_FTYPE_PCV8SI_V8SI:
40729 case V4DI_FTYPE_PCV4DI_V4DI:
40730 case V4SI_FTYPE_PCV4SI_V4SI:
40731 case V2DI_FTYPE_PCV2DI_V2DI:
40732 nargs = 2;
40733 klass = load;
40734 memory = 0;
40735 break;
40736 case VOID_FTYPE_PV8DF_V8DF_UQI:
40737 case VOID_FTYPE_PV4DF_V4DF_UQI:
40738 case VOID_FTYPE_PV2DF_V2DF_UQI:
40739 case VOID_FTYPE_PV16SF_V16SF_UHI:
40740 case VOID_FTYPE_PV8SF_V8SF_UQI:
40741 case VOID_FTYPE_PV4SF_V4SF_UQI:
40742 case VOID_FTYPE_PV8DI_V8DI_UQI:
40743 case VOID_FTYPE_PV4DI_V4DI_UQI:
40744 case VOID_FTYPE_PV2DI_V2DI_UQI:
40745 case VOID_FTYPE_PV16SI_V16SI_UHI:
40746 case VOID_FTYPE_PV8SI_V8SI_UQI:
40747 case VOID_FTYPE_PV4SI_V4SI_UQI:
40748 switch (icode)
40749 {
40750 /* These builtins and instructions require the memory
40751 to be properly aligned. */
40752 case CODE_FOR_avx512f_storev16sf_mask:
40753 case CODE_FOR_avx512f_storev16si_mask:
40754 case CODE_FOR_avx512f_storev8df_mask:
40755 case CODE_FOR_avx512f_storev8di_mask:
40756 case CODE_FOR_avx512vl_storev8sf_mask:
40757 case CODE_FOR_avx512vl_storev8si_mask:
40758 case CODE_FOR_avx512vl_storev4df_mask:
40759 case CODE_FOR_avx512vl_storev4di_mask:
40760 case CODE_FOR_avx512vl_storev4sf_mask:
40761 case CODE_FOR_avx512vl_storev4si_mask:
40762 case CODE_FOR_avx512vl_storev2df_mask:
40763 case CODE_FOR_avx512vl_storev2di_mask:
40764 aligned_mem = true;
40765 break;
40766 default:
40767 break;
40768 }
40769 /* FALLTHRU */
40770 case VOID_FTYPE_PV8SF_V8SI_V8SF:
40771 case VOID_FTYPE_PV4DF_V4DI_V4DF:
40772 case VOID_FTYPE_PV4SF_V4SI_V4SF:
40773 case VOID_FTYPE_PV2DF_V2DI_V2DF:
40774 case VOID_FTYPE_PV8SI_V8SI_V8SI:
40775 case VOID_FTYPE_PV4DI_V4DI_V4DI:
40776 case VOID_FTYPE_PV4SI_V4SI_V4SI:
40777 case VOID_FTYPE_PV2DI_V2DI_V2DI:
40778 case VOID_FTYPE_PV8SI_V8DI_UQI:
40779 case VOID_FTYPE_PV8HI_V8DI_UQI:
40780 case VOID_FTYPE_PV16HI_V16SI_UHI:
40781 case VOID_FTYPE_PV16QI_V8DI_UQI:
40782 case VOID_FTYPE_PV16QI_V16SI_UHI:
40783 case VOID_FTYPE_PV4SI_V4DI_UQI:
40784 case VOID_FTYPE_PV4SI_V2DI_UQI:
40785 case VOID_FTYPE_PV8HI_V4DI_UQI:
40786 case VOID_FTYPE_PV8HI_V2DI_UQI:
40787 case VOID_FTYPE_PV8HI_V8SI_UQI:
40788 case VOID_FTYPE_PV8HI_V4SI_UQI:
40789 case VOID_FTYPE_PV16QI_V4DI_UQI:
40790 case VOID_FTYPE_PV16QI_V2DI_UQI:
40791 case VOID_FTYPE_PV16QI_V8SI_UQI:
40792 case VOID_FTYPE_PV16QI_V4SI_UQI:
40793 case VOID_FTYPE_PCHAR_V64QI_UDI:
40794 case VOID_FTYPE_PCHAR_V32QI_USI:
40795 case VOID_FTYPE_PCHAR_V16QI_UHI:
40796 case VOID_FTYPE_PSHORT_V32HI_USI:
40797 case VOID_FTYPE_PSHORT_V16HI_UHI:
40798 case VOID_FTYPE_PSHORT_V8HI_UQI:
40799 case VOID_FTYPE_PINT_V16SI_UHI:
40800 case VOID_FTYPE_PINT_V8SI_UQI:
40801 case VOID_FTYPE_PINT_V4SI_UQI:
40802 case VOID_FTYPE_PINT64_V8DI_UQI:
40803 case VOID_FTYPE_PINT64_V4DI_UQI:
40804 case VOID_FTYPE_PINT64_V2DI_UQI:
40805 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
40806 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
40807 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
40808 case VOID_FTYPE_PFLOAT_V16SF_UHI:
40809 case VOID_FTYPE_PFLOAT_V8SF_UQI:
40810 case VOID_FTYPE_PFLOAT_V4SF_UQI:
40811 nargs = 2;
40812 klass = store;
40813 /* Reserve memory operand for target. */
40814 memory = ARRAY_SIZE (args);
40815 break;
40816 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
40817 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
40818 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
40819 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
40820 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
40821 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
40822 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
40823 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
40824 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
40825 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
40826 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
40827 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
40828 switch (icode)
40829 {
40830 /* These builtins and instructions require the memory
40831 to be properly aligned. */
40832 case CODE_FOR_avx512f_loadv16sf_mask:
40833 case CODE_FOR_avx512f_loadv16si_mask:
40834 case CODE_FOR_avx512f_loadv8df_mask:
40835 case CODE_FOR_avx512f_loadv8di_mask:
40836 case CODE_FOR_avx512vl_loadv8sf_mask:
40837 case CODE_FOR_avx512vl_loadv8si_mask:
40838 case CODE_FOR_avx512vl_loadv4df_mask:
40839 case CODE_FOR_avx512vl_loadv4di_mask:
40840 case CODE_FOR_avx512vl_loadv4sf_mask:
40841 case CODE_FOR_avx512vl_loadv4si_mask:
40842 case CODE_FOR_avx512vl_loadv2df_mask:
40843 case CODE_FOR_avx512vl_loadv2di_mask:
40844 case CODE_FOR_avx512bw_loadv64qi_mask:
40845 case CODE_FOR_avx512vl_loadv32qi_mask:
40846 case CODE_FOR_avx512vl_loadv16qi_mask:
40847 case CODE_FOR_avx512bw_loadv32hi_mask:
40848 case CODE_FOR_avx512vl_loadv16hi_mask:
40849 case CODE_FOR_avx512vl_loadv8hi_mask:
40850 aligned_mem = true;
40851 break;
40852 default:
40853 break;
40854 }
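/* FALLTHRU */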
40855 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
40856 case V32QI_FTYPE_PCCHAR_V32QI_USI:
40857 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
40858 case V32HI_FTYPE_PCSHORT_V32HI_USI:
40859 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
40860 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
40861 case V16SI_FTYPE_PCINT_V16SI_UHI:
40862 case V8SI_FTYPE_PCINT_V8SI_UQI:
40863 case V4SI_FTYPE_PCINT_V4SI_UQI:
40864 case V8DI_FTYPE_PCINT64_V8DI_UQI:
40865 case V4DI_FTYPE_PCINT64_V4DI_UQI:
40866 case V2DI_FTYPE_PCINT64_V2DI_UQI:
40867 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
40868 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
40869 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
40870 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
40871 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
40872 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
40873 nargs = 3;
40874 klass = load;
40875 memory = 0;
40876 break;
40877 case VOID_FTYPE_UINT_UINT_UINT:
40878 case VOID_FTYPE_UINT64_UINT_UINT:
40879 case UCHAR_FTYPE_UINT_UINT_UINT:
40880 case UCHAR_FTYPE_UINT64_UINT_UINT:
40881 nargs = 3;
40882 klass = load;
40883 memory = ARRAY_SIZE (args);
40884 last_arg_constant = true;
40885 break;
40886 default:
40887 gcc_unreachable ();
40888 }
40889
40890 gcc_assert (nargs <= ARRAY_SIZE (args));
40891
40892 if (klass == store)
40893 {
40894 arg = CALL_EXPR_ARG (exp, 0);
40895 op = expand_normal (arg);
40896 gcc_assert (target == 0);
40897 if (memory)
40898 {
40899 op = ix86_zero_extend_to_Pmode (op);
40900 target = gen_rtx_MEM (tmode, op);
40901 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
40902 on it. Try to improve it using get_pointer_alignment,
40903 and if the special builtin is one that requires strict
40904 mode alignment, also from its GET_MODE_ALIGNMENT.
40905 Failure to do so could lead to ix86_legitimate_combined_insn
40906 rejecting all changes to such insns. */
40907 unsigned int align = get_pointer_alignment (arg);
40908 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
40909 align = GET_MODE_ALIGNMENT (tmode);
40910 if (MEM_ALIGN (target) < align)
40911 set_mem_align (target, align);
40912 }
40913 else
40914 target = force_reg (tmode, op);
40915 arg_adjust = 1;
40916 }
40917 else
40918 {
40919 arg_adjust = 0;
40920 if (optimize
40921 || target == 0
40922 || !register_operand (target, tmode)
40923 || GET_MODE (target) != tmode)
40924 target = gen_reg_rtx (tmode);
40925 }
40926
40927 for (i = 0; i < nargs; i++)
40928 {
40929 machine_mode mode = insn_p->operand[i + 1].mode;
40930 bool match;
40931
40932 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
40933 op = expand_normal (arg);
40934 match = insn_p->operand[i + 1].predicate (op, mode);
40935
40936 if (last_arg_constant && (i + 1) == nargs)
40937 {
40938 if (!match)
40939 {
40940 if (icode == CODE_FOR_lwp_lwpvalsi3
40941 || icode == CODE_FOR_lwp_lwpinssi3
40942 || icode == CODE_FOR_lwp_lwpvaldi3
40943 || icode == CODE_FOR_lwp_lwpinsdi3)
40944 error ("the last argument must be a 32-bit immediate");
40945 else
40946 error ("the last argument must be an 8-bit immediate");
40947 return const0_rtx;
40948 }
40949 }
40950 else
40951 {
40952 if (i == memory)
40953 {
40954 /* This must be the memory operand. */
40955 op = ix86_zero_extend_to_Pmode (op);
40956 op = gen_rtx_MEM (mode, op);
40957 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
40958 on it. Try to improve it using get_pointer_alignment,
40959 and if the special builtin is one that requires strict
40960 mode alignment, also from its GET_MODE_ALIGNMENT.
40961 Failure to do so could lead to ix86_legitimate_combined_insn
40962 rejecting all changes to such insns. */
40963 unsigned int align = get_pointer_alignment (arg);
40964 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
40965 align = GET_MODE_ALIGNMENT (mode);
40966 if (MEM_ALIGN (op) < align)
40967 set_mem_align (op, align);
40968 }
40969 else
40970 {
40971 /* This must be register. */
40972 if (VECTOR_MODE_P (mode))
40973 op = safe_vector_operand (op, mode);
40974
40975 op = fixup_modeless_constant (op, mode);
40976
40977 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40978 op = copy_to_mode_reg (mode, op);
40979 else
40980 {
40981 op = copy_to_reg (op);
40982 op = lowpart_subreg (mode, op, GET_MODE (op));
40983 }
40984 }
40985 }
40986
40987 args[i].op = op;
40988 args[i].mode = mode;
40989 }
40990
40991 switch (nargs)
40992 {
40993 case 0:
40994 pat = GEN_FCN (icode) (target);
40995 break;
40996 case 1:
40997 pat = GEN_FCN (icode) (target, args[0].op);
40998 break;
40999 case 2:
41000 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
41001 break;
41002 case 3:
41003 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
41004 break;
41005 default:
41006 gcc_unreachable ();
41007 }
41008
41009 if (! pat)
41010 return 0;
41011 emit_insn (pat);
41012 return klass == store ? 0 : target;
41013 }
41014
41015 /* Return the integer constant in ARG. Constrain it to be in the range
41016 of the subparts of VEC_TYPE; issue an error if not. */
41017
41018 static int
41019 get_element_number (tree vec_type, tree arg)
41020 {
41021 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
41022
41023 if (!tree_fits_uhwi_p (arg)
41024 || (elt = tree_to_uhwi (arg), elt > max))
41025 {
41026 error ("selector must be an integer constant in the range 0..%wi", max);
41027 return 0;
41028 }
41029
41030 return elt;
41031 }
41032
41033 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
41034 ix86_expand_vector_init. We DO have language-level syntax for this, in
41035 the form of (type){ init-list }. Except that since we can't place emms
41036 instructions from inside the compiler, we can't allow the use of MMX
41037 registers unless the user explicitly asks for it. So we do *not* define
41038 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
41039 we have builtins invoked by mmintrin.h that give us license to emit
41040 these sorts of instructions. */
41041
41042 static rtx
41043 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
41044 {
41045 machine_mode tmode = TYPE_MODE (type);
41046 machine_mode inner_mode = GET_MODE_INNER (tmode);
41047 int i, n_elt = GET_MODE_NUNITS (tmode);
41048 rtvec v = rtvec_alloc (n_elt);
41049
41050 gcc_assert (VECTOR_MODE_P (tmode));
41051 gcc_assert (call_expr_nargs (exp) == n_elt);
41052
41053 for (i = 0; i < n_elt; ++i)
41054 {
41055 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
41056 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
41057 }
41058
41059 if (!target || !register_operand (target, tmode))
41060 target = gen_reg_rtx (tmode);
41061
41062 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
41063 return target;
41064 }
41065
41066 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
41067 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
41068 had a language-level syntax for referencing vector elements. */
41069
41070 static rtx
41071 ix86_expand_vec_ext_builtin (tree exp, rtx target)
41072 {
41073 machine_mode tmode, mode0;
41074 tree arg0, arg1;
41075 int elt;
41076 rtx op0;
41077
41078 arg0 = CALL_EXPR_ARG (exp, 0);
41079 arg1 = CALL_EXPR_ARG (exp, 1);
41080
41081 op0 = expand_normal (arg0);
41082 elt = get_element_number (TREE_TYPE (arg0), arg1);
41083
41084 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
41085 mode0 = TYPE_MODE (TREE_TYPE (arg0));
41086 gcc_assert (VECTOR_MODE_P (mode0));
41087
41088 op0 = force_reg (mode0, op0);
41089
41090 if (optimize || !target || !register_operand (target, tmode))
41091 target = gen_reg_rtx (tmode);
41092
41093 ix86_expand_vector_extract (true, target, op0, elt);
41094
41095 return target;
41096 }
41097
41098 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
41099 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
41100 a language-level syntax for referencing vector elements. */
41101
41102 static rtx
41103 ix86_expand_vec_set_builtin (tree exp)
41104 {
41105 machine_mode tmode, mode1;
41106 tree arg0, arg1, arg2;
41107 int elt;
41108 rtx op0, op1, target;
41109
41110 arg0 = CALL_EXPR_ARG (exp, 0);
41111 arg1 = CALL_EXPR_ARG (exp, 1);
41112 arg2 = CALL_EXPR_ARG (exp, 2);
41113
41114 tmode = TYPE_MODE (TREE_TYPE (arg0));
41115 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
41116 gcc_assert (VECTOR_MODE_P (tmode));
41117
41118 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
41119 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
41120 elt = get_element_number (TREE_TYPE (arg0), arg2);
41121
41122 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
41123 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
41124
41125 op0 = force_reg (tmode, op0);
41126 op1 = force_reg (mode1, op1);
41127
41128 /* OP0 is the source of these builtin functions and shouldn't be
41129 modified. Create a copy, use it and return it as target. */
41130 target = gen_reg_rtx (tmode);
41131 emit_move_insn (target, op0);
41132 ix86_expand_vector_set (true, target, op1, elt);
41133
41134 return target;
41135 }
41136
41137 /* Emit conditional move of SRC to DST with condition
41138 OP1 CODE OP2. */
41139 static void
41140 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
41141 {
41142 rtx t;
41143
41144 if (TARGET_CMOVE)
41145 {
41146 t = ix86_expand_compare (code, op1, op2);
41147 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
41148 src, dst)));
41149 }
41150 else
41151 {
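/* Without cmov, branch around the move using the reversed condition.  */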
41152 rtx_code_label *nomove = gen_label_rtx ();
41153 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
41154 const0_rtx, GET_MODE (op1), 1, nomove);
41155 emit_move_insn (dst, src);
41156 emit_label (nomove);
41157 }
41158 }
41159
41160 /* Choose the max of DST and SRC and store it in DST. */
41161 static void
41162 ix86_emit_move_max (rtx dst, rtx src)
41163 {
41164 ix86_emit_cmove (dst, src, LTU, dst, src);
41165 }
41166
41167 /* Expand an expression EXP that calls a built-in function,
41168 with result going to TARGET if that's convenient
41169 (and in mode MODE if that's convenient).
41170 SUBTARGET may be used as the target for computing one of EXP's operands.
41171 IGNORE is nonzero if the value is to be ignored. */
41172
41173 static rtx
41174 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
41175 machine_mode mode, int ignore)
41176 {
41177 const struct builtin_description *d;
41178 size_t i;
41179 enum insn_code icode;
41180 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
41181 tree arg0, arg1, arg2, arg3, arg4;
41182 rtx op0, op1, op2, op3, op4, pat, insn;
41183 machine_mode mode0, mode1, mode2, mode3, mode4;
41184 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
41185
41186 /* For CPU builtins that can be folded, fold first and expand the fold. */
41187 switch (fcode)
41188 {
41189 case IX86_BUILTIN_CPU_INIT:
41190 {
41191 /* Make it call __cpu_indicator_init in libgcc. */
41192 tree call_expr, fndecl, type;
41193 type = build_function_type_list (integer_type_node, NULL_TREE);
41194 fndecl = build_fn_decl ("__cpu_indicator_init", type);
41195 call_expr = build_call_expr (fndecl, 0);
41196 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
41197 }
41198 case IX86_BUILTIN_CPU_IS:
41199 case IX86_BUILTIN_CPU_SUPPORTS:
41200 {
41201 tree arg0 = CALL_EXPR_ARG (exp, 0);
41202 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
41203 gcc_assert (fold_expr != NULL_TREE);
41204 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
41205 }
41206 }
41207
41208 /* Determine whether the builtin function is available under the current ISA.
41209 Originally the builtin was not created if it wasn't applicable to the
41210 current ISA based on the command line switches. With function specific
41211 options, we need to check in the context of the function making the call
41212 whether it is supported. */
41213 if (ix86_builtins_isa[fcode].isa
41214 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
41215 {
41216 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, 0,
41217 NULL, NULL, (enum fpmath_unit) 0,
41218 false);
41219 if (!opts)
41220 error ("%qE needs unknown isa option", fndecl);
41221 else
41222 {
41223 gcc_assert (opts != NULL);
41224 error ("%qE needs isa option %s", fndecl, opts);
41225 free (opts);
41226 }
41227 return const0_rtx;
41228 }
41229
41230 switch (fcode)
41231 {
41232 case IX86_BUILTIN_BNDMK:
41233 if (!target
41234 || GET_MODE (target) != BNDmode
41235 || !register_operand (target, BNDmode))
41236 target = gen_reg_rtx (BNDmode);
41237
41238 arg0 = CALL_EXPR_ARG (exp, 0);
41239 arg1 = CALL_EXPR_ARG (exp, 1);
41240
41241 op0 = expand_normal (arg0);
41242 op1 = expand_normal (arg1);
41243
41244 if (!register_operand (op0, Pmode))
41245 op0 = ix86_zero_extend_to_Pmode (op0);
41246 if (!register_operand (op1, Pmode))
41247 op1 = ix86_zero_extend_to_Pmode (op1);
41248
41249 /* Builtin arg1 is the size of the block, but instruction op1 should
41250 be (size - 1). */
41251 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
41252 NULL_RTX, 1, OPTAB_DIRECT);
41253
41254 emit_insn (BNDmode == BND64mode
41255 ? gen_bnd64_mk (target, op0, op1)
41256 : gen_bnd32_mk (target, op0, op1));
41257 return target;
41258
41259 case IX86_BUILTIN_BNDSTX:
41260 arg0 = CALL_EXPR_ARG (exp, 0);
41261 arg1 = CALL_EXPR_ARG (exp, 1);
41262 arg2 = CALL_EXPR_ARG (exp, 2);
41263
41264 op0 = expand_normal (arg0);
41265 op1 = expand_normal (arg1);
41266 op2 = expand_normal (arg2);
41267
41268 if (!register_operand (op0, Pmode))
41269 op0 = ix86_zero_extend_to_Pmode (op0);
41270 if (!register_operand (op1, BNDmode))
41271 op1 = copy_to_mode_reg (BNDmode, op1);
41272 if (!register_operand (op2, Pmode))
41273 op2 = ix86_zero_extend_to_Pmode (op2);
41274
41275 emit_insn (BNDmode == BND64mode
41276 ? gen_bnd64_stx (op2, op0, op1)
41277 : gen_bnd32_stx (op2, op0, op1));
41278 return 0;
41279
41280 case IX86_BUILTIN_BNDLDX:
41281 if (!target
41282 || GET_MODE (target) != BNDmode
41283 || !register_operand (target, BNDmode))
41284 target = gen_reg_rtx (BNDmode);
41285
41286 arg0 = CALL_EXPR_ARG (exp, 0);
41287 arg1 = CALL_EXPR_ARG (exp, 1);
41288
41289 op0 = expand_normal (arg0);
41290 op1 = expand_normal (arg1);
41291
41292 if (!register_operand (op0, Pmode))
41293 op0 = ix86_zero_extend_to_Pmode (op0);
41294 if (!register_operand (op1, Pmode))
41295 op1 = ix86_zero_extend_to_Pmode (op1);
41296
41297 emit_insn (BNDmode == BND64mode
41298 ? gen_bnd64_ldx (target, op0, op1)
41299 : gen_bnd32_ldx (target, op0, op1));
41300 return target;
41301
41302 case IX86_BUILTIN_BNDCL:
41303 arg0 = CALL_EXPR_ARG (exp, 0);
41304 arg1 = CALL_EXPR_ARG (exp, 1);
41305
41306 op0 = expand_normal (arg0);
41307 op1 = expand_normal (arg1);
41308
41309 if (!register_operand (op0, Pmode))
41310 op0 = ix86_zero_extend_to_Pmode (op0);
41311 if (!register_operand (op1, BNDmode))
41312 op1 = copy_to_mode_reg (BNDmode, op1);
41313
41314 emit_insn (BNDmode == BND64mode
41315 ? gen_bnd64_cl (op1, op0)
41316 : gen_bnd32_cl (op1, op0));
41317 return 0;
41318
41319 case IX86_BUILTIN_BNDCU:
41320 arg0 = CALL_EXPR_ARG (exp, 0);
41321 arg1 = CALL_EXPR_ARG (exp, 1);
41322
41323 op0 = expand_normal (arg0);
41324 op1 = expand_normal (arg1);
41325
41326 if (!register_operand (op0, Pmode))
41327 op0 = ix86_zero_extend_to_Pmode (op0);
41328 if (!register_operand (op1, BNDmode))
41329 op1 = copy_to_mode_reg (BNDmode, op1);
41330
41331 emit_insn (BNDmode == BND64mode
41332 ? gen_bnd64_cu (op1, op0)
41333 : gen_bnd32_cu (op1, op0));
41334 return 0;
41335
41336 case IX86_BUILTIN_BNDRET:
41337 arg0 = CALL_EXPR_ARG (exp, 0);
41338 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
41339 target = chkp_get_rtl_bounds (arg0);
41340
41341 /* If no bounds were specified for the returned value,
41342 then use INIT bounds. This usually happens when
41343 some built-in function is expanded. */
41344 if (!target)
41345 {
41346 rtx t1 = gen_reg_rtx (Pmode);
41347 rtx t2 = gen_reg_rtx (Pmode);
41348 target = gen_reg_rtx (BNDmode);
41349 emit_move_insn (t1, const0_rtx);
41350 emit_move_insn (t2, constm1_rtx);
41351 emit_insn (BNDmode == BND64mode
41352 ? gen_bnd64_mk (target, t1, t2)
41353 : gen_bnd32_mk (target, t1, t2));
41354 }
41355
41356 gcc_assert (target && REG_P (target));
41357 return target;
41358
41359 case IX86_BUILTIN_BNDNARROW:
41360 {
41361 rtx m1, m1h1, m1h2, lb, ub, t1;
41362
41363 /* Return value and lb. */
41364 arg0 = CALL_EXPR_ARG (exp, 0);
41365 /* Bounds. */
41366 arg1 = CALL_EXPR_ARG (exp, 1);
41367 /* Size. */
41368 arg2 = CALL_EXPR_ARG (exp, 2);
41369
41370 lb = expand_normal (arg0);
41371 op1 = expand_normal (arg1);
41372 op2 = expand_normal (arg2);
41373
41374 /* Size was passed, but we need to use (size - 1), as for bndmk. */
41375 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
41376 NULL_RTX, 1, OPTAB_DIRECT);
41377
41378 /* Add LB to size and invert to get UB. */
41379 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
41380 op2, 1, OPTAB_DIRECT);
41381 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
41382
41383 if (!register_operand (lb, Pmode))
41384 lb = ix86_zero_extend_to_Pmode (lb);
41385 if (!register_operand (ub, Pmode))
41386 ub = ix86_zero_extend_to_Pmode (ub);
41387
41388 /* We need to move bounds to memory before any computations. */
41389 if (MEM_P (op1))
41390 m1 = op1;
41391 else
41392 {
41393 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
41394 emit_move_insn (m1, op1);
41395 }
41396
41397 /* Generate mem expression to be used for access to LB and UB. */
41398 m1h1 = adjust_address (m1, Pmode, 0);
41399 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
41400
41401 t1 = gen_reg_rtx (Pmode);
41402
41403 /* Compute LB. */
41404 emit_move_insn (t1, m1h1);
41405 ix86_emit_move_max (t1, lb);
41406 emit_move_insn (m1h1, t1);
41407
41408 /* Compute UB. UB is stored in 1's complement form. Therefore
41409 we also use max here. */
41410 emit_move_insn (t1, m1h2);
41411 ix86_emit_move_max (t1, ub);
41412 emit_move_insn (m1h2, t1);
41413
41414 op2 = gen_reg_rtx (BNDmode);
41415 emit_move_insn (op2, m1);
41416
41417 return chkp_join_splitted_slot (lb, op2);
41418 }
41419
41420 case IX86_BUILTIN_BNDINT:
41421 {
41422 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
41423
41424 if (!target
41425 || GET_MODE (target) != BNDmode
41426 || !register_operand (target, BNDmode))
41427 target = gen_reg_rtx (BNDmode);
41428
41429 arg0 = CALL_EXPR_ARG (exp, 0);
41430 arg1 = CALL_EXPR_ARG (exp, 1);
41431
41432 op0 = expand_normal (arg0);
41433 op1 = expand_normal (arg1);
41434
41435 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
41436 rh1 = adjust_address (res, Pmode, 0);
41437 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
41438
41439 /* Put first bounds to temporaries. */
41440 lb1 = gen_reg_rtx (Pmode);
41441 ub1 = gen_reg_rtx (Pmode);
41442 if (MEM_P (op0))
41443 {
41444 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
41445 emit_move_insn (ub1, adjust_address (op0, Pmode,
41446 GET_MODE_SIZE (Pmode)));
41447 }
41448 else
41449 {
41450 emit_move_insn (res, op0);
41451 emit_move_insn (lb1, rh1);
41452 emit_move_insn (ub1, rh2);
41453 }
41454
41455 /* Put second bounds to temporaries. */
41456 lb2 = gen_reg_rtx (Pmode);
41457 ub2 = gen_reg_rtx (Pmode);
41458 if (MEM_P (op1))
41459 {
41460 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
41461 emit_move_insn (ub2, adjust_address (op1, Pmode,
41462 GET_MODE_SIZE (Pmode)));
41463 }
41464 else
41465 {
41466 emit_move_insn (res, op1);
41467 emit_move_insn (lb2, rh1);
41468 emit_move_insn (ub2, rh2);
41469 }
41470
41471 /* Compute LB. */
41472 ix86_emit_move_max (lb1, lb2);
41473 emit_move_insn (rh1, lb1);
41474
41475 /* Compute UB. UB is stored in 1's complement form. Therefore
41476 we also use max here. */
41477 ix86_emit_move_max (ub1, ub2);
41478 emit_move_insn (rh2, ub1);
41479
41480 emit_move_insn (target, res);
41481
41482 return target;
41483 }
41484
41485 case IX86_BUILTIN_SIZEOF:
41486 {
41487 tree name;
41488 rtx symbol;
41489
41490 if (!target
41491 || GET_MODE (target) != Pmode
41492 || !register_operand (target, Pmode))
41493 target = gen_reg_rtx (Pmode);
41494
41495 arg0 = CALL_EXPR_ARG (exp, 0);
41496 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
41497
41498 name = DECL_ASSEMBLER_NAME (arg0);
41499 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
41500
41501 emit_insn (Pmode == SImode
41502 ? gen_move_size_reloc_si (target, symbol)
41503 : gen_move_size_reloc_di (target, symbol));
41504
41505 return target;
41506 }
41507
41508 case IX86_BUILTIN_BNDLOWER:
41509 {
41510 rtx mem, hmem;
41511
41512 if (!target
41513 || GET_MODE (target) != Pmode
41514 || !register_operand (target, Pmode))
41515 target = gen_reg_rtx (Pmode);
41516
41517 arg0 = CALL_EXPR_ARG (exp, 0);
41518 op0 = expand_normal (arg0);
41519
41520 /* We need to move bounds to memory first. */
41521 if (MEM_P (op0))
41522 mem = op0;
41523 else
41524 {
41525 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
41526 emit_move_insn (mem, op0);
41527 }
41528
41529 /* Generate mem expression to access LB and load it. */
41530 hmem = adjust_address (mem, Pmode, 0);
41531 emit_move_insn (target, hmem);
41532
41533 return target;
41534 }
41535
41536 case IX86_BUILTIN_BNDUPPER:
41537 {
41538 rtx mem, hmem, res;
41539
41540 if (!target
41541 || GET_MODE (target) != Pmode
41542 || !register_operand (target, Pmode))
41543 target = gen_reg_rtx (Pmode);
41544
41545 arg0 = CALL_EXPR_ARG (exp, 0);
41546 op0 = expand_normal (arg0);
41547
41548 /* We need to move bounds to memory first. */
41549 if (MEM_P (op0))
41550 mem = op0;
41551 else
41552 {
41553 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
41554 emit_move_insn (mem, op0);
41555 }
41556
41557 /* Generate mem expression to access UB. */
41558 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
41559
41560 /* We need to invert all bits of UB. */
41561 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
41562
41563 if (res != target)
41564 emit_move_insn (target, res);
41565
41566 return target;
41567 }
41568
41569 case IX86_BUILTIN_MASKMOVQ:
41570 case IX86_BUILTIN_MASKMOVDQU:
41571 icode = (fcode == IX86_BUILTIN_MASKMOVQ
41572 ? CODE_FOR_mmx_maskmovq
41573 : CODE_FOR_sse2_maskmovdqu);
41574 /* Note the arg order is different from the operand order. */
41575 arg1 = CALL_EXPR_ARG (exp, 0);
41576 arg2 = CALL_EXPR_ARG (exp, 1);
41577 arg0 = CALL_EXPR_ARG (exp, 2);
41578 op0 = expand_normal (arg0);
41579 op1 = expand_normal (arg1);
41580 op2 = expand_normal (arg2);
41581 mode0 = insn_data[icode].operand[0].mode;
41582 mode1 = insn_data[icode].operand[1].mode;
41583 mode2 = insn_data[icode].operand[2].mode;
41584
41585 op0 = ix86_zero_extend_to_Pmode (op0);
41586 op0 = gen_rtx_MEM (mode1, op0);
41587
41588 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41589 op0 = copy_to_mode_reg (mode0, op0);
41590 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41591 op1 = copy_to_mode_reg (mode1, op1);
41592 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41593 op2 = copy_to_mode_reg (mode2, op2);
41594 pat = GEN_FCN (icode) (op0, op1, op2);
41595 if (! pat)
41596 return 0;
41597 emit_insn (pat);
41598 return 0;
41599
41600 case IX86_BUILTIN_LDMXCSR:
41601 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
41602 target = assign_386_stack_local (SImode, SLOT_TEMP);
41603 emit_move_insn (target, op0);
41604 emit_insn (gen_sse_ldmxcsr (target));
41605 return 0;
41606
41607 case IX86_BUILTIN_STMXCSR:
41608 target = assign_386_stack_local (SImode, SLOT_TEMP);
41609 emit_insn (gen_sse_stmxcsr (target));
41610 return copy_to_mode_reg (SImode, target);
41611
41612 case IX86_BUILTIN_CLFLUSH:
41613 arg0 = CALL_EXPR_ARG (exp, 0);
41614 op0 = expand_normal (arg0);
41615 icode = CODE_FOR_sse2_clflush;
41616 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41617 op0 = ix86_zero_extend_to_Pmode (op0);
41618
41619 emit_insn (gen_sse2_clflush (op0));
41620 return 0;
41621
41622 case IX86_BUILTIN_CLWB:
41623 arg0 = CALL_EXPR_ARG (exp, 0);
41624 op0 = expand_normal (arg0);
41625 icode = CODE_FOR_clwb;
41626 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41627 op0 = ix86_zero_extend_to_Pmode (op0);
41628
41629 emit_insn (gen_clwb (op0));
41630 return 0;
41631
41632 case IX86_BUILTIN_CLFLUSHOPT:
41633 arg0 = CALL_EXPR_ARG (exp, 0);
41634 op0 = expand_normal (arg0);
41635 icode = CODE_FOR_clflushopt;
41636 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41637 op0 = ix86_zero_extend_to_Pmode (op0);
41638
41639 emit_insn (gen_clflushopt (op0));
41640 return 0;
41641
41642 case IX86_BUILTIN_MONITOR:
41643 case IX86_BUILTIN_MONITORX:
41644 arg0 = CALL_EXPR_ARG (exp, 0);
41645 arg1 = CALL_EXPR_ARG (exp, 1);
41646 arg2 = CALL_EXPR_ARG (exp, 2);
41647 op0 = expand_normal (arg0);
41648 op1 = expand_normal (arg1);
41649 op2 = expand_normal (arg2);
41650 if (!REG_P (op0))
41651 op0 = ix86_zero_extend_to_Pmode (op0);
41652 if (!REG_P (op1))
41653 op1 = copy_to_mode_reg (SImode, op1);
41654 if (!REG_P (op2))
41655 op2 = copy_to_mode_reg (SImode, op2);
41656
41657 emit_insn (fcode == IX86_BUILTIN_MONITOR
41658 ? ix86_gen_monitor (op0, op1, op2)
41659 : ix86_gen_monitorx (op0, op1, op2));
41660 return 0;
41661
41662 case IX86_BUILTIN_MWAIT:
41663 arg0 = CALL_EXPR_ARG (exp, 0);
41664 arg1 = CALL_EXPR_ARG (exp, 1);
41665 op0 = expand_normal (arg0);
41666 op1 = expand_normal (arg1);
41667 if (!REG_P (op0))
41668 op0 = copy_to_mode_reg (SImode, op0);
41669 if (!REG_P (op1))
41670 op1 = copy_to_mode_reg (SImode, op1);
41671 emit_insn (gen_sse3_mwait (op0, op1));
41672 return 0;
41673
41674 case IX86_BUILTIN_MWAITX:
41675 arg0 = CALL_EXPR_ARG (exp, 0);
41676 arg1 = CALL_EXPR_ARG (exp, 1);
41677 arg2 = CALL_EXPR_ARG (exp, 2);
41678 op0 = expand_normal (arg0);
41679 op1 = expand_normal (arg1);
41680 op2 = expand_normal (arg2);
41681 if (!REG_P (op0))
41682 op0 = copy_to_mode_reg (SImode, op0);
41683 if (!REG_P (op1))
41684 op1 = copy_to_mode_reg (SImode, op1);
41685 if (!REG_P (op2))
41686 op2 = copy_to_mode_reg (SImode, op2);
41687 emit_insn (gen_mwaitx (op0, op1, op2));
41688 return 0;
41689
41690 case IX86_BUILTIN_CLZERO:
41691 arg0 = CALL_EXPR_ARG (exp, 0);
41692 op0 = expand_normal (arg0);
41693 if (!REG_P (op0))
41694 op0 = ix86_zero_extend_to_Pmode (op0);
41695 emit_insn (ix86_gen_clzero (op0));
41696 return 0;
41697
41698 case IX86_BUILTIN_VEC_INIT_V2SI:
41699 case IX86_BUILTIN_VEC_INIT_V4HI:
41700 case IX86_BUILTIN_VEC_INIT_V8QI:
41701 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
41702
41703 case IX86_BUILTIN_VEC_EXT_V2DF:
41704 case IX86_BUILTIN_VEC_EXT_V2DI:
41705 case IX86_BUILTIN_VEC_EXT_V4SF:
41706 case IX86_BUILTIN_VEC_EXT_V4SI:
41707 case IX86_BUILTIN_VEC_EXT_V8HI:
41708 case IX86_BUILTIN_VEC_EXT_V2SI:
41709 case IX86_BUILTIN_VEC_EXT_V4HI:
41710 case IX86_BUILTIN_VEC_EXT_V16QI:
41711 return ix86_expand_vec_ext_builtin (exp, target);
41712
41713 case IX86_BUILTIN_VEC_SET_V2DI:
41714 case IX86_BUILTIN_VEC_SET_V4SF:
41715 case IX86_BUILTIN_VEC_SET_V4SI:
41716 case IX86_BUILTIN_VEC_SET_V8HI:
41717 case IX86_BUILTIN_VEC_SET_V4HI:
41718 case IX86_BUILTIN_VEC_SET_V16QI:
41719 return ix86_expand_vec_set_builtin (exp);
41720
41721 case IX86_BUILTIN_INFQ:
41722 case IX86_BUILTIN_HUGE_VALQ:
41723 {
41724 REAL_VALUE_TYPE inf;
41725 rtx tmp;
41726
41727 real_inf (&inf);
41728 tmp = const_double_from_real_value (inf, mode);
41729
41730 tmp = validize_mem (force_const_mem (mode, tmp));
41731
41732 if (target == 0)
41733 target = gen_reg_rtx (mode);
41734
41735 emit_move_insn (target, tmp);
41736 return target;
41737 }
41738
41739 case IX86_BUILTIN_NANQ:
41740 case IX86_BUILTIN_NANSQ:
41741 return expand_call (exp, target, ignore);
41742
41743 case IX86_BUILTIN_RDPMC:
41744 case IX86_BUILTIN_RDTSC:
41745 case IX86_BUILTIN_RDTSCP:
41746
41747 op0 = gen_reg_rtx (DImode);
41748 op1 = gen_reg_rtx (DImode);
41749
41750 if (fcode == IX86_BUILTIN_RDPMC)
41751 {
41752 arg0 = CALL_EXPR_ARG (exp, 0);
41753 op2 = expand_normal (arg0);
41754 if (!register_operand (op2, SImode))
41755 op2 = copy_to_mode_reg (SImode, op2);
41756
41757 insn = (TARGET_64BIT
41758 ? gen_rdpmc_rex64 (op0, op1, op2)
41759 : gen_rdpmc (op0, op2));
41760 emit_insn (insn);
41761 }
41762 else if (fcode == IX86_BUILTIN_RDTSC)
41763 {
41764 insn = (TARGET_64BIT
41765 ? gen_rdtsc_rex64 (op0, op1)
41766 : gen_rdtsc (op0));
41767 emit_insn (insn);
41768 }
41769 else
41770 {
41771 op2 = gen_reg_rtx (SImode);
41772
41773 insn = (TARGET_64BIT
41774 ? gen_rdtscp_rex64 (op0, op1, op2)
41775 : gen_rdtscp (op0, op2));
41776 emit_insn (insn);
41777
41778 arg0 = CALL_EXPR_ARG (exp, 0);
41779 op4 = expand_normal (arg0);
41780 if (!address_operand (op4, VOIDmode))
41781 {
41782 op4 = convert_memory_address (Pmode, op4);
41783 op4 = copy_addr_to_reg (op4);
41784 }
41785 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
41786 }
41787
41788 if (target == 0)
41789 {
41790 /* mode is VOIDmode if __builtin_rd* has been called
41791 without lhs. */
41792 if (mode == VOIDmode)
41793 return target;
41794 target = gen_reg_rtx (mode);
41795 }
41796
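/* On 64-bit targets the result is returned as two 32-bit halves;
   combine the high half (op1) and the low half (op0) into a single
   DImode value.  */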
41797 if (TARGET_64BIT)
41798 {
41799 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
41800 op1, 1, OPTAB_DIRECT);
41801 op0 = expand_simple_binop (DImode, IOR, op0, op1,
41802 op0, 1, OPTAB_DIRECT);
41803 }
41804
41805 emit_move_insn (target, op0);
41806 return target;
41807
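/* Illustrative sketch (not part of the original source): on 64-bit targets
   the DImode shift/IOR above merges the two 32-bit halves returned in
   separate registers, so user code such as

     unsigned int aux;
     unsigned long long t0 = __builtin_ia32_rdtsc ();
     unsigned long long t1 = __builtin_ia32_rdtscp (&aux);  // aux <- IA32_TSC_AUX

   receives the full 64-bit timestamp in one value.  */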
41808 case IX86_BUILTIN_FXSAVE:
41809 case IX86_BUILTIN_FXRSTOR:
41810 case IX86_BUILTIN_FXSAVE64:
41811 case IX86_BUILTIN_FXRSTOR64:
41812 case IX86_BUILTIN_FNSTENV:
41813 case IX86_BUILTIN_FLDENV:
41814 mode0 = BLKmode;
41815 switch (fcode)
41816 {
41817 case IX86_BUILTIN_FXSAVE:
41818 icode = CODE_FOR_fxsave;
41819 break;
41820 case IX86_BUILTIN_FXRSTOR:
41821 icode = CODE_FOR_fxrstor;
41822 break;
41823 case IX86_BUILTIN_FXSAVE64:
41824 icode = CODE_FOR_fxsave64;
41825 break;
41826 case IX86_BUILTIN_FXRSTOR64:
41827 icode = CODE_FOR_fxrstor64;
41828 break;
41829 case IX86_BUILTIN_FNSTENV:
41830 icode = CODE_FOR_fnstenv;
41831 break;
41832 case IX86_BUILTIN_FLDENV:
41833 icode = CODE_FOR_fldenv;
41834 break;
41835 default:
41836 gcc_unreachable ();
41837 }
41838
41839 arg0 = CALL_EXPR_ARG (exp, 0);
41840 op0 = expand_normal (arg0);
41841
41842 if (!address_operand (op0, VOIDmode))
41843 {
41844 op0 = convert_memory_address (Pmode, op0);
41845 op0 = copy_addr_to_reg (op0);
41846 }
41847 op0 = gen_rtx_MEM (mode0, op0);
41848
41849 pat = GEN_FCN (icode) (op0);
41850 if (pat)
41851 emit_insn (pat);
41852 return 0;
41853
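/* Illustrative sketch (not part of the original source): these builtins all
   take a single pointer that becomes the BLKmode memory operand built
   above, e.g.

     static char fxbuf[512] __attribute__ ((aligned (16)));
     __builtin_ia32_fxsave (fxbuf);    // save x87/SSE state
     __builtin_ia32_fxrstor (fxbuf);   // restore it later

   The 512-byte size and 16-byte alignment requirements come from the
   FXSAVE architecture, not from this expander.  */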
41854 case IX86_BUILTIN_XSAVE:
41855 case IX86_BUILTIN_XRSTOR:
41856 case IX86_BUILTIN_XSAVE64:
41857 case IX86_BUILTIN_XRSTOR64:
41858 case IX86_BUILTIN_XSAVEOPT:
41859 case IX86_BUILTIN_XSAVEOPT64:
41860 case IX86_BUILTIN_XSAVES:
41861 case IX86_BUILTIN_XRSTORS:
41862 case IX86_BUILTIN_XSAVES64:
41863 case IX86_BUILTIN_XRSTORS64:
41864 case IX86_BUILTIN_XSAVEC:
41865 case IX86_BUILTIN_XSAVEC64:
41866 arg0 = CALL_EXPR_ARG (exp, 0);
41867 arg1 = CALL_EXPR_ARG (exp, 1);
41868 op0 = expand_normal (arg0);
41869 op1 = expand_normal (arg1);
41870
41871 if (!address_operand (op0, VOIDmode))
41872 {
41873 op0 = convert_memory_address (Pmode, op0);
41874 op0 = copy_addr_to_reg (op0);
41875 }
41876 op0 = gen_rtx_MEM (BLKmode, op0);
41877
41878 op1 = force_reg (DImode, op1);
41879
41880 if (TARGET_64BIT)
41881 {
41882 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
41883 NULL, 1, OPTAB_DIRECT);
41884 switch (fcode)
41885 {
41886 case IX86_BUILTIN_XSAVE:
41887 icode = CODE_FOR_xsave_rex64;
41888 break;
41889 case IX86_BUILTIN_XRSTOR:
41890 icode = CODE_FOR_xrstor_rex64;
41891 break;
41892 case IX86_BUILTIN_XSAVE64:
41893 icode = CODE_FOR_xsave64;
41894 break;
41895 case IX86_BUILTIN_XRSTOR64:
41896 icode = CODE_FOR_xrstor64;
41897 break;
41898 case IX86_BUILTIN_XSAVEOPT:
41899 icode = CODE_FOR_xsaveopt_rex64;
41900 break;
41901 case IX86_BUILTIN_XSAVEOPT64:
41902 icode = CODE_FOR_xsaveopt64;
41903 break;
41904 case IX86_BUILTIN_XSAVES:
41905 icode = CODE_FOR_xsaves_rex64;
41906 break;
41907 case IX86_BUILTIN_XRSTORS:
41908 icode = CODE_FOR_xrstors_rex64;
41909 break;
41910 case IX86_BUILTIN_XSAVES64:
41911 icode = CODE_FOR_xsaves64;
41912 break;
41913 case IX86_BUILTIN_XRSTORS64:
41914 icode = CODE_FOR_xrstors64;
41915 break;
41916 case IX86_BUILTIN_XSAVEC:
41917 icode = CODE_FOR_xsavec_rex64;
41918 break;
41919 case IX86_BUILTIN_XSAVEC64:
41920 icode = CODE_FOR_xsavec64;
41921 break;
41922 default:
41923 gcc_unreachable ();
41924 }
41925
41926 op2 = gen_lowpart (SImode, op2);
41927 op1 = gen_lowpart (SImode, op1);
41928 pat = GEN_FCN (icode) (op0, op1, op2);
41929 }
41930 else
41931 {
41932 switch (fcode)
41933 {
41934 case IX86_BUILTIN_XSAVE:
41935 icode = CODE_FOR_xsave;
41936 break;
41937 case IX86_BUILTIN_XRSTOR:
41938 icode = CODE_FOR_xrstor;
41939 break;
41940 case IX86_BUILTIN_XSAVEOPT:
41941 icode = CODE_FOR_xsaveopt;
41942 break;
41943 case IX86_BUILTIN_XSAVES:
41944 icode = CODE_FOR_xsaves;
41945 break;
41946 case IX86_BUILTIN_XRSTORS:
41947 icode = CODE_FOR_xrstors;
41948 break;
41949 case IX86_BUILTIN_XSAVEC:
41950 icode = CODE_FOR_xsavec;
41951 break;
41952 default:
41953 gcc_unreachable ();
41954 }
41955 pat = GEN_FCN (icode) (op0, op1);
41956 }
41957
41958 if (pat)
41959 emit_insn (pat);
41960 return 0;
41961
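/* Illustrative sketch (not part of the original source): the 64-bit mask
   argument is split into EDX:EAX by the LSHIFTRT/gen_lowpart sequence
   above, matching how the XSAVE family encodes its requested-feature
   bitmap.  User code looks like

     __builtin_ia32_xsave (xsave_area, mask);   // e.g. mask = 0x7 for x87+SSE+AVX

   where xsave_area must be a suitably sized, 64-byte-aligned buffer.  */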
41962 case IX86_BUILTIN_LLWPCB:
41963 arg0 = CALL_EXPR_ARG (exp, 0);
41964 op0 = expand_normal (arg0);
41965 icode = CODE_FOR_lwp_llwpcb;
41966 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41967 op0 = ix86_zero_extend_to_Pmode (op0);
41968 emit_insn (gen_lwp_llwpcb (op0));
41969 return 0;
41970
41971 case IX86_BUILTIN_SLWPCB:
41972 icode = CODE_FOR_lwp_slwpcb;
41973 if (!target
41974 || !insn_data[icode].operand[0].predicate (target, Pmode))
41975 target = gen_reg_rtx (Pmode);
41976 emit_insn (gen_lwp_slwpcb (target));
41977 return target;
41978
41979 case IX86_BUILTIN_BEXTRI32:
41980 case IX86_BUILTIN_BEXTRI64:
41981 arg0 = CALL_EXPR_ARG (exp, 0);
41982 arg1 = CALL_EXPR_ARG (exp, 1);
41983 op0 = expand_normal (arg0);
41984 op1 = expand_normal (arg1);
41985 icode = (fcode == IX86_BUILTIN_BEXTRI32
41986 ? CODE_FOR_tbm_bextri_si
41987 : CODE_FOR_tbm_bextri_di);
41988 if (!CONST_INT_P (op1))
41989 {
41990 error ("last argument must be an immediate");
41991 return const0_rtx;
41992 }
41993 else
41994 {
41995 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41996 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41997 op1 = GEN_INT (length);
41998 op2 = GEN_INT (lsb_index);
41999 pat = GEN_FCN (icode) (target, op0, op1, op2);
42000 if (pat)
42001 emit_insn (pat);
42002 return target;
42003 }
42004
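/* Illustrative sketch (not part of the original source): the immediate of
   the BEXTRI builtins packs the bit-field length in bits 15:8 and the
   starting (LSB) position in bits 7:0, which the decoding above splits
   back apart.  For example, extracting 8 bits starting at bit 4:

     unsigned int field = __builtin_ia32_bextri_u32 (x, (8 << 8) | 4);
*/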
42005 case IX86_BUILTIN_RDRAND16_STEP:
42006 icode = CODE_FOR_rdrandhi_1;
42007 mode0 = HImode;
42008 goto rdrand_step;
42009
42010 case IX86_BUILTIN_RDRAND32_STEP:
42011 icode = CODE_FOR_rdrandsi_1;
42012 mode0 = SImode;
42013 goto rdrand_step;
42014
42015 case IX86_BUILTIN_RDRAND64_STEP:
42016 icode = CODE_FOR_rdranddi_1;
42017 mode0 = DImode;
42018
42019 rdrand_step:
42020 op0 = gen_reg_rtx (mode0);
42021 emit_insn (GEN_FCN (icode) (op0));
42022
42023 arg0 = CALL_EXPR_ARG (exp, 0);
42024 op1 = expand_normal (arg0);
42025 if (!address_operand (op1, VOIDmode))
42026 {
42027 op1 = convert_memory_address (Pmode, op1);
42028 op1 = copy_addr_to_reg (op1);
42029 }
42030 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
42031
42032 op1 = gen_reg_rtx (SImode);
42033 emit_move_insn (op1, CONST1_RTX (SImode));
42034
42035 /* Emit SImode conditional move. */
42036 if (mode0 == HImode)
42037 {
42038 op2 = gen_reg_rtx (SImode);
42039 emit_insn (gen_zero_extendhisi2 (op2, op0));
42040 }
42041 else if (mode0 == SImode)
42042 op2 = op0;
42043 else
42044 op2 = gen_rtx_SUBREG (SImode, op0, 0);
42045
42046 if (target == 0
42047 || !register_operand (target, SImode))
42048 target = gen_reg_rtx (SImode);
42049
42050 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
42051 const0_rtx);
42052 emit_insn (gen_rtx_SET (target,
42053 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
42054 return target;
42055
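/* Illustrative sketch (not part of the original source): the conditional
   move above makes the *_step builtins return 1 on success and 0 when the
   hardware had no entropy available, so callers typically retry:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;   // retry until RDRAND reports success (CF set)
*/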
42056 case IX86_BUILTIN_RDSEED16_STEP:
42057 icode = CODE_FOR_rdseedhi_1;
42058 mode0 = HImode;
42059 goto rdseed_step;
42060
42061 case IX86_BUILTIN_RDSEED32_STEP:
42062 icode = CODE_FOR_rdseedsi_1;
42063 mode0 = SImode;
42064 goto rdseed_step;
42065
42066 case IX86_BUILTIN_RDSEED64_STEP:
42067 icode = CODE_FOR_rdseeddi_1;
42068 mode0 = DImode;
42069
42070 rdseed_step:
42071 op0 = gen_reg_rtx (mode0);
42072 emit_insn (GEN_FCN (icode) (op0));
42073
42074 arg0 = CALL_EXPR_ARG (exp, 0);
42075 op1 = expand_normal (arg0);
42076 if (!address_operand (op1, VOIDmode))
42077 {
42078 op1 = convert_memory_address (Pmode, op1);
42079 op1 = copy_addr_to_reg (op1);
42080 }
42081 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
42082
42083 op2 = gen_reg_rtx (QImode);
42084
42085 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
42086 const0_rtx);
42087 emit_insn (gen_rtx_SET (op2, pat));
42088
42089 if (target == 0
42090 || !register_operand (target, SImode))
42091 target = gen_reg_rtx (SImode);
42092
42093 emit_insn (gen_zero_extendqisi2 (target, op2));
42094 return target;
42095
42096 case IX86_BUILTIN_SBB32:
42097 icode = CODE_FOR_subborrowsi;
42098 mode0 = SImode;
42099 goto handlecarry;
42100
42101 case IX86_BUILTIN_SBB64:
42102 icode = CODE_FOR_subborrowdi;
42103 mode0 = DImode;
42104 goto handlecarry;
42105
42106 case IX86_BUILTIN_ADDCARRYX32:
42107 icode = CODE_FOR_addcarrysi;
42108 mode0 = SImode;
42109 goto handlecarry;
42110
42111 case IX86_BUILTIN_ADDCARRYX64:
42112 icode = CODE_FOR_addcarrydi;
42113 mode0 = DImode;
42114
42115 handlecarry:
42116 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
42117 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
42118 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
42119 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
42120
42121 op1 = expand_normal (arg0);
42122 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
42123
42124 op2 = expand_normal (arg1);
42125 if (!register_operand (op2, mode0))
42126 op2 = copy_to_mode_reg (mode0, op2);
42127
42128 op3 = expand_normal (arg2);
42129 if (!register_operand (op3, mode0))
42130 op3 = copy_to_mode_reg (mode0, op3);
42131
42132 op4 = expand_normal (arg3);
42133 if (!address_operand (op4, VOIDmode))
42134 {
42135 op4 = convert_memory_address (Pmode, op4);
42136 op4 = copy_addr_to_reg (op4);
42137 }
42138
42139 /* Generate CF from input operand. */
42140 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
42141
42142 /* Generate instruction that consumes CF. */
42143 op0 = gen_reg_rtx (mode0);
42144
42145 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
42146 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
42147 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
42148
42149 /* Return current CF value. */
42150 if (target == 0)
42151 target = gen_reg_rtx (QImode);
42152
42153 PUT_MODE (pat, QImode);
42154 emit_insn (gen_rtx_SET (target, pat));
42155
42156 /* Store the result. */
42157 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
42158
42159 return target;
42160
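/* Illustrative sketch (not part of the original source): the CF-in/CF-out
   plumbing above is what lets these builtins be chained for multi-word
   arithmetic, e.g. a 64-bit add built from 32-bit pieces:

     unsigned int lo, hi;
     unsigned char c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);   // c is the final carry
*/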
42161 case IX86_BUILTIN_READ_FLAGS:
42162 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
42163
42164 if (optimize
42165 || target == NULL_RTX
42166 || !nonimmediate_operand (target, word_mode)
42167 || GET_MODE (target) != word_mode)
42168 target = gen_reg_rtx (word_mode);
42169
42170 emit_insn (gen_pop (target));
42171 return target;
42172
42173 case IX86_BUILTIN_WRITE_FLAGS:
42174
42175 arg0 = CALL_EXPR_ARG (exp, 0);
42176 op0 = expand_normal (arg0);
42177 if (!general_no_elim_operand (op0, word_mode))
42178 op0 = copy_to_mode_reg (word_mode, op0);
42179
42180 emit_insn (gen_push (op0));
42181 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
42182 return 0;
42183
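/* Illustrative sketch (not part of the original source): READ_FLAGS and
   WRITE_FLAGS are simply a push/pop of the flags register, so e.g.

     unsigned long long f = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (f);   // restore EFLAGS/RFLAGS

   (the _u32 variants are used on 32-bit targets).  */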
42184 case IX86_BUILTIN_KORTESTC16:
42185 icode = CODE_FOR_kortestchi;
42186 mode0 = HImode;
42187 mode1 = CCCmode;
42188 goto kortest;
42189
42190 case IX86_BUILTIN_KORTESTZ16:
42191 icode = CODE_FOR_kortestzhi;
42192 mode0 = HImode;
42193 mode1 = CCZmode;
42194
42195 kortest:
42196 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
42197 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
42198 op0 = expand_normal (arg0);
42199 op1 = expand_normal (arg1);
42200
42201 op0 = copy_to_reg (op0);
42202 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
42203 op1 = copy_to_reg (op1);
42204 op1 = lowpart_subreg (mode0, op1, GET_MODE (op1));
42205
42206 target = gen_reg_rtx (QImode);
42207 emit_insn (gen_rtx_SET (target, const0_rtx));
42208
42209 /* Emit kortest. */
42210 emit_insn (GEN_FCN (icode) (op0, op1));
42211 /* And use setcc to return result from flags. */
42212 ix86_expand_setcc (target, EQ,
42213 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
42214 return target;
42215
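/* Illustrative sketch (not part of the original source): these builtins
   back the mask-register test intrinsics, e.g.

     __mmask16 a = ..., b = ...;
     if (_mm512_kortestz (a, b))     // 1 iff (a | b) == 0
       ...

   The setcc on CCZmode/CCCmode above turns the flag set by KORTESTW into
   the 0/1 return value.  */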
42216 case IX86_BUILTIN_GATHERSIV2DF:
42217 icode = CODE_FOR_avx2_gathersiv2df;
42218 goto gather_gen;
42219 case IX86_BUILTIN_GATHERSIV4DF:
42220 icode = CODE_FOR_avx2_gathersiv4df;
42221 goto gather_gen;
42222 case IX86_BUILTIN_GATHERDIV2DF:
42223 icode = CODE_FOR_avx2_gatherdiv2df;
42224 goto gather_gen;
42225 case IX86_BUILTIN_GATHERDIV4DF:
42226 icode = CODE_FOR_avx2_gatherdiv4df;
42227 goto gather_gen;
42228 case IX86_BUILTIN_GATHERSIV4SF:
42229 icode = CODE_FOR_avx2_gathersiv4sf;
42230 goto gather_gen;
42231 case IX86_BUILTIN_GATHERSIV8SF:
42232 icode = CODE_FOR_avx2_gathersiv8sf;
42233 goto gather_gen;
42234 case IX86_BUILTIN_GATHERDIV4SF:
42235 icode = CODE_FOR_avx2_gatherdiv4sf;
42236 goto gather_gen;
42237 case IX86_BUILTIN_GATHERDIV8SF:
42238 icode = CODE_FOR_avx2_gatherdiv8sf;
42239 goto gather_gen;
42240 case IX86_BUILTIN_GATHERSIV2DI:
42241 icode = CODE_FOR_avx2_gathersiv2di;
42242 goto gather_gen;
42243 case IX86_BUILTIN_GATHERSIV4DI:
42244 icode = CODE_FOR_avx2_gathersiv4di;
42245 goto gather_gen;
42246 case IX86_BUILTIN_GATHERDIV2DI:
42247 icode = CODE_FOR_avx2_gatherdiv2di;
42248 goto gather_gen;
42249 case IX86_BUILTIN_GATHERDIV4DI:
42250 icode = CODE_FOR_avx2_gatherdiv4di;
42251 goto gather_gen;
42252 case IX86_BUILTIN_GATHERSIV4SI:
42253 icode = CODE_FOR_avx2_gathersiv4si;
42254 goto gather_gen;
42255 case IX86_BUILTIN_GATHERSIV8SI:
42256 icode = CODE_FOR_avx2_gathersiv8si;
42257 goto gather_gen;
42258 case IX86_BUILTIN_GATHERDIV4SI:
42259 icode = CODE_FOR_avx2_gatherdiv4si;
42260 goto gather_gen;
42261 case IX86_BUILTIN_GATHERDIV8SI:
42262 icode = CODE_FOR_avx2_gatherdiv8si;
42263 goto gather_gen;
42264 case IX86_BUILTIN_GATHERALTSIV4DF:
42265 icode = CODE_FOR_avx2_gathersiv4df;
42266 goto gather_gen;
42267 case IX86_BUILTIN_GATHERALTDIV8SF:
42268 icode = CODE_FOR_avx2_gatherdiv8sf;
42269 goto gather_gen;
42270 case IX86_BUILTIN_GATHERALTSIV4DI:
42271 icode = CODE_FOR_avx2_gathersiv4di;
42272 goto gather_gen;
42273 case IX86_BUILTIN_GATHERALTDIV8SI:
42274 icode = CODE_FOR_avx2_gatherdiv8si;
42275 goto gather_gen;
42276 case IX86_BUILTIN_GATHER3SIV16SF:
42277 icode = CODE_FOR_avx512f_gathersiv16sf;
42278 goto gather_gen;
42279 case IX86_BUILTIN_GATHER3SIV8DF:
42280 icode = CODE_FOR_avx512f_gathersiv8df;
42281 goto gather_gen;
42282 case IX86_BUILTIN_GATHER3DIV16SF:
42283 icode = CODE_FOR_avx512f_gatherdiv16sf;
42284 goto gather_gen;
42285 case IX86_BUILTIN_GATHER3DIV8DF:
42286 icode = CODE_FOR_avx512f_gatherdiv8df;
42287 goto gather_gen;
42288 case IX86_BUILTIN_GATHER3SIV16SI:
42289 icode = CODE_FOR_avx512f_gathersiv16si;
42290 goto gather_gen;
42291 case IX86_BUILTIN_GATHER3SIV8DI:
42292 icode = CODE_FOR_avx512f_gathersiv8di;
42293 goto gather_gen;
42294 case IX86_BUILTIN_GATHER3DIV16SI:
42295 icode = CODE_FOR_avx512f_gatherdiv16si;
42296 goto gather_gen;
42297 case IX86_BUILTIN_GATHER3DIV8DI:
42298 icode = CODE_FOR_avx512f_gatherdiv8di;
42299 goto gather_gen;
42300 case IX86_BUILTIN_GATHER3ALTSIV8DF:
42301 icode = CODE_FOR_avx512f_gathersiv8df;
42302 goto gather_gen;
42303 case IX86_BUILTIN_GATHER3ALTDIV16SF:
42304 icode = CODE_FOR_avx512f_gatherdiv16sf;
42305 goto gather_gen;
42306 case IX86_BUILTIN_GATHER3ALTSIV8DI:
42307 icode = CODE_FOR_avx512f_gathersiv8di;
42308 goto gather_gen;
42309 case IX86_BUILTIN_GATHER3ALTDIV16SI:
42310 icode = CODE_FOR_avx512f_gatherdiv16si;
42311 goto gather_gen;
42312 case IX86_BUILTIN_GATHER3SIV2DF:
42313 icode = CODE_FOR_avx512vl_gathersiv2df;
42314 goto gather_gen;
42315 case IX86_BUILTIN_GATHER3SIV4DF:
42316 icode = CODE_FOR_avx512vl_gathersiv4df;
42317 goto gather_gen;
42318 case IX86_BUILTIN_GATHER3DIV2DF:
42319 icode = CODE_FOR_avx512vl_gatherdiv2df;
42320 goto gather_gen;
42321 case IX86_BUILTIN_GATHER3DIV4DF:
42322 icode = CODE_FOR_avx512vl_gatherdiv4df;
42323 goto gather_gen;
42324 case IX86_BUILTIN_GATHER3SIV4SF:
42325 icode = CODE_FOR_avx512vl_gathersiv4sf;
42326 goto gather_gen;
42327 case IX86_BUILTIN_GATHER3SIV8SF:
42328 icode = CODE_FOR_avx512vl_gathersiv8sf;
42329 goto gather_gen;
42330 case IX86_BUILTIN_GATHER3DIV4SF:
42331 icode = CODE_FOR_avx512vl_gatherdiv4sf;
42332 goto gather_gen;
42333 case IX86_BUILTIN_GATHER3DIV8SF:
42334 icode = CODE_FOR_avx512vl_gatherdiv8sf;
42335 goto gather_gen;
42336 case IX86_BUILTIN_GATHER3SIV2DI:
42337 icode = CODE_FOR_avx512vl_gathersiv2di;
42338 goto gather_gen;
42339 case IX86_BUILTIN_GATHER3SIV4DI:
42340 icode = CODE_FOR_avx512vl_gathersiv4di;
42341 goto gather_gen;
42342 case IX86_BUILTIN_GATHER3DIV2DI:
42343 icode = CODE_FOR_avx512vl_gatherdiv2di;
42344 goto gather_gen;
42345 case IX86_BUILTIN_GATHER3DIV4DI:
42346 icode = CODE_FOR_avx512vl_gatherdiv4di;
42347 goto gather_gen;
42348 case IX86_BUILTIN_GATHER3SIV4SI:
42349 icode = CODE_FOR_avx512vl_gathersiv4si;
42350 goto gather_gen;
42351 case IX86_BUILTIN_GATHER3SIV8SI:
42352 icode = CODE_FOR_avx512vl_gathersiv8si;
42353 goto gather_gen;
42354 case IX86_BUILTIN_GATHER3DIV4SI:
42355 icode = CODE_FOR_avx512vl_gatherdiv4si;
42356 goto gather_gen;
42357 case IX86_BUILTIN_GATHER3DIV8SI:
42358 icode = CODE_FOR_avx512vl_gatherdiv8si;
42359 goto gather_gen;
42360 case IX86_BUILTIN_GATHER3ALTSIV4DF:
42361 icode = CODE_FOR_avx512vl_gathersiv4df;
42362 goto gather_gen;
42363 case IX86_BUILTIN_GATHER3ALTDIV8SF:
42364 icode = CODE_FOR_avx512vl_gatherdiv8sf;
42365 goto gather_gen;
42366 case IX86_BUILTIN_GATHER3ALTSIV4DI:
42367 icode = CODE_FOR_avx512vl_gathersiv4di;
42368 goto gather_gen;
42369 case IX86_BUILTIN_GATHER3ALTDIV8SI:
42370 icode = CODE_FOR_avx512vl_gatherdiv8si;
42371 goto gather_gen;
42372 case IX86_BUILTIN_SCATTERSIV16SF:
42373 icode = CODE_FOR_avx512f_scattersiv16sf;
42374 goto scatter_gen;
42375 case IX86_BUILTIN_SCATTERSIV8DF:
42376 icode = CODE_FOR_avx512f_scattersiv8df;
42377 goto scatter_gen;
42378 case IX86_BUILTIN_SCATTERDIV16SF:
42379 icode = CODE_FOR_avx512f_scatterdiv16sf;
42380 goto scatter_gen;
42381 case IX86_BUILTIN_SCATTERDIV8DF:
42382 icode = CODE_FOR_avx512f_scatterdiv8df;
42383 goto scatter_gen;
42384 case IX86_BUILTIN_SCATTERSIV16SI:
42385 icode = CODE_FOR_avx512f_scattersiv16si;
42386 goto scatter_gen;
42387 case IX86_BUILTIN_SCATTERSIV8DI:
42388 icode = CODE_FOR_avx512f_scattersiv8di;
42389 goto scatter_gen;
42390 case IX86_BUILTIN_SCATTERDIV16SI:
42391 icode = CODE_FOR_avx512f_scatterdiv16si;
42392 goto scatter_gen;
42393 case IX86_BUILTIN_SCATTERDIV8DI:
42394 icode = CODE_FOR_avx512f_scatterdiv8di;
42395 goto scatter_gen;
42396 case IX86_BUILTIN_SCATTERSIV8SF:
42397 icode = CODE_FOR_avx512vl_scattersiv8sf;
42398 goto scatter_gen;
42399 case IX86_BUILTIN_SCATTERSIV4SF:
42400 icode = CODE_FOR_avx512vl_scattersiv4sf;
42401 goto scatter_gen;
42402 case IX86_BUILTIN_SCATTERSIV4DF:
42403 icode = CODE_FOR_avx512vl_scattersiv4df;
42404 goto scatter_gen;
42405 case IX86_BUILTIN_SCATTERSIV2DF:
42406 icode = CODE_FOR_avx512vl_scattersiv2df;
42407 goto scatter_gen;
42408 case IX86_BUILTIN_SCATTERDIV8SF:
42409 icode = CODE_FOR_avx512vl_scatterdiv8sf;
42410 goto scatter_gen;
42411 case IX86_BUILTIN_SCATTERDIV4SF:
42412 icode = CODE_FOR_avx512vl_scatterdiv4sf;
42413 goto scatter_gen;
42414 case IX86_BUILTIN_SCATTERDIV4DF:
42415 icode = CODE_FOR_avx512vl_scatterdiv4df;
42416 goto scatter_gen;
42417 case IX86_BUILTIN_SCATTERDIV2DF:
42418 icode = CODE_FOR_avx512vl_scatterdiv2df;
42419 goto scatter_gen;
42420 case IX86_BUILTIN_SCATTERSIV8SI:
42421 icode = CODE_FOR_avx512vl_scattersiv8si;
42422 goto scatter_gen;
42423 case IX86_BUILTIN_SCATTERSIV4SI:
42424 icode = CODE_FOR_avx512vl_scattersiv4si;
42425 goto scatter_gen;
42426 case IX86_BUILTIN_SCATTERSIV4DI:
42427 icode = CODE_FOR_avx512vl_scattersiv4di;
42428 goto scatter_gen;
42429 case IX86_BUILTIN_SCATTERSIV2DI:
42430 icode = CODE_FOR_avx512vl_scattersiv2di;
42431 goto scatter_gen;
42432 case IX86_BUILTIN_SCATTERDIV8SI:
42433 icode = CODE_FOR_avx512vl_scatterdiv8si;
42434 goto scatter_gen;
42435 case IX86_BUILTIN_SCATTERDIV4SI:
42436 icode = CODE_FOR_avx512vl_scatterdiv4si;
42437 goto scatter_gen;
42438 case IX86_BUILTIN_SCATTERDIV4DI:
42439 icode = CODE_FOR_avx512vl_scatterdiv4di;
42440 goto scatter_gen;
42441 case IX86_BUILTIN_SCATTERDIV2DI:
42442 icode = CODE_FOR_avx512vl_scatterdiv2di;
42443 goto scatter_gen;
42444 case IX86_BUILTIN_GATHERPFDPD:
42445 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
42446 goto vec_prefetch_gen;
42447 case IX86_BUILTIN_SCATTERALTSIV8DF:
42448 icode = CODE_FOR_avx512f_scattersiv8df;
42449 goto scatter_gen;
42450 case IX86_BUILTIN_SCATTERALTDIV16SF:
42451 icode = CODE_FOR_avx512f_scatterdiv16sf;
42452 goto scatter_gen;
42453 case IX86_BUILTIN_SCATTERALTSIV8DI:
42454 icode = CODE_FOR_avx512f_scattersiv8di;
42455 goto scatter_gen;
42456 case IX86_BUILTIN_SCATTERALTDIV16SI:
42457 icode = CODE_FOR_avx512f_scatterdiv16si;
42458 goto scatter_gen;
42459 case IX86_BUILTIN_GATHERPFDPS:
42460 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
42461 goto vec_prefetch_gen;
42462 case IX86_BUILTIN_GATHERPFQPD:
42463 icode = CODE_FOR_avx512pf_gatherpfv8didf;
42464 goto vec_prefetch_gen;
42465 case IX86_BUILTIN_GATHERPFQPS:
42466 icode = CODE_FOR_avx512pf_gatherpfv8disf;
42467 goto vec_prefetch_gen;
42468 case IX86_BUILTIN_SCATTERPFDPD:
42469 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
42470 goto vec_prefetch_gen;
42471 case IX86_BUILTIN_SCATTERPFDPS:
42472 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
42473 goto vec_prefetch_gen;
42474 case IX86_BUILTIN_SCATTERPFQPD:
42475 icode = CODE_FOR_avx512pf_scatterpfv8didf;
42476 goto vec_prefetch_gen;
42477 case IX86_BUILTIN_SCATTERPFQPS:
42478 icode = CODE_FOR_avx512pf_scatterpfv8disf;
42479 goto vec_prefetch_gen;
42480
42481 gather_gen:
42482 rtx half;
42483 rtx (*gen) (rtx, rtx);
42484
42485 arg0 = CALL_EXPR_ARG (exp, 0);
42486 arg1 = CALL_EXPR_ARG (exp, 1);
42487 arg2 = CALL_EXPR_ARG (exp, 2);
42488 arg3 = CALL_EXPR_ARG (exp, 3);
42489 arg4 = CALL_EXPR_ARG (exp, 4);
42490 op0 = expand_normal (arg0);
42491 op1 = expand_normal (arg1);
42492 op2 = expand_normal (arg2);
42493 op3 = expand_normal (arg3);
42494 op4 = expand_normal (arg4);
42495 /* Note the arg order is different from the operand order. */
42496 mode0 = insn_data[icode].operand[1].mode;
42497 mode2 = insn_data[icode].operand[3].mode;
42498 mode3 = insn_data[icode].operand[4].mode;
42499 mode4 = insn_data[icode].operand[5].mode;
42500
42501 if (target == NULL_RTX
42502 || GET_MODE (target) != insn_data[icode].operand[0].mode
42503 || !insn_data[icode].operand[0].predicate (target,
42504 GET_MODE (target)))
42505 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
42506 else
42507 subtarget = target;
42508
42509 switch (fcode)
42510 {
42511 case IX86_BUILTIN_GATHER3ALTSIV8DF:
42512 case IX86_BUILTIN_GATHER3ALTSIV8DI:
42513 half = gen_reg_rtx (V8SImode);
42514 if (!nonimmediate_operand (op2, V16SImode))
42515 op2 = copy_to_mode_reg (V16SImode, op2);
42516 emit_insn (gen_vec_extract_lo_v16si (half, op2));
42517 op2 = half;
42518 break;
42519 case IX86_BUILTIN_GATHER3ALTSIV4DF:
42520 case IX86_BUILTIN_GATHER3ALTSIV4DI:
42521 case IX86_BUILTIN_GATHERALTSIV4DF:
42522 case IX86_BUILTIN_GATHERALTSIV4DI:
42523 half = gen_reg_rtx (V4SImode);
42524 if (!nonimmediate_operand (op2, V8SImode))
42525 op2 = copy_to_mode_reg (V8SImode, op2);
42526 emit_insn (gen_vec_extract_lo_v8si (half, op2));
42527 op2 = half;
42528 break;
42529 case IX86_BUILTIN_GATHER3ALTDIV16SF:
42530 case IX86_BUILTIN_GATHER3ALTDIV16SI:
42531 half = gen_reg_rtx (mode0);
42532 if (mode0 == V8SFmode)
42533 gen = gen_vec_extract_lo_v16sf;
42534 else
42535 gen = gen_vec_extract_lo_v16si;
42536 if (!nonimmediate_operand (op0, GET_MODE (op0)))
42537 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
42538 emit_insn (gen (half, op0));
42539 op0 = half;
42540 if (GET_MODE (op3) != VOIDmode)
42541 {
42542 if (!nonimmediate_operand (op3, GET_MODE (op3)))
42543 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
42544 emit_insn (gen (half, op3));
42545 op3 = half;
42546 }
42547 break;
42548 case IX86_BUILTIN_GATHER3ALTDIV8SF:
42549 case IX86_BUILTIN_GATHER3ALTDIV8SI:
42550 case IX86_BUILTIN_GATHERALTDIV8SF:
42551 case IX86_BUILTIN_GATHERALTDIV8SI:
42552 half = gen_reg_rtx (mode0);
42553 if (mode0 == V4SFmode)
42554 gen = gen_vec_extract_lo_v8sf;
42555 else
42556 gen = gen_vec_extract_lo_v8si;
42557 if (!nonimmediate_operand (op0, GET_MODE (op0)))
42558 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
42559 emit_insn (gen (half, op0));
42560 op0 = half;
42561 if (GET_MODE (op3) != VOIDmode)
42562 {
42563 if (!nonimmediate_operand (op3, GET_MODE (op3)))
42564 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
42565 emit_insn (gen (half, op3));
42566 op3 = half;
42567 }
42568 break;
42569 default:
42570 break;
42571 }
42572
42573 /* Force memory operand only with base register here. But we
42574 don't want to do it on memory operand for other builtin
42575 functions. */
42576 op1 = ix86_zero_extend_to_Pmode (op1);
42577
42578 if (!insn_data[icode].operand[1].predicate (op0, mode0))
42579 op0 = copy_to_mode_reg (mode0, op0);
42580 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
42581 op1 = copy_to_mode_reg (Pmode, op1);
42582 if (!insn_data[icode].operand[3].predicate (op2, mode2))
42583 op2 = copy_to_mode_reg (mode2, op2);
42584
42585 op3 = fixup_modeless_constant (op3, mode3);
42586
42587 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
42588 {
42589 if (!insn_data[icode].operand[4].predicate (op3, mode3))
42590 op3 = copy_to_mode_reg (mode3, op3);
42591 }
42592 else
42593 {
42594 op3 = copy_to_reg (op3);
42595 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
42596 }
42597 if (!insn_data[icode].operand[5].predicate (op4, mode4))
42598 {
42599 error ("the last argument must be scale 1, 2, 4, 8");
42600 return const0_rtx;
42601 }
42602
42603 /* Optimize. If mask is known to have all high bits set,
42604 replace op0 with pc_rtx to signal that the instruction
42605 overwrites the whole destination and doesn't use its
42606 previous contents. */
42607 if (optimize)
42608 {
42609 if (TREE_CODE (arg3) == INTEGER_CST)
42610 {
42611 if (integer_all_onesp (arg3))
42612 op0 = pc_rtx;
42613 }
42614 else if (TREE_CODE (arg3) == VECTOR_CST)
42615 {
42616 unsigned int negative = 0;
42617 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
42618 {
42619 tree cst = VECTOR_CST_ELT (arg3, i);
42620 if (TREE_CODE (cst) == INTEGER_CST
42621 && tree_int_cst_sign_bit (cst))
42622 negative++;
42623 else if (TREE_CODE (cst) == REAL_CST
42624 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
42625 negative++;
42626 }
42627 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
42628 op0 = pc_rtx;
42629 }
42630 else if (TREE_CODE (arg3) == SSA_NAME
42631 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
42632 {
42633 /* Recognize also when mask is like:
42634 __v2df src = _mm_setzero_pd ();
42635 __v2df mask = _mm_cmpeq_pd (src, src);
42636 or
42637 __v8sf src = _mm256_setzero_ps ();
42638 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
42639 as that is a cheaper way to load all ones into
42640 a register than having to load a constant from
42641 memory. */
42642 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
42643 if (is_gimple_call (def_stmt))
42644 {
42645 tree fndecl = gimple_call_fndecl (def_stmt);
42646 if (fndecl
42647 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
42648 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
42649 {
42650 case IX86_BUILTIN_CMPPD:
42651 case IX86_BUILTIN_CMPPS:
42652 case IX86_BUILTIN_CMPPD256:
42653 case IX86_BUILTIN_CMPPS256:
42654 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
42655 break;
42656 /* FALLTHRU */
42657 case IX86_BUILTIN_CMPEQPD:
42658 case IX86_BUILTIN_CMPEQPS:
42659 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
42660 && initializer_zerop (gimple_call_arg (def_stmt,
42661 1)))
42662 op0 = pc_rtx;
42663 break;
42664 default:
42665 break;
42666 }
42667 }
42668 }
42669 }
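/* Illustrative sketch (not part of the original source): the all-ones-mask
   case above is hit by user code such as

     __m256d src  = _mm256_setzero_pd ();
     __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
     __m256d v    = _mm256_mask_i32gather_pd (src, base, idx, mask, 8);

   where the mask is provably all ones, so op0 is replaced by pc_rtx and
   the gather no longer depends on the previous contents of SRC.  */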
42670
42671 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
42672 if (! pat)
42673 return const0_rtx;
42674 emit_insn (pat);
42675
42676 switch (fcode)
42677 {
42678 case IX86_BUILTIN_GATHER3DIV16SF:
42679 if (target == NULL_RTX)
42680 target = gen_reg_rtx (V8SFmode);
42681 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
42682 break;
42683 case IX86_BUILTIN_GATHER3DIV16SI:
42684 if (target == NULL_RTX)
42685 target = gen_reg_rtx (V8SImode);
42686 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
42687 break;
42688 case IX86_BUILTIN_GATHER3DIV8SF:
42689 case IX86_BUILTIN_GATHERDIV8SF:
42690 if (target == NULL_RTX)
42691 target = gen_reg_rtx (V4SFmode);
42692 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
42693 break;
42694 case IX86_BUILTIN_GATHER3DIV8SI:
42695 case IX86_BUILTIN_GATHERDIV8SI:
42696 if (target == NULL_RTX)
42697 target = gen_reg_rtx (V4SImode);
42698 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
42699 break;
42700 default:
42701 target = subtarget;
42702 break;
42703 }
42704 return target;
42705
42706 scatter_gen:
42707 arg0 = CALL_EXPR_ARG (exp, 0);
42708 arg1 = CALL_EXPR_ARG (exp, 1);
42709 arg2 = CALL_EXPR_ARG (exp, 2);
42710 arg3 = CALL_EXPR_ARG (exp, 3);
42711 arg4 = CALL_EXPR_ARG (exp, 4);
42712 op0 = expand_normal (arg0);
42713 op1 = expand_normal (arg1);
42714 op2 = expand_normal (arg2);
42715 op3 = expand_normal (arg3);
42716 op4 = expand_normal (arg4);
42717 mode1 = insn_data[icode].operand[1].mode;
42718 mode2 = insn_data[icode].operand[2].mode;
42719 mode3 = insn_data[icode].operand[3].mode;
42720 mode4 = insn_data[icode].operand[4].mode;
42721
42722 /* Scatter instruction stores operand op3 to memory with
42723 indices from op2 and scale from op4 under writemask op1.
42724 If index operand op2 has more elements than source operand
42725 op3, one needs to use only its low half. And vice versa. */
42726 switch (fcode)
42727 {
42728 case IX86_BUILTIN_SCATTERALTSIV8DF:
42729 case IX86_BUILTIN_SCATTERALTSIV8DI:
42730 half = gen_reg_rtx (V8SImode);
42731 if (!nonimmediate_operand (op2, V16SImode))
42732 op2 = copy_to_mode_reg (V16SImode, op2);
42733 emit_insn (gen_vec_extract_lo_v16si (half, op2));
42734 op2 = half;
42735 break;
42736 case IX86_BUILTIN_SCATTERALTDIV16SF:
42737 case IX86_BUILTIN_SCATTERALTDIV16SI:
42738 half = gen_reg_rtx (mode3);
42739 if (mode3 == V8SFmode)
42740 gen = gen_vec_extract_lo_v16sf;
42741 else
42742 gen = gen_vec_extract_lo_v16si;
42743 if (!nonimmediate_operand (op3, GET_MODE (op3)))
42744 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
42745 emit_insn (gen (half, op3));
42746 op3 = half;
42747 break;
42748 default:
42749 break;
42750 }
42751
42752 /* Force memory operand only with base register here. But we
42753 don't want to do it on memory operand for other builtin
42754 functions. */
42755 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
42756
42757 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
42758 op0 = copy_to_mode_reg (Pmode, op0);
42759
42760 op1 = fixup_modeless_constant (op1, mode1);
42761
42762 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
42763 {
42764 if (!insn_data[icode].operand[1].predicate (op1, mode1))
42765 op1 = copy_to_mode_reg (mode1, op1);
42766 }
42767 else
42768 {
42769 op1 = copy_to_reg (op1);
42770 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
42771 }
42772
42773 if (!insn_data[icode].operand[2].predicate (op2, mode2))
42774 op2 = copy_to_mode_reg (mode2, op2);
42775
42776 if (!insn_data[icode].operand[3].predicate (op3, mode3))
42777 op3 = copy_to_mode_reg (mode3, op3);
42778
42779 if (!insn_data[icode].operand[4].predicate (op4, mode4))
42780 {
42781 error ("the last argument must be scale 1, 2, 4, 8");
42782 return const0_rtx;
42783 }
42784
42785 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
42786 if (! pat)
42787 return const0_rtx;
42788
42789 emit_insn (pat);
42790 return 0;
42791
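/* Illustrative sketch (not part of the original source): a typical user of
   this scatter expansion is

     _mm512_i32scatter_ps (base, idx, vals, 4);   // 16 floats, 32-bit indices

   which stores each element of VALS to base + idx[i] * 4; the masked
   variants additionally supply the writemask handled as op1 above.  */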
42792 vec_prefetch_gen:
42793 arg0 = CALL_EXPR_ARG (exp, 0);
42794 arg1 = CALL_EXPR_ARG (exp, 1);
42795 arg2 = CALL_EXPR_ARG (exp, 2);
42796 arg3 = CALL_EXPR_ARG (exp, 3);
42797 arg4 = CALL_EXPR_ARG (exp, 4);
42798 op0 = expand_normal (arg0);
42799 op1 = expand_normal (arg1);
42800 op2 = expand_normal (arg2);
42801 op3 = expand_normal (arg3);
42802 op4 = expand_normal (arg4);
42803 mode0 = insn_data[icode].operand[0].mode;
42804 mode1 = insn_data[icode].operand[1].mode;
42805 mode3 = insn_data[icode].operand[3].mode;
42806 mode4 = insn_data[icode].operand[4].mode;
42807
42808 op0 = fixup_modeless_constant (op0, mode0);
42809
42810 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
42811 {
42812 if (!insn_data[icode].operand[0].predicate (op0, mode0))
42813 op0 = copy_to_mode_reg (mode0, op0);
42814 }
42815 else
42816 {
42817 op0 = copy_to_reg (op0);
42818 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
42819 }
42820
42821 if (!insn_data[icode].operand[1].predicate (op1, mode1))
42822 op1 = copy_to_mode_reg (mode1, op1);
42823
42824 /* Force memory operand only with base register here. But we
42825 don't want to do it on memory operand for other builtin
42826 functions. */
42827 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
42828
42829 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
42830 op2 = copy_to_mode_reg (Pmode, op2);
42831
42832 if (!insn_data[icode].operand[3].predicate (op3, mode3))
42833 {
42834 error ("the fourth argument must be scale 1, 2, 4, 8");
42835 return const0_rtx;
42836 }
42837
42838 if (!insn_data[icode].operand[4].predicate (op4, mode4))
42839 {
42840 error ("incorrect hint operand");
42841 return const0_rtx;
42842 }
42843
42844 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
42845 if (! pat)
42846 return const0_rtx;
42847
42848 emit_insn (pat);
42849
42850 return 0;
42851
42852 case IX86_BUILTIN_XABORT:
42853 icode = CODE_FOR_xabort;
42854 arg0 = CALL_EXPR_ARG (exp, 0);
42855 op0 = expand_normal (arg0);
42856 mode0 = insn_data[icode].operand[0].mode;
42857 if (!insn_data[icode].operand[0].predicate (op0, mode0))
42858 {
42859 error ("the xabort's argument must be an 8-bit immediate");
42860 return const0_rtx;
42861 }
42862 emit_insn (gen_xabort (op0));
42863 return 0;
42864
42865 default:
42866 break;
42867 }
42868
42869 for (i = 0, d = bdesc_special_args;
42870 i < ARRAY_SIZE (bdesc_special_args);
42871 i++, d++)
42872 if (d->code == fcode)
42873 return ix86_expand_special_args_builtin (d, exp, target);
42874
42875 for (i = 0, d = bdesc_args;
42876 i < ARRAY_SIZE (bdesc_args);
42877 i++, d++)
42878 if (d->code == fcode)
42879 switch (fcode)
42880 {
42881 case IX86_BUILTIN_FABSQ:
42882 case IX86_BUILTIN_COPYSIGNQ:
42883 if (!TARGET_SSE)
42884 /* Emit a normal call if SSE isn't available. */
42885 return expand_call (exp, target, ignore);
42886 /* FALLTHRU */
42887 default:
42888 return ix86_expand_args_builtin (d, exp, target);
42889 }
42890
42891 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
42892 if (d->code == fcode)
42893 return ix86_expand_sse_comi (d, exp, target);
42894
42895 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
42896 if (d->code == fcode)
42897 return ix86_expand_round_builtin (d, exp, target);
42898
42899 for (i = 0, d = bdesc_pcmpestr;
42900 i < ARRAY_SIZE (bdesc_pcmpestr);
42901 i++, d++)
42902 if (d->code == fcode)
42903 return ix86_expand_sse_pcmpestr (d, exp, target);
42904
42905 for (i = 0, d = bdesc_pcmpistr;
42906 i < ARRAY_SIZE (bdesc_pcmpistr);
42907 i++, d++)
42908 if (d->code == fcode)
42909 return ix86_expand_sse_pcmpistr (d, exp, target);
42910
42911 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
42912 if (d->code == fcode)
42913 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
42914 (enum ix86_builtin_func_type)
42915 d->flag, d->comparison);
42916
42917 gcc_unreachable ();
42918 }
42919
42920 /* This returns the target-specific builtin with code CODE if
42921 current_function_decl has visibility on this builtin, which is checked
42922 using isa flags. Returns NULL_TREE otherwise. */
42923
42924 static tree ix86_get_builtin (enum ix86_builtins code)
42925 {
42926 struct cl_target_option *opts;
42927 tree target_tree = NULL_TREE;
42928
42929 /* Determine the isa flags of current_function_decl. */
42930
42931 if (current_function_decl)
42932 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
42933
42934 if (target_tree == NULL)
42935 target_tree = target_option_default_node;
42936
42937 opts = TREE_TARGET_OPTION (target_tree);
42938
42939 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
42940 return ix86_builtin_decl (code, true);
42941 else
42942 return NULL_TREE;
42943 }
42944
42945 /* Return the function decl of the target-specific builtin
42946 corresponding to the MPX builtin passed in FCODE. */
42947 static tree
42948 ix86_builtin_mpx_function (unsigned fcode)
42949 {
42950 switch (fcode)
42951 {
42952 case BUILT_IN_CHKP_BNDMK:
42953 return ix86_builtins[IX86_BUILTIN_BNDMK];
42954
42955 case BUILT_IN_CHKP_BNDSTX:
42956 return ix86_builtins[IX86_BUILTIN_BNDSTX];
42957
42958 case BUILT_IN_CHKP_BNDLDX:
42959 return ix86_builtins[IX86_BUILTIN_BNDLDX];
42960
42961 case BUILT_IN_CHKP_BNDCL:
42962 return ix86_builtins[IX86_BUILTIN_BNDCL];
42963
42964 case BUILT_IN_CHKP_BNDCU:
42965 return ix86_builtins[IX86_BUILTIN_BNDCU];
42966
42967 case BUILT_IN_CHKP_BNDRET:
42968 return ix86_builtins[IX86_BUILTIN_BNDRET];
42969
42970 case BUILT_IN_CHKP_INTERSECT:
42971 return ix86_builtins[IX86_BUILTIN_BNDINT];
42972
42973 case BUILT_IN_CHKP_NARROW:
42974 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
42975
42976 case BUILT_IN_CHKP_SIZEOF:
42977 return ix86_builtins[IX86_BUILTIN_SIZEOF];
42978
42979 case BUILT_IN_CHKP_EXTRACT_LOWER:
42980 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
42981
42982 case BUILT_IN_CHKP_EXTRACT_UPPER:
42983 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
42984
42985 default:
42986 return NULL_TREE;
42987 }
42988
42989 gcc_unreachable ();
42990 }
42991
42992 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42993
42994 Return an address to be used to load/store bounds for pointer
42995 passed in SLOT.
42996
42997 SLOT_NO is an integer constant holding the number of a
42998 target-dependent special slot to be used when SLOT is not a memory.
42999
43000 SPECIAL_BASE is a pointer to be used as a base of fake address
43001 to access special slots in Bounds Table. SPECIAL_BASE[-1],
43002 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
43003
43004 static rtx
43005 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
43006 {
43007 rtx addr = NULL;
43008
43009 /* A NULL slot means we pass bounds for a pointer not passed to the
43010 function at all. A register slot means we pass the pointer in a
43011 register. In both cases bounds are passed via the Bounds
43012 Table. Since we do not have the actual pointer stored in memory,
43013 we have to use fake addresses to access the Bounds Table. We
43014 start with (special_base - sizeof (void*)) and decrease this
43015 address by the pointer size to get addresses for other slots. */
43016 if (!slot || REG_P (slot))
43017 {
43018 gcc_assert (CONST_INT_P (slot_no));
43019 addr = plus_constant (Pmode, special_base,
43020 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
43021 }
43022 /* If the pointer is passed in memory then its address is used to
43023 access the Bounds Table. */
43024 else if (MEM_P (slot))
43025 {
43026 addr = XEXP (slot, 0);
43027 if (!register_operand (addr, Pmode))
43028 addr = copy_addr_to_reg (addr);
43029 }
43030 else
43031 gcc_unreachable ();
43032
43033 return addr;
43034 }
43035
43036 /* Expand pass uses this hook to load bounds for function parameter
43037 PTR passed in SLOT in case its bounds are not passed in a register.
43038
43039 If SLOT is a memory, then bounds are loaded as for a regular pointer
43040 loaded from memory. PTR may be NULL when SLOT is a memory;
43041 in that case the value of PTR (if required) may be loaded from SLOT.
43042
43043 If SLOT is NULL or a register then SLOT_NO is an integer constant
43044 holding the number of the target-dependent special slot which should be
43045 used to obtain bounds.
43046
43047 Return loaded bounds. */
43048
43049 static rtx
43050 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
43051 {
43052 rtx reg = gen_reg_rtx (BNDmode);
43053 rtx addr;
43054
43055 /* Get address to be used to access Bounds Table. Special slots start
43056 at the location of return address of the current function. */
43057 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
43058
43059 /* Load pointer value from a memory if we don't have it. */
43060 if (!ptr)
43061 {
43062 gcc_assert (MEM_P (slot));
43063 ptr = copy_addr_to_reg (slot);
43064 }
43065
43066 if (!register_operand (ptr, Pmode))
43067 ptr = ix86_zero_extend_to_Pmode (ptr);
43068
43069 emit_insn (BNDmode == BND64mode
43070 ? gen_bnd64_ldx (reg, addr, ptr)
43071 : gen_bnd32_ldx (reg, addr, ptr));
43072
43073 return reg;
43074 }
43075
43076 /* Expand pass uses this hook to store BOUNDS for call argument PTR
43077 passed in SLOT in case BOUNDS are not passed in a register.
43078
43079 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
43080 stored in memory. PTR may be NULL when SLOT is a memory;
43081 in that case the value of PTR (if required) may be loaded from SLOT.
43082
43083 If SLOT is NULL or a register then SLOT_NO is an integer constant
43084 holding the number of the target-dependent special slot which should be
43085 used to store BOUNDS. */
43086
43087 static void
43088 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
43089 {
43090 rtx addr;
43091
43092 /* Get address to be used to access Bounds Table. Special slots start
43093 at the location of return address of a called function. */
43094 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
43095
43096 /* Load pointer value from a memory if we don't have it. */
43097 if (!ptr)
43098 {
43099 gcc_assert (MEM_P (slot));
43100 ptr = copy_addr_to_reg (slot);
43101 }
43102
43103 if (!register_operand (ptr, Pmode))
43104 ptr = ix86_zero_extend_to_Pmode (ptr);
43105
43106 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
43107 if (!register_operand (bounds, BNDmode))
43108 bounds = copy_to_mode_reg (BNDmode, bounds);
43109
43110 emit_insn (BNDmode == BND64mode
43111 ? gen_bnd64_stx (addr, ptr, bounds)
43112 : gen_bnd32_stx (addr, ptr, bounds));
43113 }
43114
43115 /* Load and return bounds returned by function in SLOT. */
43116
43117 static rtx
43118 ix86_load_returned_bounds (rtx slot)
43119 {
43120 rtx res;
43121
43122 gcc_assert (REG_P (slot));
43123 res = gen_reg_rtx (BNDmode);
43124 emit_move_insn (res, slot);
43125
43126 return res;
43127 }
43128
43129 /* Store BOUNDS returned by function into SLOT. */
43130
43131 static void
43132 ix86_store_returned_bounds (rtx slot, rtx bounds)
43133 {
43134 gcc_assert (REG_P (slot));
43135 emit_move_insn (slot, bounds);
43136 }
43137
43138 /* Returns a function decl for a vectorized version of the combined function
43139 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
43140 if it is not available. */
43141
43142 static tree
43143 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
43144 tree type_in)
43145 {
43146 machine_mode in_mode, out_mode;
43147 int in_n, out_n;
43148
43149 if (TREE_CODE (type_out) != VECTOR_TYPE
43150 || TREE_CODE (type_in) != VECTOR_TYPE)
43151 return NULL_TREE;
43152
43153 out_mode = TYPE_MODE (TREE_TYPE (type_out));
43154 out_n = TYPE_VECTOR_SUBPARTS (type_out);
43155 in_mode = TYPE_MODE (TREE_TYPE (type_in));
43156 in_n = TYPE_VECTOR_SUBPARTS (type_in);
43157
43158 switch (fn)
43159 {
43160 CASE_CFN_EXP2:
43161 if (out_mode == SFmode && in_mode == SFmode)
43162 {
43163 if (out_n == 16 && in_n == 16)
43164 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
43165 }
43166 break;
43167
43168 CASE_CFN_IFLOOR:
43169 CASE_CFN_LFLOOR:
43170 CASE_CFN_LLFLOOR:
43171 /* The round insn does not trap on denormals. */
43172 if (flag_trapping_math || !TARGET_ROUND)
43173 break;
43174
43175 if (out_mode == SImode && in_mode == DFmode)
43176 {
43177 if (out_n == 4 && in_n == 2)
43178 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
43179 else if (out_n == 8 && in_n == 4)
43180 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
43181 else if (out_n == 16 && in_n == 8)
43182 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
43183 }
43184 if (out_mode == SImode && in_mode == SFmode)
43185 {
43186 if (out_n == 4 && in_n == 4)
43187 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
43188 else if (out_n == 8 && in_n == 8)
43189 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
43190 else if (out_n == 16 && in_n == 16)
43191 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
43192 }
43193 break;
43194
43195 CASE_CFN_ICEIL:
43196 CASE_CFN_LCEIL:
43197 CASE_CFN_LLCEIL:
43198 /* The round insn does not trap on denormals. */
43199 if (flag_trapping_math || !TARGET_ROUND)
43200 break;
43201
43202 if (out_mode == SImode && in_mode == DFmode)
43203 {
43204 if (out_n == 4 && in_n == 2)
43205 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
43206 else if (out_n == 8 && in_n == 4)
43207 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
43208 else if (out_n == 16 && in_n == 8)
43209 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
43210 }
43211 if (out_mode == SImode && in_mode == SFmode)
43212 {
43213 if (out_n == 4 && in_n == 4)
43214 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
43215 else if (out_n == 8 && in_n == 8)
43216 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
43217 else if (out_n == 16 && in_n == 16)
43218 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
43219 }
43220 break;
43221
43222 CASE_CFN_IRINT:
43223 CASE_CFN_LRINT:
43224 CASE_CFN_LLRINT:
43225 if (out_mode == SImode && in_mode == DFmode)
43226 {
43227 if (out_n == 4 && in_n == 2)
43228 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
43229 else if (out_n == 8 && in_n == 4)
43230 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
43231 else if (out_n == 16 && in_n == 8)
43232 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
43233 }
43234 if (out_mode == SImode && in_mode == SFmode)
43235 {
43236 if (out_n == 4 && in_n == 4)
43237 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
43238 else if (out_n == 8 && in_n == 8)
43239 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
43240 else if (out_n == 16 && in_n == 16)
43241 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
43242 }
43243 break;
43244
43245 CASE_CFN_IROUND:
43246 CASE_CFN_LROUND:
43247 CASE_CFN_LLROUND:
43248 /* The round insn does not trap on denormals. */
43249 if (flag_trapping_math || !TARGET_ROUND)
43250 break;
43251
43252 if (out_mode == SImode && in_mode == DFmode)
43253 {
43254 if (out_n == 4 && in_n == 2)
43255 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
43256 else if (out_n == 8 && in_n == 4)
43257 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
43258 else if (out_n == 16 && in_n == 8)
43259 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
43260 }
43261 if (out_mode == SImode && in_mode == SFmode)
43262 {
43263 if (out_n == 4 && in_n == 4)
43264 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
43265 else if (out_n == 8 && in_n == 8)
43266 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
43267 else if (out_n == 16 && in_n == 16)
43268 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
43269 }
43270 break;
43271
43272 CASE_CFN_FLOOR:
43273 /* The round insn does not trap on denormals. */
43274 if (flag_trapping_math || !TARGET_ROUND)
43275 break;
43276
43277 if (out_mode == DFmode && in_mode == DFmode)
43278 {
43279 if (out_n == 2 && in_n == 2)
43280 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
43281 else if (out_n == 4 && in_n == 4)
43282 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
43283 else if (out_n == 8 && in_n == 8)
43284 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
43285 }
43286 if (out_mode == SFmode && in_mode == SFmode)
43287 {
43288 if (out_n == 4 && in_n == 4)
43289 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
43290 else if (out_n == 8 && in_n == 8)
43291 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
43292 else if (out_n == 16 && in_n == 16)
43293 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
43294 }
43295 break;
43296
43297 CASE_CFN_CEIL:
43298 /* The round insn does not trap on denormals. */
43299 if (flag_trapping_math || !TARGET_ROUND)
43300 break;
43301
43302 if (out_mode == DFmode && in_mode == DFmode)
43303 {
43304 if (out_n == 2 && in_n == 2)
43305 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
43306 else if (out_n == 4 && in_n == 4)
43307 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
43308 else if (out_n == 8 && in_n == 8)
43309 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
43310 }
43311 if (out_mode == SFmode && in_mode == SFmode)
43312 {
43313 if (out_n == 4 && in_n == 4)
43314 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
43315 else if (out_n == 8 && in_n == 8)
43316 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
43317 else if (out_n == 16 && in_n == 16)
43318 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
43319 }
43320 break;
43321
43322 CASE_CFN_TRUNC:
43323 /* The round insn does not trap on denormals. */
43324 if (flag_trapping_math || !TARGET_ROUND)
43325 break;
43326
43327 if (out_mode == DFmode && in_mode == DFmode)
43328 {
43329 if (out_n == 2 && in_n == 2)
43330 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
43331 else if (out_n == 4 && in_n == 4)
43332 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
43333 else if (out_n == 8 && in_n == 8)
43334 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
43335 }
43336 if (out_mode == SFmode && in_mode == SFmode)
43337 {
43338 if (out_n == 4 && in_n == 4)
43339 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
43340 else if (out_n == 8 && in_n == 8)
43341 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
43342 else if (out_n == 16 && in_n == 16)
43343 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
43344 }
43345 break;
43346
43347 CASE_CFN_RINT:
43348 /* The round insn does not trap on denormals. */
43349 if (flag_trapping_math || !TARGET_ROUND)
43350 break;
43351
43352 if (out_mode == DFmode && in_mode == DFmode)
43353 {
43354 if (out_n == 2 && in_n == 2)
43355 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
43356 else if (out_n == 4 && in_n == 4)
43357 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
43358 }
43359 if (out_mode == SFmode && in_mode == SFmode)
43360 {
43361 if (out_n == 4 && in_n == 4)
43362 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
43363 else if (out_n == 8 && in_n == 8)
43364 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
43365 }
43366 break;
43367
43368 CASE_CFN_FMA:
43369 if (out_mode == DFmode && in_mode == DFmode)
43370 {
43371 if (out_n == 2 && in_n == 2)
43372 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
43373 if (out_n == 4 && in_n == 4)
43374 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
43375 }
43376 if (out_mode == SFmode && in_mode == SFmode)
43377 {
43378 if (out_n == 4 && in_n == 4)
43379 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
43380 if (out_n == 8 && in_n == 8)
43381 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
43382 }
43383 break;
43384
43385 default:
43386 break;
43387 }
43388
43389 /* Dispatch to a handler for a vectorization library. */
43390 if (ix86_veclib_handler)
43391 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
43392
43393 return NULL_TREE;
43394 }
43395
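/* Illustrative sketch (not part of the original source): the hook above is
   what lets the vectorizer turn a scalar libm call into a packed insn,
   e.g. with -O3 -mavx -fno-trapping-math a loop like

     for (int i = 0; i < n; i++)
       a[i] = __builtin_floor (b[i]);

   is vectorized using IX86_BUILTIN_FLOORPD256 (VROUNDPD).  */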
43396 /* Handler for an SVML-style interface to
43397 a library with vectorized intrinsics. */
43398
43399 static tree
43400 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
43401 {
43402 char name[20];
43403 tree fntype, new_fndecl, args;
43404 unsigned arity;
43405 const char *bname;
43406 machine_mode el_mode, in_mode;
43407 int n, in_n;
43408
43409 /* The SVML is suitable for unsafe math only. */
43410 if (!flag_unsafe_math_optimizations)
43411 return NULL_TREE;
43412
43413 el_mode = TYPE_MODE (TREE_TYPE (type_out));
43414 n = TYPE_VECTOR_SUBPARTS (type_out);
43415 in_mode = TYPE_MODE (TREE_TYPE (type_in));
43416 in_n = TYPE_VECTOR_SUBPARTS (type_in);
43417 if (el_mode != in_mode
43418 || n != in_n)
43419 return NULL_TREE;
43420
43421 switch (fn)
43422 {
43423 CASE_CFN_EXP:
43424 CASE_CFN_LOG:
43425 CASE_CFN_LOG10:
43426 CASE_CFN_POW:
43427 CASE_CFN_TANH:
43428 CASE_CFN_TAN:
43429 CASE_CFN_ATAN:
43430 CASE_CFN_ATAN2:
43431 CASE_CFN_ATANH:
43432 CASE_CFN_CBRT:
43433 CASE_CFN_SINH:
43434 CASE_CFN_SIN:
43435 CASE_CFN_ASINH:
43436 CASE_CFN_ASIN:
43437 CASE_CFN_COSH:
43438 CASE_CFN_COS:
43439 CASE_CFN_ACOSH:
43440 CASE_CFN_ACOS:
43441 if ((el_mode != DFmode || n != 2)
43442 && (el_mode != SFmode || n != 4))
43443 return NULL_TREE;
43444 break;
43445
43446 default:
43447 return NULL_TREE;
43448 }
43449
43450 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
43451 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
43452
43453 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
43454 strcpy (name, "vmlsLn4");
43455 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
43456 strcpy (name, "vmldLn2");
43457 else if (n == 4)
43458 {
43459 sprintf (name, "vmls%s", bname+10);
43460 name[strlen (name)-1] = '4';
43461 }
43462 else
43463 sprintf (name, "vmld%s2", bname+10);
43464
43465 /* Convert to uppercase. */
43466 name[4] &= ~0x20;
43467
43468 arity = 0;
43469 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
43470 arity++;
43471
43472 if (arity == 1)
43473 fntype = build_function_type_list (type_out, type_in, NULL);
43474 else
43475 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
43476
43477 /* Build a function declaration for the vectorized function. */
43478 new_fndecl = build_decl (BUILTINS_LOCATION,
43479 FUNCTION_DECL, get_identifier (name), fntype);
43480 TREE_PUBLIC (new_fndecl) = 1;
43481 DECL_EXTERNAL (new_fndecl) = 1;
43482 DECL_IS_NOVOPS (new_fndecl) = 1;
43483 TREE_READONLY (new_fndecl) = 1;
43484
43485 return new_fndecl;
43486 }
43487
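/* Illustrative sketch (not part of the original source): the name mangling
   above produces, for example,

     sinf over V4SFmode  ->  "vmlsSin4"
     sin  over V2DFmode  ->  "vmldSin2"

   which are the entry points expected when compiling with
   -mveclibabi=svml.  */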
43488 /* Handler for an ACML-style interface to
43489 a library with vectorized intrinsics. */
43490
43491 static tree
43492 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
43493 {
43494 char name[20] = "__vr.._";
43495 tree fntype, new_fndecl, args;
43496 unsigned arity;
43497 const char *bname;
43498 machine_mode el_mode, in_mode;
43499 int n, in_n;
43500
43501 /* The ACML is 64-bit only and suitable for unsafe math only, as
43502 it does not correctly support parts of IEEE with the required
43503 precision such as denormals. */
43504 if (!TARGET_64BIT
43505 || !flag_unsafe_math_optimizations)
43506 return NULL_TREE;
43507
43508 el_mode = TYPE_MODE (TREE_TYPE (type_out));
43509 n = TYPE_VECTOR_SUBPARTS (type_out);
43510 in_mode = TYPE_MODE (TREE_TYPE (type_in));
43511 in_n = TYPE_VECTOR_SUBPARTS (type_in);
43512 if (el_mode != in_mode
43513 || n != in_n)
43514 return NULL_TREE;
43515
43516 switch (fn)
43517 {
43518 CASE_CFN_SIN:
43519 CASE_CFN_COS:
43520 CASE_CFN_EXP:
43521 CASE_CFN_LOG:
43522 CASE_CFN_LOG2:
43523 CASE_CFN_LOG10:
43524 if (el_mode == DFmode && n == 2)
43525 {
43526 name[4] = 'd';
43527 name[5] = '2';
43528 }
43529 else if (el_mode == SFmode && n == 4)
43530 {
43531 name[4] = 's';
43532 name[5] = '4';
43533 }
43534 else
43535 return NULL_TREE;
43536 break;
43537
43538 default:
43539 return NULL_TREE;
43540 }
43541
43542 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
43543 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
43544 sprintf (name + 7, "%s", bname+10);
43545
43546 arity = 0;
43547 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
43548 arity++;
43549
43550 if (arity == 1)
43551 fntype = build_function_type_list (type_out, type_in, NULL);
43552 else
43553 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
43554
43555 /* Build a function declaration for the vectorized function. */
43556 new_fndecl = build_decl (BUILTINS_LOCATION,
43557 FUNCTION_DECL, get_identifier (name), fntype);
43558 TREE_PUBLIC (new_fndecl) = 1;
43559 DECL_EXTERNAL (new_fndecl) = 1;
43560 DECL_IS_NOVOPS (new_fndecl) = 1;
43561 TREE_READONLY (new_fndecl) = 1;
43562
43563 return new_fndecl;
43564 }
43565
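/* Illustrative sketch (not part of the original source): with the ACML
   naming scheme above, sin over V2DFmode maps to "__vrd2_sin" and sinf
   over V4SFmode to "__vrs4_sinf", the vector entry points used with
   -mveclibabi=acml.  */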
43566 /* Returns a decl of a function that implements gather load with
43567 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
43568 Return NULL_TREE if it is not available. */
43569
43570 static tree
43571 ix86_vectorize_builtin_gather (const_tree mem_vectype,
43572 const_tree index_type, int scale)
43573 {
43574 bool si;
43575 enum ix86_builtins code;
43576
43577 if (! TARGET_AVX2)
43578 return NULL_TREE;
43579
43580 if ((TREE_CODE (index_type) != INTEGER_TYPE
43581 && !POINTER_TYPE_P (index_type))
43582 || (TYPE_MODE (index_type) != SImode
43583 && TYPE_MODE (index_type) != DImode))
43584 return NULL_TREE;
43585
43586 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
43587 return NULL_TREE;
43588
43589 /* v*gather* insn sign extends index to pointer mode. */
43590 if (TYPE_PRECISION (index_type) < POINTER_SIZE
43591 && TYPE_UNSIGNED (index_type))
43592 return NULL_TREE;
43593
43594 if (scale <= 0
43595 || scale > 8
43596 || (scale & (scale - 1)) != 0)
43597 return NULL_TREE;
43598
43599 si = TYPE_MODE (index_type) == SImode;
43600 switch (TYPE_MODE (mem_vectype))
43601 {
43602 case V2DFmode:
43603 if (TARGET_AVX512VL)
43604 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
43605 else
43606 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
43607 break;
43608 case V4DFmode:
43609 if (TARGET_AVX512VL)
43610 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
43611 else
43612 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
43613 break;
43614 case V2DImode:
43615 if (TARGET_AVX512VL)
43616 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
43617 else
43618 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
43619 break;
43620 case V4DImode:
43621 if (TARGET_AVX512VL)
43622 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
43623 else
43624 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
43625 break;
43626 case V4SFmode:
43627 if (TARGET_AVX512VL)
43628 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
43629 else
43630 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
43631 break;
43632 case V8SFmode:
43633 if (TARGET_AVX512VL)
43634 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
43635 else
43636 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
43637 break;
43638 case V4SImode:
43639 if (TARGET_AVX512VL)
43640 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
43641 else
43642 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
43643 break;
43644 case V8SImode:
43645 if (TARGET_AVX512VL)
43646 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
43647 else
43648 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
43649 break;
43650 case V8DFmode:
43651 if (TARGET_AVX512F)
43652 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
43653 else
43654 return NULL_TREE;
43655 break;
43656 case V8DImode:
43657 if (TARGET_AVX512F)
43658 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
43659 else
43660 return NULL_TREE;
43661 break;
43662 case V16SFmode:
43663 if (TARGET_AVX512F)
43664 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
43665 else
43666 return NULL_TREE;
43667 break;
43668 case V16SImode:
43669 if (TARGET_AVX512F)
43670 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
43671 else
43672 return NULL_TREE;
43673 break;
43674 default:
43675 return NULL_TREE;
43676 }
43677
43678 return ix86_get_builtin (code);
43679 }
43680
43681 /* Returns a decl of a function that implements scatter store with
43682 register type VECTYPE and index type INDEX_TYPE and SCALE.
43683 Return NULL_TREE if it is not available. */
43684
43685 static tree
43686 ix86_vectorize_builtin_scatter (const_tree vectype,
43687 const_tree index_type, int scale)
43688 {
43689 bool si;
43690 enum ix86_builtins code;
43691
43692 if (!TARGET_AVX512F)
43693 return NULL_TREE;
43694
43695 if ((TREE_CODE (index_type) != INTEGER_TYPE
43696 && !POINTER_TYPE_P (index_type))
43697 || (TYPE_MODE (index_type) != SImode
43698 && TYPE_MODE (index_type) != DImode))
43699 return NULL_TREE;
43700
43701 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
43702 return NULL_TREE;
43703
43704 /* v*scatter* insn sign extends index to pointer mode. */
43705 if (TYPE_PRECISION (index_type) < POINTER_SIZE
43706 && TYPE_UNSIGNED (index_type))
43707 return NULL_TREE;
43708
43709 /* Scale can be 1, 2, 4 or 8. */
43710 if (scale <= 0
43711 || scale > 8
43712 || (scale & (scale - 1)) != 0)
43713 return NULL_TREE;
43714
43715 si = TYPE_MODE (index_type) == SImode;
43716 switch (TYPE_MODE (vectype))
43717 {
43718 case V8DFmode:
43719 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
43720 break;
43721 case V8DImode:
43722 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
43723 break;
43724 case V16SFmode:
43725 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
43726 break;
43727 case V16SImode:
43728 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
43729 break;
43730 default:
43731 return NULL_TREE;
43732 }
43733
43734 return ix86_builtins[code];
43735 }
43736
43737 /* Return true if it is safe to use the rsqrt optabs to optimize
43738 1.0/sqrt. The rsqrt estimate is only approximate, so this requires unsafe, finite-only, non-trapping math. */
43739
43740 static bool
43741 use_rsqrt_p ()
43742 {
43743 return (TARGET_SSE_MATH
43744 && flag_finite_math_only
43745 && !flag_trapping_math
43746 && flag_unsafe_math_optimizations);
43747 }
43748
43749 /* Returns a decl of a target-specific builtin that implements
43750 the reciprocal of the function FNDECL, or NULL_TREE if not available. */
43751
43752 static tree
43753 ix86_builtin_reciprocal (tree fndecl)
43754 {
43755 switch (DECL_FUNCTION_CODE (fndecl))
43756 {
43757 /* Vectorized version of sqrt to rsqrt conversion. */
43758 case IX86_BUILTIN_SQRTPS_NR:
43759 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
43760
43761 case IX86_BUILTIN_SQRTPS_NR256:
43762 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
43763
43764 default:
43765 return NULL_TREE;
43766 }
43767 }
43768 \f
43769 /* Helper for avx_vpermilps256_operand et al. This is also used by
43770 the expansion functions to turn the parallel back into a mask.
43771 The return value is 0 for no match and the imm8+1 for a match. */
43772
43773 int
43774 avx_vpermilp_parallel (rtx par, machine_mode mode)
43775 {
43776 unsigned i, nelt = GET_MODE_NUNITS (mode);
43777 unsigned mask = 0;
43778 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
43779
43780 if (XVECLEN (par, 0) != (int) nelt)
43781 return 0;
43782
43783 /* Validate that all of the elements are constants, and not totally
43784 out of range. Copy the data into an integral array to make the
43785 subsequent checks easier. */
43786 for (i = 0; i < nelt; ++i)
43787 {
43788 rtx er = XVECEXP (par, 0, i);
43789 unsigned HOST_WIDE_INT ei;
43790
43791 if (!CONST_INT_P (er))
43792 return 0;
43793 ei = INTVAL (er);
43794 if (ei >= nelt)
43795 return 0;
43796 ipar[i] = ei;
43797 }
43798
43799 switch (mode)
43800 {
43801 case V8DFmode:
43802 /* In the 512-bit DFmode case, we can only move elements within
43803 a 128-bit lane. First fill the second part of the mask,
43804 then fallthru. */
43805 for (i = 4; i < 6; ++i)
43806 {
43807 if (ipar[i] < 4 || ipar[i] >= 6)
43808 return 0;
43809 mask |= (ipar[i] - 4) << i;
43810 }
43811 for (i = 6; i < 8; ++i)
43812 {
43813 if (ipar[i] < 6)
43814 return 0;
43815 mask |= (ipar[i] - 6) << i;
43816 }
43817 /* FALLTHRU */
43818
43819 case V4DFmode:
43820 /* In the 256-bit DFmode case, we can only move elements within
43821 a 128-bit lane. */
43822 for (i = 0; i < 2; ++i)
43823 {
43824 if (ipar[i] >= 2)
43825 return 0;
43826 mask |= ipar[i] << i;
43827 }
43828 for (i = 2; i < 4; ++i)
43829 {
43830 if (ipar[i] < 2)
43831 return 0;
43832 mask |= (ipar[i] - 2) << i;
43833 }
43834 break;
43835
43836 case V16SFmode:
43837 /* In 512 bit SFmode case, permutation in the upper 256 bits
43838 must mirror the permutation in the lower 256-bits. */
43839 for (i = 0; i < 8; ++i)
43840 if (ipar[i] + 8 != ipar[i + 8])
43841 return 0;
43842 /* FALLTHRU */
43843
43844 case V8SFmode:
43845 /* In 256 bit SFmode case, we have full freedom of
43846 movement within the low 128-bit lane, but the high 128-bit
43847 lane must mirror the exact same pattern. */
43848 for (i = 0; i < 4; ++i)
43849 if (ipar[i] + 4 != ipar[i + 4])
43850 return 0;
43851 nelt = 4;
43852 /* FALLTHRU */
43853
43854 case V2DFmode:
43855 case V4SFmode:
43856 /* In the 128-bit case, we have full freedom in the placement of
43857 the elements from the source operand. */
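/* Each element selector is packed into the immediate: one bit per
   element for the DF forms (nelt / 2 == 1) and two bits per element for
   the SF forms (nelt / 2 == 2), matching the vpermilpd/vpermilps
   immediate encoding.  The V8SFmode/V16SFmode cases above reach here
   with nelt reduced to 4 after checking that the upper lanes mirror the
   low one.  */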
43858 for (i = 0; i < nelt; ++i)
43859 mask |= ipar[i] << (i * (nelt / 2));
43860 break;
43861
43862 default:
43863 gcc_unreachable ();
43864 }
43865
43866 /* Make sure success has a non-zero value by adding one. */
43867 return mask + 1;
43868 }
43869
43870 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
43871 the expansion functions to turn the parallel back into a mask.
43872 The return value is 0 for no match and the imm8+1 for a match. */
43873
43874 int
43875 avx_vperm2f128_parallel (rtx par, machine_mode mode)
43876 {
43877 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
43878 unsigned mask = 0;
43879 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
43880
43881 if (XVECLEN (par, 0) != (int) nelt)
43882 return 0;
43883
43884 /* Validate that all of the elements are constants, and not totally
43885 out of range. Copy the data into an integral array to make the
43886 subsequent checks easier. */
43887 for (i = 0; i < nelt; ++i)
43888 {
43889 rtx er = XVECEXP (par, 0, i);
43890 unsigned HOST_WIDE_INT ei;
43891
43892 if (!CONST_INT_P (er))
43893 return 0;
43894 ei = INTVAL (er);
43895 if (ei >= 2 * nelt)
43896 return 0;
43897 ipar[i] = ei;
43898 }
43899
43900 /* Validate that each half of the permute is a run of consecutive elements. */
43901 for (i = 0; i < nelt2 - 1; ++i)
43902 if (ipar[i] + 1 != ipar[i + 1])
43903 return 0;
43904 for (i = nelt2; i < nelt - 1; ++i)
43905 if (ipar[i] + 1 != ipar[i + 1])
43906 return 0;
43907
43908 /* Reconstruct the mask. */
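/* Each half of the destination selects one of the four 128-bit chunks
   of the concatenated sources.  The chunk number (0-3) of the low half
   goes into imm8 bits 0-1 and that of the high half into bits 4-5,
   which is the vperm2f128 encoding.  */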
43909 for (i = 0; i < 2; ++i)
43910 {
43911 unsigned e = ipar[i * nelt2];
43912 if (e % nelt2)
43913 return 0;
43914 e /= nelt2;
43915 mask |= e << (i * 4);
43916 }
43917
43918 /* Make sure success has a non-zero value by adding one. */
43919 return mask + 1;
43920 }
43921 \f
43922 /* Return a register priority for hard reg REGNO. */
43923 static int
43924 ix86_register_priority (int hard_regno)
43925 {
43926 /* ebp and r13 as the base always want a displacement, and r12 as the
43927 base always wants an index. So discourage their use in an
43928 address. */
43929 if (hard_regno == R12_REG || hard_regno == R13_REG)
43930 return 0;
43931 if (hard_regno == BP_REG)
43932 return 1;
43933 /* New x86-64 int registers result in bigger code size. Discourage
43934 them. */
43935 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
43936 return 2;
43937 /* New x86-64 SSE registers result in bigger code size. Discourage
43938 them. */
43939 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
43940 return 2;
43941 /* Usage of AX register results in smaller code. Prefer it. */
43942 if (hard_regno == AX_REG)
43943 return 4;
43944 return 3;
43945 }
43946
43947 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
43948
43949 Put float CONST_DOUBLE in the constant pool instead of fp regs.
43950 QImode must go into class Q_REGS.
43951 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
43952 movdf to do mem-to-mem moves through integer regs. */
43953
43954 static reg_class_t
43955 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
43956 {
43957 machine_mode mode = GET_MODE (x);
43958
43959 /* We're only allowed to return a subclass of CLASS. Many of the
43960 following checks fail for NO_REGS, so eliminate that early. */
43961 if (regclass == NO_REGS)
43962 return NO_REGS;
43963
43964 /* All classes can load zeros. */
43965 if (x == CONST0_RTX (mode))
43966 return regclass;
43967
43968 /* Force constants into memory if we are loading a (nonzero) constant into
43969 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
43970 instructions to load from a constant. */
43971 if (CONSTANT_P (x)
43972 && (MAYBE_MMX_CLASS_P (regclass)
43973 || MAYBE_SSE_CLASS_P (regclass)
43974 || MAYBE_MASK_CLASS_P (regclass)))
43975 return NO_REGS;
43976
43977 /* Floating-point constants need more complex checks. */
43978 if (CONST_DOUBLE_P (x))
43979 {
43980 /* General regs can load everything. */
43981 if (INTEGER_CLASS_P (regclass))
43982 return regclass;
43983
43984 /* Floats can load 0 and 1 plus some others. Note that we eliminated
43985 zero above. We only want to wind up preferring 80387 registers if
43986 we plan on doing computation with them. */
43987 if (IS_STACK_MODE (mode)
43988 && standard_80387_constant_p (x) > 0)
43989 {
43990 /* Limit class to FP regs. */
43991 if (FLOAT_CLASS_P (regclass))
43992 return FLOAT_REGS;
43993 else if (regclass == FP_TOP_SSE_REGS)
43994 return FP_TOP_REG;
43995 else if (regclass == FP_SECOND_SSE_REGS)
43996 return FP_SECOND_REG;
43997 }
43998
43999 return NO_REGS;
44000 }
44001
44002 /* Prefer SSE regs only, if we can use them for math. */
44003 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44004 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
44005
44006 /* Generally when we see PLUS here, it's the function invariant
44007 (plus soft-fp const_int). Which can only be computed into general
44008 regs. */
44009 if (GET_CODE (x) == PLUS)
44010 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
44011
44012 /* QImode constants are easy to load, but non-constant QImode data
44013 must go into Q_REGS. */
44014 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
44015 {
44016 if (Q_CLASS_P (regclass))
44017 return regclass;
44018 else if (reg_class_subset_p (Q_REGS, regclass))
44019 return Q_REGS;
44020 else
44021 return NO_REGS;
44022 }
44023
44024 return regclass;
44025 }
44026
44027 /* Discourage putting floating-point values in SSE registers unless
44028 SSE math is being used, and likewise for the 387 registers. */
44029 static reg_class_t
44030 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
44031 {
44032 machine_mode mode = GET_MODE (x);
44033
44034 /* Restrict the output reload class to the register bank that we are doing
44035 math on. If we would like not to return a subset of CLASS, reject this
44036 alternative: if reload cannot do this, it will still use its choice. */
44038 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44039 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
44040
44041 if (IS_STACK_MODE (mode))
44042 {
44043 if (regclass == FP_TOP_SSE_REGS)
44044 return FP_TOP_REG;
44045 else if (regclass == FP_SECOND_SSE_REGS)
44046 return FP_SECOND_REG;
44047 else
44048 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
44049 }
44050
44051 return regclass;
44052 }
44053
44054 static reg_class_t
44055 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
44056 machine_mode mode, secondary_reload_info *sri)
44057 {
44058 /* Double-word spills from general registers to non-offsettable memory
44059 references (zero-extended addresses) require special handling. */
44060 if (TARGET_64BIT
44061 && MEM_P (x)
44062 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
44063 && INTEGER_CLASS_P (rclass)
44064 && !offsettable_memref_p (x))
44065 {
44066 sri->icode = (in_p
44067 ? CODE_FOR_reload_noff_load
44068 : CODE_FOR_reload_noff_store);
44069 /* Add the cost of moving address to a temporary. */
44070 sri->extra_cost = 1;
44071
44072 return NO_REGS;
44073 }
44074
44075 /* QImode spills from non-QI registers require
44076 intermediate register on 32bit targets. */
44077 if (mode == QImode
44078 && (MAYBE_MASK_CLASS_P (rclass)
44079 || (!TARGET_64BIT && !in_p
44080 && INTEGER_CLASS_P (rclass)
44081 && MAYBE_NON_Q_CLASS_P (rclass))))
44082 {
44083 int regno;
44084
44085 if (REG_P (x))
44086 regno = REGNO (x);
44087 else
44088 regno = -1;
44089
44090 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
44091 regno = true_regnum (x);
44092
44093 /* Return Q_REGS if the operand is in memory. */
44094 if (regno == -1)
44095 return Q_REGS;
44096 }
44097
44098 /* This condition handles the corner case where an expression involving
44099 pointers gets vectorized. We're trying to use the address of a
44100 stack slot as a vector initializer.
44101
44102 (set (reg:V2DI 74 [ vect_cst_.2 ])
44103 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
44104
44105 Eventually frame gets turned into sp+offset like this:
44106
44107 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
44108 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
44109 (const_int 392 [0x188]))))
44110
44111 That later gets turned into:
44112
44113 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
44114 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
44115 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
44116
44117 We'll have the following reload recorded:
44118
44119 Reload 0: reload_in (DI) =
44120 (plus:DI (reg/f:DI 7 sp)
44121 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
44122 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
44123 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
44124 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
44125 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
44126 reload_reg_rtx: (reg:V2DI 22 xmm1)
44127
44128 Which isn't going to work since SSE instructions can't handle scalar
44129 additions. Returning GENERAL_REGS forces the addition into an integer
44130 register, and reload can handle subsequent reloads without problems. */
44131
44132 if (in_p && GET_CODE (x) == PLUS
44133 && SSE_CLASS_P (rclass)
44134 && SCALAR_INT_MODE_P (mode))
44135 return GENERAL_REGS;
44136
44137 return NO_REGS;
44138 }
44139
44140 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
44141
44142 static bool
44143 ix86_class_likely_spilled_p (reg_class_t rclass)
44144 {
44145 switch (rclass)
44146 {
44147 case AREG:
44148 case DREG:
44149 case CREG:
44150 case BREG:
44151 case AD_REGS:
44152 case SIREG:
44153 case DIREG:
44154 case SSE_FIRST_REG:
44155 case FP_TOP_REG:
44156 case FP_SECOND_REG:
44157 case BND_REGS:
44158 return true;
44159
44160 default:
44161 break;
44162 }
44163
44164 return false;
44165 }
44166
44167 /* If we are copying between general and FP registers, we need a memory
44168 location. The same is true for SSE and MMX registers.
44169
44170 To optimize register_move_cost performance, allow inline variant.
44171
44172 The macro can't work reliably when one of the CLASSES is a class containing
44173 registers from multiple units (SSE, MMX, integer). We avoid this by never
44174 combining those units in a single alternative in the machine description.
44175 Ensure that this constraint holds to avoid unexpected surprises.
44176
44177 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
44178 enforce these sanity checks. */
44179
44180 static inline bool
44181 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
44182 machine_mode mode, int strict)
44183 {
44184 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
44185 return false;
44186 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
44187 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
44188 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
44189 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
44190 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
44191 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
44192 {
44193 gcc_assert (!strict || lra_in_progress);
44194 return true;
44195 }
44196
44197 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
44198 return true;
44199
44200 /* Between mask and general, we have moves no larger than word size. */
44201 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
44202 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
44203 return true;
44204
44205 /* ??? This is a lie. We do have moves between mmx/general, and for
44206 mmx/sse2. But by saying we need secondary memory we discourage the
44207 register allocator from using the mmx registers unless needed. */
44208 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
44209 return true;
44210
44211 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
44212 {
44213 /* SSE1 doesn't have any direct moves from other classes. */
44214 if (!TARGET_SSE2)
44215 return true;
44216
44217 /* If the target says that inter-unit moves are more expensive
44218 than moving through memory, then don't generate them. */
44219 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
44220 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
44221 return true;
44222
44223 /* Between SSE and general, we have moves no larger than word size. */
44224 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44225 return true;
44226 }
44227
44228 return false;
44229 }
44230
44231 bool
44232 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
44233 machine_mode mode, int strict)
44234 {
44235 return inline_secondary_memory_needed (class1, class2, mode, strict);
44236 }
44237
44238 /* Implement the TARGET_CLASS_MAX_NREGS hook.
44239
44240 On the 80386, this is the size of MODE in words,
44241 except in the FP regs, where a single reg is always enough. */
44242
44243 static unsigned char
44244 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
44245 {
44246 if (MAYBE_INTEGER_CLASS_P (rclass))
44247 {
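/* XFmode does not fit evenly in word-sized integer registers: it needs
   3 of them with -m32 and 2 with -m64; XCmode, its complex variant,
   needs twice as many.  */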
44248 if (mode == XFmode)
44249 return (TARGET_64BIT ? 2 : 3);
44250 else if (mode == XCmode)
44251 return (TARGET_64BIT ? 4 : 6);
44252 else
44253 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
44254 }
44255 else
44256 {
44257 if (COMPLEX_MODE_P (mode))
44258 return 2;
44259 else
44260 return 1;
44261 }
44262 }
44263
44264 /* Return true if the registers in CLASS cannot represent the change from
44265 modes FROM to TO. */
44266
44267 bool
44268 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
44269 enum reg_class regclass)
44270 {
44271 if (from == to)
44272 return false;
44273
44274 /* x87 registers can't do subreg at all, as all values are reformatted
44275 to extended precision. */
44276 if (MAYBE_FLOAT_CLASS_P (regclass))
44277 return true;
44278
44279 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
44280 {
44281 /* Vector registers do not support QI or HImode loads. If we don't
44282 disallow a change to these modes, reload will assume it's ok to
44283 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
44284 the vec_dupv4hi pattern. */
44285 if (GET_MODE_SIZE (from) < 4)
44286 return true;
44287 }
44288
44289 return false;
44290 }
44291
44292 /* Return the cost of moving data of mode M between a
44293 register and memory. A value of 2 is the default; this cost is
44294 relative to those in `REGISTER_MOVE_COST'.
44295
44296 This function is used extensively by register_move_cost that is used to
44297 build tables at startup. Make it inline in this case.
44298 When IN is 2, return maximum of in and out move cost.
44299
44300 If moving between registers and memory is more expensive than
44301 between two registers, you should define this macro to express the
44302 relative cost.
44303
44304 Also model the increased cost of moving QImode values in non-Q_REGS
44305 classes.
44306 */
44307 static inline int
44308 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
44309 int in)
44310 {
44311 int cost;
44312 if (FLOAT_CLASS_P (regclass))
44313 {
44314 int index;
44315 switch (mode)
44316 {
44317 case SFmode:
44318 index = 0;
44319 break;
44320 case DFmode:
44321 index = 1;
44322 break;
44323 case XFmode:
44324 index = 2;
44325 break;
44326 default:
44327 return 100;
44328 }
44329 if (in == 2)
44330 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
44331 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
44332 }
44333 if (SSE_CLASS_P (regclass))
44334 {
44335 int index;
44336 switch (GET_MODE_SIZE (mode))
44337 {
44338 case 4:
44339 index = 0;
44340 break;
44341 case 8:
44342 index = 1;
44343 break;
44344 case 16:
44345 index = 2;
44346 break;
44347 default:
44348 return 100;
44349 }
44350 if (in == 2)
44351 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
44352 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
44353 }
44354 if (MMX_CLASS_P (regclass))
44355 {
44356 int index;
44357 switch (GET_MODE_SIZE (mode))
44358 {
44359 case 4:
44360 index = 0;
44361 break;
44362 case 8:
44363 index = 1;
44364 break;
44365 default:
44366 return 100;
44367 }
44368 if (in == 2)
44369 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
44370 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
44371 }
44372 switch (GET_MODE_SIZE (mode))
44373 {
44374 case 1:
44375 if (Q_CLASS_P (regclass) || TARGET_64BIT)
44376 {
44377 if (!in)
44378 return ix86_cost->int_store[0];
44379 if (TARGET_PARTIAL_REG_DEPENDENCY
44380 && optimize_function_for_speed_p (cfun))
44381 cost = ix86_cost->movzbl_load;
44382 else
44383 cost = ix86_cost->int_load[0];
44384 if (in == 2)
44385 return MAX (cost, ix86_cost->int_store[0]);
44386 return cost;
44387 }
44388 else
44389 {
44390 if (in == 2)
44391 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
44392 if (in)
44393 return ix86_cost->movzbl_load;
44394 else
44395 return ix86_cost->int_store[0] + 4;
44396 }
44397 break;
44398 case 2:
44399 if (in == 2)
44400 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
44401 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
44402 default:
44403 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
44404 if (mode == TFmode)
44405 mode = XFmode;
44406 if (in == 2)
44407 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
44408 else if (in)
44409 cost = ix86_cost->int_load[2];
44410 else
44411 cost = ix86_cost->int_store[2];
44412 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
44413 }
44414 }
44415
44416 static int
44417 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
44418 bool in)
44419 {
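/* The target hook only distinguishes loads from stores; the "maximum of
   both" variant (IN == 2) of the inline helper is used directly by
   ix86_register_move_cost below.  */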
44420 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
44421 }
44422
44423
44424 /* Return the cost of moving data from a register in class CLASS1 to
44425 one in class CLASS2.
44426
44427 It is not required that the cost always equal 2 when FROM is the same as TO;
44428 on some machines it is expensive to move between registers if they are not
44429 general registers. */
44430
44431 static int
44432 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
44433 reg_class_t class2_i)
44434 {
44435 enum reg_class class1 = (enum reg_class) class1_i;
44436 enum reg_class class2 = (enum reg_class) class2_i;
44437
44438 /* In case we require secondary memory, compute cost of the store followed
44439 by load. In order to avoid bad register allocation choices, we need
44440 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
44441
44442 if (inline_secondary_memory_needed (class1, class2, mode, 0))
44443 {
44444 int cost = 1;
44445
44446 cost += inline_memory_move_cost (mode, class1, 2);
44447 cost += inline_memory_move_cost (mode, class2, 2);
44448
44449 /* In the case of copying from a general purpose register, we may emit
44450 multiple stores followed by a single load, causing a memory-size-mismatch
44451 stall. Count this as an arbitrarily high cost of 20. */
44452 if (targetm.class_max_nregs (class1, mode)
44453 > targetm.class_max_nregs (class2, mode))
44454 cost += 20;
44455
44456 /* In the case of FP/MMX moves, the registers actually overlap, and we
44457 have to switch modes in order to treat them differently. */
44458 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
44459 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
44460 cost += 20;
44461
44462 return cost;
44463 }
44464
44465 /* Moves between SSE/MMX and integer unit are expensive. */
44466 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
44467 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
44468
44469 /* ??? By keeping returned value relatively high, we limit the number
44470 of moves between integer and MMX/SSE registers for all targets.
44471 Additionally, high value prevents problem with x86_modes_tieable_p(),
44472 where integer modes in MMX/SSE registers are not tieable
44473 because of missing QImode and HImode moves to, from or between
44474 MMX/SSE registers. */
44475 return MAX (8, ix86_cost->mmxsse_to_integer);
44476
44477 if (MAYBE_FLOAT_CLASS_P (class1))
44478 return ix86_cost->fp_move;
44479 if (MAYBE_SSE_CLASS_P (class1))
44480 return ix86_cost->sse_move;
44481 if (MAYBE_MMX_CLASS_P (class1))
44482 return ix86_cost->mmx_move;
44483 return 2;
44484 }
44485
44486 /* Return TRUE if hard register REGNO can hold a value of machine-mode
44487 MODE. */
44488
44489 bool
44490 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
44491 {
44492 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
44493 if (CC_REGNO_P (regno))
44494 return GET_MODE_CLASS (mode) == MODE_CC;
44495 if (GET_MODE_CLASS (mode) == MODE_CC
44496 || GET_MODE_CLASS (mode) == MODE_RANDOM
44497 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
44498 return false;
44499 if (STACK_REGNO_P (regno))
44500 return VALID_FP_MODE_P (mode);
44501 if (MASK_REGNO_P (regno))
44502 return (VALID_MASK_REG_MODE (mode)
44503 || (TARGET_AVX512BW
44504 && VALID_MASK_AVX512BW_MODE (mode)));
44505 if (BND_REGNO_P (regno))
44506 return VALID_BND_REG_MODE (mode);
44507 if (SSE_REGNO_P (regno))
44508 {
44509 /* We implement the move patterns for all vector modes into and
44510 out of SSE registers, even when no operation instructions
44511 are available. */
44512
44513 /* For AVX-512 we allow, regardless of regno:
44514 - XI mode
44515 - any of 512-bit wide vector mode
44516 - any scalar mode. */
44517 if (TARGET_AVX512F
44518 && (mode == XImode
44519 || VALID_AVX512F_REG_MODE (mode)
44520 || VALID_AVX512F_SCALAR_MODE (mode)))
44521 return true;
44522
44523 /* TODO check for QI/HI scalars. */
44524 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) to be used for 128/256-bit modes. */
44525 if (TARGET_AVX512VL
44526 && (mode == OImode
44527 || mode == TImode
44528 || VALID_AVX256_REG_MODE (mode)
44529 || VALID_AVX512VL_128_REG_MODE (mode)))
44530 return true;
44531
44532 /* xmm16-xmm31 are only available for AVX-512. */
44533 if (EXT_REX_SSE_REGNO_P (regno))
44534 return false;
44535
44536 /* OImode and AVX modes are available only when AVX is enabled. */
44537 return ((TARGET_AVX
44538 && VALID_AVX256_REG_OR_OI_MODE (mode))
44539 || VALID_SSE_REG_MODE (mode)
44540 || VALID_SSE2_REG_MODE (mode)
44541 || VALID_MMX_REG_MODE (mode)
44542 || VALID_MMX_REG_MODE_3DNOW (mode));
44543 }
44544 if (MMX_REGNO_P (regno))
44545 {
44546 /* We implement the move patterns for 3DNOW modes even in MMX mode,
44547 so if the register is available at all, then we can move data of
44548 the given mode into or out of it. */
44549 return (VALID_MMX_REG_MODE (mode)
44550 || VALID_MMX_REG_MODE_3DNOW (mode));
44551 }
44552
44553 if (mode == QImode)
44554 {
44555 /* Take care with QImode values - they can be in non-QI regs,
44556 but then they do cause partial register stalls. */
44557 if (ANY_QI_REGNO_P (regno))
44558 return true;
44559 if (!TARGET_PARTIAL_REG_STALL)
44560 return true;
44561 /* LRA checks if the hard register is OK for the given mode.
44562 QImode values can live in non-QI regs, so we allow all
44563 registers here. */
44564 if (lra_in_progress)
44565 return true;
44566 return !can_create_pseudo_p ();
44567 }
44568 /* We handle both integer and floats in the general purpose registers. */
44569 else if (VALID_INT_MODE_P (mode))
44570 return true;
44571 else if (VALID_FP_MODE_P (mode))
44572 return true;
44573 else if (VALID_DFP_MODE_P (mode))
44574 return true;
44575 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
44576 on to use that value in smaller contexts, this can easily force a
44577 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
44578 supporting DImode, allow it. */
44579 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
44580 return true;
44581
44582 return false;
44583 }
44584
44585 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
44586 tieable integer mode. */
44587
44588 static bool
44589 ix86_tieable_integer_mode_p (machine_mode mode)
44590 {
44591 switch (mode)
44592 {
44593 case HImode:
44594 case SImode:
44595 return true;
44596
44597 case QImode:
44598 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
44599
44600 case DImode:
44601 return TARGET_64BIT;
44602
44603 default:
44604 return false;
44605 }
44606 }
44607
44608 /* Return true if MODE1 is accessible in a register that can hold MODE2
44609 without copying. That is, all register classes that can hold MODE2
44610 can also hold MODE1. */
44611
44612 bool
44613 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
44614 {
44615 if (mode1 == mode2)
44616 return true;
44617
44618 if (ix86_tieable_integer_mode_p (mode1)
44619 && ix86_tieable_integer_mode_p (mode2))
44620 return true;
44621
44622 /* MODE2 being XFmode implies fp stack or general regs, which means we
44623 can tie any smaller floating point modes to it. Note that we do not
44624 tie this with TFmode. */
44625 if (mode2 == XFmode)
44626 return mode1 == SFmode || mode1 == DFmode;
44627
44628 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
44629 that we can tie it with SFmode. */
44630 if (mode2 == DFmode)
44631 return mode1 == SFmode;
44632
44633 /* If MODE2 is only appropriate for an SSE register, then tie with
44634 any other mode acceptable to SSE registers. */
44635 if (GET_MODE_SIZE (mode2) == 32
44636 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
44637 return (GET_MODE_SIZE (mode1) == 32
44638 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
44639 if (GET_MODE_SIZE (mode2) == 16
44640 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
44641 return (GET_MODE_SIZE (mode1) == 16
44642 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
44643
44644 /* If MODE2 is appropriate for an MMX register, then tie
44645 with any other mode acceptable to MMX registers. */
44646 if (GET_MODE_SIZE (mode2) == 8
44647 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
44648 return (GET_MODE_SIZE (mode1) == 8
44649 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
44650
44651 return false;
44652 }
44653
44654 /* Return the cost of moving between two registers of mode MODE. */
44655
44656 static int
44657 ix86_set_reg_reg_cost (machine_mode mode)
44658 {
44659 unsigned int units = UNITS_PER_WORD;
44660
44661 switch (GET_MODE_CLASS (mode))
44662 {
44663 default:
44664 break;
44665
44666 case MODE_CC:
44667 units = GET_MODE_SIZE (CCmode);
44668 break;
44669
44670 case MODE_FLOAT:
44671 if ((TARGET_SSE && mode == TFmode)
44672 || (TARGET_80387 && mode == XFmode)
44673 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
44674 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
44675 units = GET_MODE_SIZE (mode);
44676 break;
44677
44678 case MODE_COMPLEX_FLOAT:
44679 if ((TARGET_SSE && mode == TCmode)
44680 || (TARGET_80387 && mode == XCmode)
44681 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
44682 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
44683 units = GET_MODE_SIZE (mode);
44684 break;
44685
44686 case MODE_VECTOR_INT:
44687 case MODE_VECTOR_FLOAT:
44688 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44689 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44690 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44691 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44692 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
44693 units = GET_MODE_SIZE (mode);
44694 }
44695
44696 /* Return the cost of moving between two registers of mode MODE,
44697 assuming that the move will be in pieces of at most UNITS bytes. */
44698 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
44699 }
44700
44701 /* Compute a (partial) cost for rtx X. Return true if the complete
44702 cost has been computed, and false if subexpressions should be
44703 scanned. In either case, *TOTAL contains the cost result. */
44704
44705 static bool
44706 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
44707 int *total, bool speed)
44708 {
44709 rtx mask;
44710 enum rtx_code code = GET_CODE (x);
44711 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
44712 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
44713
44714 switch (code)
44715 {
44716 case SET:
44717 if (register_operand (SET_DEST (x), VOIDmode)
44718 && reg_or_0_operand (SET_SRC (x), VOIDmode))
44719 {
44720 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
44721 return true;
44722 }
44723 return false;
44724
44725 case CONST_INT:
44726 case CONST:
44727 case LABEL_REF:
44728 case SYMBOL_REF:
44729 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
44730 *total = 3;
44731 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
44732 *total = 2;
44733 else if (flag_pic && SYMBOLIC_CONST (x)
44734 && !(TARGET_64BIT
44735 && (GET_CODE (x) == LABEL_REF
44736 || (GET_CODE (x) == SYMBOL_REF
44737 && SYMBOL_REF_LOCAL_P (x))))
44738 /* Use 0 cost for CONST to improve its propagation. */
44739 && (TARGET_64BIT || GET_CODE (x) != CONST))
44740 *total = 1;
44741 else
44742 *total = 0;
44743 return true;
44744
44745 case CONST_DOUBLE:
44746 if (IS_STACK_MODE (mode))
44747 switch (standard_80387_constant_p (x))
44748 {
44749 case -1:
44750 case 0:
44751 break;
44752 case 1: /* 0.0 */
44753 *total = 1;
44754 return true;
44755 default: /* Other constants */
44756 *total = 2;
44757 return true;
44758 }
44759 /* FALLTHRU */
44760
44761 case CONST_VECTOR:
44762 switch (standard_sse_constant_p (x, mode))
44763 {
44764 case 0:
44765 break;
44766 case 1: /* 0: xor eliminates false dependency */
44767 *total = 0;
44768 return true;
44769 default: /* -1: cmp contains false dependency */
44770 *total = 1;
44771 return true;
44772 }
44773 /* FALLTHRU */
44774
44775 case CONST_WIDE_INT:
44776 /* Fall back to (MEM (SYMBOL_REF)), since that's where
44777 it'll probably end up. Add a penalty for size. */
44778 *total = (COSTS_N_INSNS (1)
44779 + (!TARGET_64BIT && flag_pic)
44780 + (GET_MODE_SIZE (mode) <= 4
44781 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
44782 return true;
44783
44784 case ZERO_EXTEND:
44785 /* The zero extension is often completely free on x86_64, so make
44786 it as cheap as possible. */
44787 if (TARGET_64BIT && mode == DImode
44788 && GET_MODE (XEXP (x, 0)) == SImode)
44789 *total = 1;
44790 else if (TARGET_ZERO_EXTEND_WITH_AND)
44791 *total = cost->add;
44792 else
44793 *total = cost->movzx;
44794 return false;
44795
44796 case SIGN_EXTEND:
44797 *total = cost->movsx;
44798 return false;
44799
44800 case ASHIFT:
44801 if (SCALAR_INT_MODE_P (mode)
44802 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
44803 && CONST_INT_P (XEXP (x, 1)))
44804 {
44805 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
44806 if (value == 1)
44807 {
44808 *total = cost->add;
44809 return false;
44810 }
44811 if ((value == 2 || value == 3)
44812 && cost->lea <= cost->shift_const)
44813 {
44814 *total = cost->lea;
44815 return false;
44816 }
44817 }
44818 /* FALLTHRU */
44819
44820 case ROTATE:
44821 case ASHIFTRT:
44822 case LSHIFTRT:
44823 case ROTATERT:
44824 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44825 {
44826 /* ??? Should be SSE vector operation cost. */
44827 /* At least for published AMD latencies, this really is the same
44828 as the latency for a simple fpu operation like fabs. */
44829 /* V*QImode is emulated with 1-11 insns. */
44830 if (mode == V16QImode || mode == V32QImode)
44831 {
44832 int count = 11;
44833 if (TARGET_XOP && mode == V16QImode)
44834 {
44835 /* For XOP we use vpshab, which requires a broadcast of the
44836 value to the variable shift insn. For constants this
44837 means a V16Q const in mem; even when we can perform the
44838 shift with one insn, set the cost to prefer paddb. */
44839 if (CONSTANT_P (XEXP (x, 1)))
44840 {
44841 *total = (cost->fabs
44842 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
44843 + (speed ? 2 : COSTS_N_BYTES (16)));
44844 return true;
44845 }
44846 count = 3;
44847 }
44848 else if (TARGET_SSSE3)
44849 count = 7;
44850 *total = cost->fabs * count;
44851 }
44852 else
44853 *total = cost->fabs;
44854 }
44855 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44856 {
44857 if (CONST_INT_P (XEXP (x, 1)))
44858 {
44859 if (INTVAL (XEXP (x, 1)) > 32)
44860 *total = cost->shift_const + COSTS_N_INSNS (2);
44861 else
44862 *total = cost->shift_const * 2;
44863 }
44864 else
44865 {
44866 if (GET_CODE (XEXP (x, 1)) == AND)
44867 *total = cost->shift_var * 2;
44868 else
44869 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
44870 }
44871 }
44872 else
44873 {
44874 if (CONST_INT_P (XEXP (x, 1)))
44875 *total = cost->shift_const;
44876 else if (SUBREG_P (XEXP (x, 1))
44877 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
44878 {
44879 /* Return the cost after shift-and truncation. */
44880 *total = cost->shift_var;
44881 return true;
44882 }
44883 else
44884 *total = cost->shift_var;
44885 }
44886 return false;
44887
44888 case FMA:
44889 {
44890 rtx sub;
44891
44892 gcc_assert (FLOAT_MODE_P (mode));
44893 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
44894
44895 /* ??? SSE scalar/vector cost should be used here. */
44896 /* ??? Bald assumption that fma has the same cost as fmul. */
44897 *total = cost->fmul;
44898 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
44899
44900 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
44901 sub = XEXP (x, 0);
44902 if (GET_CODE (sub) == NEG)
44903 sub = XEXP (sub, 0);
44904 *total += rtx_cost (sub, mode, FMA, 0, speed);
44905
44906 sub = XEXP (x, 2);
44907 if (GET_CODE (sub) == NEG)
44908 sub = XEXP (sub, 0);
44909 *total += rtx_cost (sub, mode, FMA, 2, speed);
44910 return true;
44911 }
44912
44913 case MULT:
44914 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44915 {
44916 /* ??? SSE scalar cost should be used here. */
44917 *total = cost->fmul;
44918 return false;
44919 }
44920 else if (X87_FLOAT_MODE_P (mode))
44921 {
44922 *total = cost->fmul;
44923 return false;
44924 }
44925 else if (FLOAT_MODE_P (mode))
44926 {
44927 /* ??? SSE vector cost should be used here. */
44928 *total = cost->fmul;
44929 return false;
44930 }
44931 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44932 {
44933 /* V*QImode is emulated with 7-13 insns. */
44934 if (mode == V16QImode || mode == V32QImode)
44935 {
44936 int extra = 11;
44937 if (TARGET_XOP && mode == V16QImode)
44938 extra = 5;
44939 else if (TARGET_SSSE3)
44940 extra = 6;
44941 *total = cost->fmul * 2 + cost->fabs * extra;
44942 }
44943 /* V*DImode is emulated with 5-8 insns. */
44944 else if (mode == V2DImode || mode == V4DImode)
44945 {
44946 if (TARGET_XOP && mode == V2DImode)
44947 *total = cost->fmul * 2 + cost->fabs * 3;
44948 else
44949 *total = cost->fmul * 3 + cost->fabs * 5;
44950 }
44951 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
44952 insns, including two PMULUDQ. */
44953 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
44954 *total = cost->fmul * 2 + cost->fabs * 5;
44955 else
44956 *total = cost->fmul;
44957 return false;
44958 }
44959 else
44960 {
44961 rtx op0 = XEXP (x, 0);
44962 rtx op1 = XEXP (x, 1);
44963 int nbits;
44964 if (CONST_INT_P (XEXP (x, 1)))
44965 {
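/* Count the set bits of the constant multiplier; "value &= value - 1"
   clears the lowest set bit on each iteration.  NBITS then scales the
   per-bit cost mult_bit in the total below.  */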
44966 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
44967 for (nbits = 0; value != 0; value &= value - 1)
44968 nbits++;
44969 }
44970 else
44971 /* This is arbitrary. */
44972 nbits = 7;
44973
44974 /* Compute costs correctly for widening multiplication. */
44975 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
44976 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
44977 == GET_MODE_SIZE (mode))
44978 {
44979 int is_mulwiden = 0;
44980 machine_mode inner_mode = GET_MODE (op0);
44981
44982 if (GET_CODE (op0) == GET_CODE (op1))
44983 is_mulwiden = 1, op1 = XEXP (op1, 0);
44984 else if (CONST_INT_P (op1))
44985 {
44986 if (GET_CODE (op0) == SIGN_EXTEND)
44987 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
44988 == INTVAL (op1);
44989 else
44990 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
44991 }
44992
44993 if (is_mulwiden)
44994 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
44995 }
44996
44997 *total = (cost->mult_init[MODE_INDEX (mode)]
44998 + nbits * cost->mult_bit
44999 + rtx_cost (op0, mode, outer_code, opno, speed)
45000 + rtx_cost (op1, mode, outer_code, opno, speed));
45001
45002 return true;
45003 }
45004
45005 case DIV:
45006 case UDIV:
45007 case MOD:
45008 case UMOD:
45009 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
45010 /* ??? SSE cost should be used here. */
45011 *total = cost->fdiv;
45012 else if (X87_FLOAT_MODE_P (mode))
45013 *total = cost->fdiv;
45014 else if (FLOAT_MODE_P (mode))
45015 /* ??? SSE vector cost should be used here. */
45016 *total = cost->fdiv;
45017 else
45018 *total = cost->divide[MODE_INDEX (mode)];
45019 return false;
45020
45021 case PLUS:
45022 if (GET_MODE_CLASS (mode) == MODE_INT
45023 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
45024 {
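/* PLUS trees of the form base + index * {2, 4, 8} + displacement map
   directly onto the lea addressing modes, so cost them as a single lea
   plus the cost of the operands.  */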
45025 if (GET_CODE (XEXP (x, 0)) == PLUS
45026 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
45027 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
45028 && CONSTANT_P (XEXP (x, 1)))
45029 {
45030 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
45031 if (val == 2 || val == 4 || val == 8)
45032 {
45033 *total = cost->lea;
45034 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
45035 outer_code, opno, speed);
45036 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
45037 outer_code, opno, speed);
45038 *total += rtx_cost (XEXP (x, 1), mode,
45039 outer_code, opno, speed);
45040 return true;
45041 }
45042 }
45043 else if (GET_CODE (XEXP (x, 0)) == MULT
45044 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
45045 {
45046 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
45047 if (val == 2 || val == 4 || val == 8)
45048 {
45049 *total = cost->lea;
45050 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
45051 outer_code, opno, speed);
45052 *total += rtx_cost (XEXP (x, 1), mode,
45053 outer_code, opno, speed);
45054 return true;
45055 }
45056 }
45057 else if (GET_CODE (XEXP (x, 0)) == PLUS)
45058 {
45059 *total = cost->lea;
45060 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
45061 outer_code, opno, speed);
45062 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
45063 outer_code, opno, speed);
45064 *total += rtx_cost (XEXP (x, 1), mode,
45065 outer_code, opno, speed);
45066 return true;
45067 }
45068 }
45069 /* FALLTHRU */
45070
45071 case MINUS:
45072 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
45073 {
45074 /* ??? SSE cost should be used here. */
45075 *total = cost->fadd;
45076 return false;
45077 }
45078 else if (X87_FLOAT_MODE_P (mode))
45079 {
45080 *total = cost->fadd;
45081 return false;
45082 }
45083 else if (FLOAT_MODE_P (mode))
45084 {
45085 /* ??? SSE vector cost should be used here. */
45086 *total = cost->fadd;
45087 return false;
45088 }
45089 /* FALLTHRU */
45090
45091 case AND:
45092 case IOR:
45093 case XOR:
45094 if (GET_MODE_CLASS (mode) == MODE_INT
45095 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
45096 {
45097 *total = (cost->add * 2
45098 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
45099 << (GET_MODE (XEXP (x, 0)) != DImode))
45100 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
45101 << (GET_MODE (XEXP (x, 1)) != DImode)));
45102 return true;
45103 }
45104 /* FALLTHRU */
45105
45106 case NEG:
45107 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
45108 {
45109 /* ??? SSE cost should be used here. */
45110 *total = cost->fchs;
45111 return false;
45112 }
45113 else if (X87_FLOAT_MODE_P (mode))
45114 {
45115 *total = cost->fchs;
45116 return false;
45117 }
45118 else if (FLOAT_MODE_P (mode))
45119 {
45120 /* ??? SSE vector cost should be used here. */
45121 *total = cost->fchs;
45122 return false;
45123 }
45124 /* FALLTHRU */
45125
45126 case NOT:
45127 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
45128 {
45129 /* ??? Should be SSE vector operation cost. */
45130 /* At least for published AMD latencies, this really is the same
45131 as the latency for a simple fpu operation like fabs. */
45132 *total = cost->fabs;
45133 }
45134 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
45135 *total = cost->add * 2;
45136 else
45137 *total = cost->add;
45138 return false;
45139
45140 case COMPARE:
45141 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
45142 && XEXP (XEXP (x, 0), 1) == const1_rtx
45143 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
45144 && XEXP (x, 1) == const0_rtx)
45145 {
45146 /* This kind of construct is implemented using test[bwl].
45147 Treat it as if we had an AND. */
45148 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
45149 *total = (cost->add
45150 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
45151 opno, speed)
45152 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
45153 return true;
45154 }
45155
45156 /* The embedded comparison operand is completely free. */
45157 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
45158 && XEXP (x, 1) == const0_rtx)
45159 *total = 0;
45160
45161 return false;
45162
45163 case FLOAT_EXTEND:
45164 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
45165 *total = 0;
45166 return false;
45167
45168 case ABS:
45169 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
45170 /* ??? SSE cost should be used here. */
45171 *total = cost->fabs;
45172 else if (X87_FLOAT_MODE_P (mode))
45173 *total = cost->fabs;
45174 else if (FLOAT_MODE_P (mode))
45175 /* ??? SSE vector cost should be used here. */
45176 *total = cost->fabs;
45177 return false;
45178
45179 case SQRT:
45180 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
45181 /* ??? SSE cost should be used here. */
45182 *total = cost->fsqrt;
45183 else if (X87_FLOAT_MODE_P (mode))
45184 *total = cost->fsqrt;
45185 else if (FLOAT_MODE_P (mode))
45186 /* ??? SSE vector cost should be used here. */
45187 *total = cost->fsqrt;
45188 return false;
45189
45190 case UNSPEC:
45191 if (XINT (x, 1) == UNSPEC_TP)
45192 *total = 0;
45193 return false;
45194
45195 case VEC_SELECT:
45196 case VEC_CONCAT:
45197 case VEC_DUPLICATE:
45198 /* ??? Assume all of these vector manipulation patterns are
45199 recognizable, in which case they all pretty much have the
45200 same cost. */
45201 *total = cost->fabs;
45202 return true;
45203 case VEC_MERGE:
45204 mask = XEXP (x, 2);
45205 /* This is a masked instruction; assume the same cost
45206 as the non-masked variant. */
45207 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
45208 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
45209 else
45210 *total = cost->fabs;
45211 return true;
45212
45213 default:
45214 return false;
45215 }
45216 }
45217
45218 #if TARGET_MACHO
45219
45220 static int current_machopic_label_num;
45221
45222 /* Given a symbol name and its associated stub, write out the
45223 definition of the stub. */
45224
45225 void
45226 machopic_output_stub (FILE *file, const char *symb, const char *stub)
45227 {
45228 unsigned int length;
45229 char *binder_name, *symbol_name, lazy_ptr_name[32];
45230 int label = ++current_machopic_label_num;
45231
45232 /* For 64-bit we shouldn't get here. */
45233 gcc_assert (!TARGET_64BIT);
45234
45235 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
45236 symb = targetm.strip_name_encoding (symb);
45237
45238 length = strlen (stub);
45239 binder_name = XALLOCAVEC (char, length + 32);
45240 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
45241
45242 length = strlen (symb);
45243 symbol_name = XALLOCAVEC (char, length + 32);
45244 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
45245
45246 sprintf (lazy_ptr_name, "L%d$lz", label);
45247
45248 if (MACHOPIC_ATT_STUB)
45249 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
45250 else if (MACHOPIC_PURE)
45251 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
45252 else
45253 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
45254
45255 fprintf (file, "%s:\n", stub);
45256 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
45257
45258 if (MACHOPIC_ATT_STUB)
45259 {
45260 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
45261 }
45262 else if (MACHOPIC_PURE)
45263 {
45264 /* PIC stub. */
45265 /* 25-byte PIC stub using "CALL get_pc_thunk". */
45266 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
45267 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
45268 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
45269 label, lazy_ptr_name, label);
45270 fprintf (file, "\tjmp\t*%%ecx\n");
45271 }
45272 else
45273 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
45274
45275 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
45276 it needs no stub-binding-helper. */
45277 if (MACHOPIC_ATT_STUB)
45278 return;
45279
45280 fprintf (file, "%s:\n", binder_name);
45281
45282 if (MACHOPIC_PURE)
45283 {
45284 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
45285 fprintf (file, "\tpushl\t%%ecx\n");
45286 }
45287 else
45288 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
45289
45290 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
45291
45292 /* N.B. Keep the correspondence of these
45293 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
45294 old-pic/new-pic/non-pic stubs; altering this will break
45295 compatibility with existing dylibs. */
45296 if (MACHOPIC_PURE)
45297 {
45298 /* 25-byte PIC stub using "CALL get_pc_thunk". */
45299 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
45300 }
45301 else
45302 /* 16-byte -mdynamic-no-pic stub. */
45303 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
45304
45305 fprintf (file, "%s:\n", lazy_ptr_name);
45306 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
45307 fprintf (file, ASM_LONG "%s\n", binder_name);
45308 }
45309 #endif /* TARGET_MACHO */
45310
45311 /* Order the registers for register allocator. */
45312
45313 void
45314 x86_order_regs_for_local_alloc (void)
45315 {
45316 int pos = 0;
45317 int i;
45318
45319 /* First allocate the local general purpose registers. */
45320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45321 if (GENERAL_REGNO_P (i) && call_used_regs[i])
45322 reg_alloc_order [pos++] = i;
45323
45324 /* Global general purpose registers. */
45325 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45326 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
45327 reg_alloc_order [pos++] = i;
45328
45329 /* x87 registers come first in case we are doing FP math
45330 using them. */
45331 if (!TARGET_SSE_MATH)
45332 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
45333 reg_alloc_order [pos++] = i;
45334
45335 /* SSE registers. */
45336 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
45337 reg_alloc_order [pos++] = i;
45338 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
45339 reg_alloc_order [pos++] = i;
45340
45341 /* Extended REX SSE registers. */
45342 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
45343 reg_alloc_order [pos++] = i;
45344
45345 /* Mask register. */
45346 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
45347 reg_alloc_order [pos++] = i;
45348
45349 /* MPX bound registers. */
45350 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
45351 reg_alloc_order [pos++] = i;
45352
45353 /* x87 registers. */
45354 if (TARGET_SSE_MATH)
45355 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
45356 reg_alloc_order [pos++] = i;
45357
45358 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
45359 reg_alloc_order [pos++] = i;
45360
45361 /* Initialize the rest of the array, as we do not allocate some registers
45362 at all. */
45363 while (pos < FIRST_PSEUDO_REGISTER)
45364 reg_alloc_order [pos++] = 0;
45365 }
45366
45367 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
45368 in struct attribute_spec handler. */
45369 static tree
45370 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
45371 tree args,
45372 int,
45373 bool *no_add_attrs)
45374 {
45375 if (TREE_CODE (*node) != FUNCTION_TYPE
45376 && TREE_CODE (*node) != METHOD_TYPE
45377 && TREE_CODE (*node) != FIELD_DECL
45378 && TREE_CODE (*node) != TYPE_DECL)
45379 {
45380 warning (OPT_Wattributes, "%qE attribute only applies to functions",
45381 name);
45382 *no_add_attrs = true;
45383 return NULL_TREE;
45384 }
45385 if (TARGET_64BIT)
45386 {
45387 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
45388 name);
45389 *no_add_attrs = true;
45390 return NULL_TREE;
45391 }
45392 if (is_attribute_p ("callee_pop_aggregate_return", name))
45393 {
45394 tree cst;
45395
45396 cst = TREE_VALUE (args);
45397 if (TREE_CODE (cst) != INTEGER_CST)
45398 {
45399 warning (OPT_Wattributes,
45400 "%qE attribute requires an integer constant argument",
45401 name);
45402 *no_add_attrs = true;
45403 }
45404 else if (compare_tree_int (cst, 0) != 0
45405 && compare_tree_int (cst, 1) != 0)
45406 {
45407 warning (OPT_Wattributes,
45408 "argument to %qE attribute is neither zero, nor one",
45409 name);
45410 *no_add_attrs = true;
45411 }
45412
45413 return NULL_TREE;
45414 }
45415
45416 return NULL_TREE;
45417 }
45418
45419 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
45420 struct attribute_spec.handler. */
45421 static tree
45422 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
45423 bool *no_add_attrs)
45424 {
45425 if (TREE_CODE (*node) != FUNCTION_TYPE
45426 && TREE_CODE (*node) != METHOD_TYPE
45427 && TREE_CODE (*node) != FIELD_DECL
45428 && TREE_CODE (*node) != TYPE_DECL)
45429 {
45430 warning (OPT_Wattributes, "%qE attribute only applies to functions",
45431 name);
45432 *no_add_attrs = true;
45433 return NULL_TREE;
45434 }
45435
45436 /* The "ms_abi" and "sysv_abi" attributes are mutually exclusive. */
45437 if (is_attribute_p ("ms_abi", name))
45438 {
45439 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
45440 {
45441 error ("ms_abi and sysv_abi attributes are not compatible");
45442 }
45443
45444 return NULL_TREE;
45445 }
45446 else if (is_attribute_p ("sysv_abi", name))
45447 {
45448 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
45449 {
45450 error ("ms_abi and sysv_abi attributes are not compatible");
45451 }
45452
45453 return NULL_TREE;
45454 }
45455
45456 return NULL_TREE;
45457 }
45458
45459 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
45460 struct attribute_spec.handler. */
45461 static tree
45462 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
45463 bool *no_add_attrs)
45464 {
45465 tree *type = NULL;
45466 if (DECL_P (*node))
45467 {
45468 if (TREE_CODE (*node) == TYPE_DECL)
45469 type = &TREE_TYPE (*node);
45470 }
45471 else
45472 type = node;
45473
45474 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
45475 {
45476 warning (OPT_Wattributes, "%qE attribute ignored",
45477 name);
45478 *no_add_attrs = true;
45479 }
45480
45481 else if ((is_attribute_p ("ms_struct", name)
45482 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
45483 || ((is_attribute_p ("gcc_struct", name)
45484 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
45485 {
45486 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
45487 name);
45488 *no_add_attrs = true;
45489 }
45490
45491 return NULL_TREE;
45492 }
45493
45494 static tree
45495 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
45496 bool *no_add_attrs)
45497 {
45498 if (TREE_CODE (*node) != FUNCTION_DECL)
45499 {
45500 warning (OPT_Wattributes, "%qE attribute only applies to functions",
45501 name);
45502 *no_add_attrs = true;
45503 }
45504 return NULL_TREE;
45505 }
45506
45507 static tree
45508 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
45509 int, bool *)
45510 {
45511 return NULL_TREE;
45512 }
45513
45514 static tree
45515 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
45516 {
45517 /* DECL_RESULT and DECL_ARGUMENTS do not exist here yet,
45518 but the function type contains argument and return type data. */
45519 tree func_type = *node;
45520 tree return_type = TREE_TYPE (func_type);
45521
45522 int nargs = 0;
45523 tree current_arg_type = TYPE_ARG_TYPES (func_type);
45524 while (current_arg_type
45525 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
45526 {
45527 if (nargs == 0)
45528 {
45529 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
45530 error ("interrupt service routine should have a pointer "
45531 "as the first argument");
45532 }
45533 else if (nargs == 1)
45534 {
45535 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
45536 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
45537 error ("interrupt service routine should have unsigned %s"
45538 "int as the second argument",
45539 TARGET_64BIT
45540 ? (TARGET_X32 ? "long long " : "long ")
45541 : "");
45542 }
45543 nargs++;
45544 current_arg_type = TREE_CHAIN (current_arg_type);
45545 }
45546 if (!nargs || nargs > 2)
45547 error ("interrupt service routine can only have a pointer argument "
45548 "and an optional integer argument");
45549 if (! VOID_TYPE_P (return_type))
45550 error ("interrupt service routine can't have non-void return value");
45551
45552 return NULL_TREE;
45553 }
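/* The checks above accept, for example, handlers of the following shapes
   (an illustrative sketch; "struct interrupt_frame" and "uword_t" are the
   names used in the user documentation for this attribute, not types
   defined in this file):

     void handler (struct interrupt_frame *frame);
     void handler (struct interrupt_frame *frame, uword_t error_code);

   i.e. a pointer as the first argument, an optional word-sized unsigned
   integer as the second argument, and a void return type.  */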
45554
45555 static bool
45556 ix86_ms_bitfield_layout_p (const_tree record_type)
45557 {
45558 return ((TARGET_MS_BITFIELD_LAYOUT
45559 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
45560 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
45561 }
45562
45563 /* Returns an expression indicating where the this parameter is
45564 located on entry to the FUNCTION. */
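/* Summarizing the cases handled below: on 64-bit targets THIS arrives in
   the first integer argument register, or in the second one when a hidden
   aggregate-return pointer occupies the first.  On 32-bit targets it is
   in a register for regparm/fastcall/thiscall functions (with similar
   adjustments for aggregate returns), and otherwise it lives on the stack
   just above the return address, one slot further up when a hidden
   aggregate-return pointer is present.  */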
45565
45566 static rtx
45567 x86_this_parameter (tree function)
45568 {
45569 tree type = TREE_TYPE (function);
45570 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
45571 int nregs;
45572
45573 if (TARGET_64BIT)
45574 {
45575 const int *parm_regs;
45576
45577 if (ix86_function_type_abi (type) == MS_ABI)
45578 parm_regs = x86_64_ms_abi_int_parameter_registers;
45579 else
45580 parm_regs = x86_64_int_parameter_registers;
45581 return gen_rtx_REG (Pmode, parm_regs[aggr]);
45582 }
45583
45584 nregs = ix86_function_regparm (type, function);
45585
45586 if (nregs > 0 && !stdarg_p (type))
45587 {
45588 int regno;
45589 unsigned int ccvt = ix86_get_callcvt (type);
45590
45591 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
45592 regno = aggr ? DX_REG : CX_REG;
45593 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
45594 {
45595 regno = CX_REG;
45596 if (aggr)
45597 return gen_rtx_MEM (SImode,
45598 plus_constant (Pmode, stack_pointer_rtx, 4));
45599 }
45600 else
45601 {
45602 regno = AX_REG;
45603 if (aggr)
45604 {
45605 regno = DX_REG;
45606 if (nregs == 1)
45607 return gen_rtx_MEM (SImode,
45608 plus_constant (Pmode,
45609 stack_pointer_rtx, 4));
45610 }
45611 }
45612 return gen_rtx_REG (SImode, regno);
45613 }
45614
45615 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
45616 aggr ? 8 : 4));
45617 }
45618
45619 /* Determine whether x86_output_mi_thunk can succeed. */
45620
45621 static bool
45622 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
45623 const_tree function)
45624 {
45625 /* 64-bit can handle anything. */
45626 if (TARGET_64BIT)
45627 return true;
45628
45629 /* For 32-bit, everything's fine if we have one free register. */
45630 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
45631 return true;
45632
45633 /* Need a free register for vcall_offset. */
45634 if (vcall_offset)
45635 return false;
45636
45637 /* Need a free register for GOT references. */
45638 if (flag_pic && !targetm.binds_local_p (function))
45639 return false;
45640
45641 /* Otherwise ok. */
45642 return true;
45643 }
45644
45645 /* Output the assembler code for a thunk function. THUNK_DECL is the
45646 declaration for the thunk function itself, FUNCTION is the decl for
45647 the target function. DELTA is an immediate constant offset to be
45648 added to THIS. If VCALL_OFFSET is nonzero, the word at
45649 *(*this + vcall_offset) should be added to THIS. */
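/* In C-like pseudocode the thunk performs roughly (a sketch of the
   semantics only, not of the RTL emitted below):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*(void ***) this + VCALL_OFFSET / sizeof (void *));
     return FUNCTION (this, ...);   -- emitted as a tail call

   with THIS adjusted in place before control is transferred.  */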
45650
45651 static void
45652 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
45653 HOST_WIDE_INT vcall_offset, tree function)
45654 {
45655 rtx this_param = x86_this_parameter (function);
45656 rtx this_reg, tmp, fnaddr;
45657 unsigned int tmp_regno;
45658 rtx_insn *insn;
45659
45660 if (TARGET_64BIT)
45661 tmp_regno = R10_REG;
45662 else
45663 {
45664 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
45665 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
45666 tmp_regno = AX_REG;
45667 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
45668 tmp_regno = DX_REG;
45669 else
45670 tmp_regno = CX_REG;
45671 }
45672
45673 emit_note (NOTE_INSN_PROLOGUE_END);
45674
45675 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
45676 pull it in now and let DELTA benefit. */
45677 if (REG_P (this_param))
45678 this_reg = this_param;
45679 else if (vcall_offset)
45680 {
45681 /* Put the this parameter into %eax. */
45682 this_reg = gen_rtx_REG (Pmode, AX_REG);
45683 emit_move_insn (this_reg, this_param);
45684 }
45685 else
45686 this_reg = NULL_RTX;
45687
45688 /* Adjust the this parameter by a fixed constant. */
45689 if (delta)
45690 {
45691 rtx delta_rtx = GEN_INT (delta);
45692 rtx delta_dst = this_reg ? this_reg : this_param;
45693
45694 if (TARGET_64BIT)
45695 {
45696 if (!x86_64_general_operand (delta_rtx, Pmode))
45697 {
45698 tmp = gen_rtx_REG (Pmode, tmp_regno);
45699 emit_move_insn (tmp, delta_rtx);
45700 delta_rtx = tmp;
45701 }
45702 }
45703
45704 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
45705 }
45706
45707 /* Adjust the this parameter by a value stored in the vtable. */
45708 if (vcall_offset)
45709 {
45710 rtx vcall_addr, vcall_mem, this_mem;
45711
45712 tmp = gen_rtx_REG (Pmode, tmp_regno);
45713
45714 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
45715 if (Pmode != ptr_mode)
45716 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
45717 emit_move_insn (tmp, this_mem);
45718
45719 /* Adjust the this parameter. */
45720 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
45721 if (TARGET_64BIT
45722 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
45723 {
45724 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
45725 emit_move_insn (tmp2, GEN_INT (vcall_offset));
45726 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
45727 }
45728
45729 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
45730 if (Pmode != ptr_mode)
45731 emit_insn (gen_addsi_1_zext (this_reg,
45732 gen_rtx_REG (ptr_mode,
45733 REGNO (this_reg)),
45734 vcall_mem));
45735 else
45736 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
45737 }
45738
45739 /* If necessary, drop THIS back to its stack slot. */
45740 if (this_reg && this_reg != this_param)
45741 emit_move_insn (this_param, this_reg);
45742
45743 fnaddr = XEXP (DECL_RTL (function), 0);
45744 if (TARGET_64BIT)
45745 {
45746 if (!flag_pic || targetm.binds_local_p (function)
45747 || TARGET_PECOFF)
45748 ;
45749 else
45750 {
45751 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
45752 tmp = gen_rtx_CONST (Pmode, tmp);
45753 fnaddr = gen_const_mem (Pmode, tmp);
45754 }
45755 }
45756 else
45757 {
45758 if (!flag_pic || targetm.binds_local_p (function))
45759 ;
45760 #if TARGET_MACHO
45761 else if (TARGET_MACHO)
45762 {
45763 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
45764 fnaddr = XEXP (fnaddr, 0);
45765 }
45766 #endif /* TARGET_MACHO */
45767 else
45768 {
45769 tmp = gen_rtx_REG (Pmode, CX_REG);
45770 output_set_got (tmp, NULL_RTX);
45771
45772 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
45773 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
45774 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
45775 fnaddr = gen_const_mem (Pmode, fnaddr);
45776 }
45777 }
45778
45779 /* Our sibling call patterns do not allow memories, because we have no
45780 predicate that can distinguish between frame and non-frame memory.
45781 For our purposes here, we can get away with (ab)using a jump pattern,
45782 because we're going to do no optimization. */
45783 if (MEM_P (fnaddr))
45784 {
45785 if (sibcall_insn_operand (fnaddr, word_mode))
45786 {
45787 fnaddr = XEXP (DECL_RTL (function), 0);
45788 tmp = gen_rtx_MEM (QImode, fnaddr);
45789 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
45790 tmp = emit_call_insn (tmp);
45791 SIBLING_CALL_P (tmp) = 1;
45792 }
45793 else
45794 emit_jump_insn (gen_indirect_jump (fnaddr));
45795 }
45796 else
45797 {
45798 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
45799 {
45800 // CM_LARGE_PIC always uses a pseudo PIC register, which is
45801 // uninitialized here. Since FUNCTION is local and calling it
45802 // doesn't go through the PLT, we use the scratch register %r11
45803 // as the PIC register and initialize it here.
45804 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
45805 ix86_init_large_pic_reg (tmp_regno);
45806 fnaddr = legitimize_pic_address (fnaddr,
45807 gen_rtx_REG (Pmode, tmp_regno));
45808 }
45809
45810 if (!sibcall_insn_operand (fnaddr, word_mode))
45811 {
45812 tmp = gen_rtx_REG (word_mode, tmp_regno);
45813 if (GET_MODE (fnaddr) != word_mode)
45814 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
45815 emit_move_insn (tmp, fnaddr);
45816 fnaddr = tmp;
45817 }
45818
45819 tmp = gen_rtx_MEM (QImode, fnaddr);
45820 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
45821 tmp = emit_call_insn (tmp);
45822 SIBLING_CALL_P (tmp) = 1;
45823 }
45824 emit_barrier ();
45825
45826 /* Emit just enough of rest_of_compilation to get the insns emitted.
45827 Note that use_thunk calls assemble_start_function et al. */
45828 insn = get_insns ();
45829 shorten_branches (insn);
45830 final_start_function (insn, file, 1);
45831 final (insn, file, 1);
45832 final_end_function ();
45833 }
45834
45835 static void
45836 x86_file_start (void)
45837 {
45838 default_file_start ();
45839 if (TARGET_16BIT)
45840 fputs ("\t.code16gcc\n", asm_out_file);
45841 #if TARGET_MACHO
45842 darwin_file_start ();
45843 #endif
45844 if (X86_FILE_START_VERSION_DIRECTIVE)
45845 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
45846 if (X86_FILE_START_FLTUSED)
45847 fputs ("\t.global\t__fltused\n", asm_out_file);
45848 if (ix86_asm_dialect == ASM_INTEL)
45849 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
45850 }
45851
45852 int
45853 x86_field_alignment (tree field, int computed)
45854 {
45855 machine_mode mode;
45856 tree type = TREE_TYPE (field);
45857
45858 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
45859 return computed;
45860 if (TARGET_IAMCU)
45861 return iamcu_alignment (type, computed);
45862 mode = TYPE_MODE (strip_array_types (type));
45863 if (mode == DFmode || mode == DCmode
45864 || GET_MODE_CLASS (mode) == MODE_INT
45865 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
45866 return MIN (32, computed);
45867 return computed;
45868 }
45869
45870 /* Print call to TARGET to FILE. */
45871
45872 static void
45873 x86_print_call_or_nop (FILE *file, const char *target)
45874 {
45875 if (flag_nop_mcount)
45876 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
45877 else
45878 fprintf (file, "1:\tcall\t%s\n", target);
45879 }
45880
45881 /* Output assembler code to FILE to increment profiler label # LABELNO
45882 for profiling a function entry. */
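/* For example, in the common 64-bit non-PIC case with profile counters
   enabled, the emitted sequence looks like (the exact mcount symbol is
   target-dependent, see MCOUNT_NAME; label names are illustrative):

     leaq .LP0(%rip), %r11
     1: call mcount

   and with -mrecord-mcount the address of the "1:" label is additionally
   recorded in the __mcount_loc section.  */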
45883 void
45884 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
45885 {
45886 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
45887 : MCOUNT_NAME);
45888 if (TARGET_64BIT)
45889 {
45890 #ifndef NO_PROFILE_COUNTERS
45891 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
45892 #endif
45893
45894 if (!TARGET_PECOFF && flag_pic)
45895 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
45896 else
45897 x86_print_call_or_nop (file, mcount_name);
45898 }
45899 else if (flag_pic)
45900 {
45901 #ifndef NO_PROFILE_COUNTERS
45902 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
45903 LPREFIX, labelno);
45904 #endif
45905 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
45906 }
45907 else
45908 {
45909 #ifndef NO_PROFILE_COUNTERS
45910 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
45911 LPREFIX, labelno);
45912 #endif
45913 x86_print_call_or_nop (file, mcount_name);
45914 }
45915
45916 if (flag_record_mcount)
45917 {
45918 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
45919 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
45920 fprintf (file, "\t.previous\n");
45921 }
45922 }
45923
45924 /* We don't have exact information about insn sizes, but we may quite
45925 safely assume that we are informed about all 1-byte insns and about
45926 memory address sizes. This is enough to eliminate unnecessary
45927 padding in 99% of cases. */
45928
45929 static int
45930 min_insn_size (rtx_insn *insn)
45931 {
45932 int l = 0, len;
45933
45934 if (!INSN_P (insn) || !active_insn_p (insn))
45935 return 0;
45936
45937 /* Discard alignments we've emitted and jump instructions. */
45938 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
45939 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
45940 return 0;
45941
45942 /* Important case - calls are always 5 bytes.
45943 It is common to have many calls in a row. */
45944 if (CALL_P (insn)
45945 && symbolic_reference_mentioned_p (PATTERN (insn))
45946 && !SIBLING_CALL_P (insn))
45947 return 5;
45948 len = get_attr_length (insn);
45949 if (len <= 1)
45950 return 1;
45951
45952 /* For normal instructions we rely on get_attr_length being exact,
45953 with a few exceptions. */
45954 if (!JUMP_P (insn))
45955 {
45956 enum attr_type type = get_attr_type (insn);
45957
45958 switch (type)
45959 {
45960 case TYPE_MULTI:
45961 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
45962 || asm_noperands (PATTERN (insn)) >= 0)
45963 return 0;
45964 break;
45965 case TYPE_OTHER:
45966 case TYPE_FCMP:
45967 break;
45968 default:
45969 /* Otherwise trust get_attr_length. */
45970 return len;
45971 }
45972
45973 l = get_attr_length_address (insn);
45974 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
45975 l = 4;
45976 }
45977 if (l)
45978 return 1+l;
45979 else
45980 return 2;
45981 }
45982
45983 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45984
45985 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
45986 window. */
45987
45988 static void
45989 ix86_avoid_jump_mispredicts (void)
45990 {
45991 rtx_insn *insn, *start = get_insns ();
45992 int nbytes = 0, njumps = 0;
45993 bool isjump = false;
45994
45995 /* Look for all minimal intervals of instructions containing 4 jumps.
45996 The intervals are bounded by START and INSN. NBYTES is the total
45997 size of the instructions in the interval, including INSN and not
45998 including START. When NBYTES is smaller than 16, it is possible
45999 that the end of START and INSN end up in the same 16-byte page.
46000
46001 The smallest offset in the page at which INSN can start is the case
46002 where START ends at offset 0. The offset of INSN is then
46003 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
46004 maxskip 15 - NBYTES + sizeof (INSN).
46005 Don't consider an asm goto as a jump: while it can contain a jump,
46006 it doesn't have to, since control can reach its label(s) by other
46007 means; we also estimate the minimum length of all asm stmts as 0. */
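/* For example, four consecutive 5-byte call insns can all start within a
   single 16-byte window; in that situation the code below emits padding
   before the fourth call so that at most three of the jumps remain in any
   one window (illustrative numbers only).  */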
46008 for (insn = start; insn; insn = NEXT_INSN (insn))
46009 {
46010 int min_size;
46011
46012 if (LABEL_P (insn))
46013 {
46014 int align = label_to_alignment (insn);
46015 int max_skip = label_to_max_skip (insn);
46016
46017 if (max_skip > 15)
46018 max_skip = 15;
46019 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
46020 already in the current 16 byte page, because otherwise
46021 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
46022 bytes to reach 16 byte boundary. */
46023 if (align <= 0
46024 || (align <= 3 && max_skip != (1 << align) - 1))
46025 max_skip = 0;
46026 if (dump_file)
46027 fprintf (dump_file, "Label %i with max_skip %i\n",
46028 INSN_UID (insn), max_skip);
46029 if (max_skip)
46030 {
46031 while (nbytes + max_skip >= 16)
46032 {
46033 start = NEXT_INSN (start);
46034 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
46035 || CALL_P (start))
46036 njumps--, isjump = true;
46037 else
46038 isjump = false;
46039 nbytes -= min_insn_size (start);
46040 }
46041 }
46042 continue;
46043 }
46044
46045 min_size = min_insn_size (insn);
46046 nbytes += min_size;
46047 if (dump_file)
46048 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
46049 INSN_UID (insn), min_size);
46050 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
46051 || CALL_P (insn))
46052 njumps++;
46053 else
46054 continue;
46055
46056 while (njumps > 3)
46057 {
46058 start = NEXT_INSN (start);
46059 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
46060 || CALL_P (start))
46061 njumps--, isjump = true;
46062 else
46063 isjump = false;
46064 nbytes -= min_insn_size (start);
46065 }
46066 gcc_assert (njumps >= 0);
46067 if (dump_file)
46068 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
46069 INSN_UID (start), INSN_UID (insn), nbytes);
46070
46071 if (njumps == 3 && isjump && nbytes < 16)
46072 {
46073 int padsize = 15 - nbytes + min_insn_size (insn);
46074
46075 if (dump_file)
46076 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
46077 INSN_UID (insn), padsize);
46078 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
46079 }
46080 }
46081 }
46082 #endif
46083
46084 /* AMD Athlon works faster
46085 when RET is not the destination of a conditional jump or directly
46086 preceded by another jump instruction. We avoid the penalty by
46087 inserting a NOP just before the RET instruction in such cases. */
46088 static void
46089 ix86_pad_returns (void)
46090 {
46091 edge e;
46092 edge_iterator ei;
46093
46094 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
46095 {
46096 basic_block bb = e->src;
46097 rtx_insn *ret = BB_END (bb);
46098 rtx_insn *prev;
46099 bool replace = false;
46100
46101 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
46102 || optimize_bb_for_size_p (bb))
46103 continue;
46104 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
46105 if (active_insn_p (prev) || LABEL_P (prev))
46106 break;
46107 if (prev && LABEL_P (prev))
46108 {
46109 edge e;
46110 edge_iterator ei;
46111
46112 FOR_EACH_EDGE (e, ei, bb->preds)
46113 if (EDGE_FREQUENCY (e) && e->src->index >= 0
46114 && !(e->flags & EDGE_FALLTHRU))
46115 {
46116 replace = true;
46117 break;
46118 }
46119 }
46120 if (!replace)
46121 {
46122 prev = prev_active_insn (ret);
46123 if (prev
46124 && ((JUMP_P (prev) && any_condjump_p (prev))
46125 || CALL_P (prev)))
46126 replace = true;
46127 /* Empty functions get a branch mispredict even when
46128 the jump destination is not visible to us. */
46129 if (!prev && !optimize_function_for_size_p (cfun))
46130 replace = true;
46131 }
46132 if (replace)
46133 {
46134 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
46135 delete_insn (ret);
46136 }
46137 }
46138 }
46139
46140 /* Count the minimum number of instructions in BB. Return 4 if the
46141 number of instructions >= 4. */
46142
46143 static int
46144 ix86_count_insn_bb (basic_block bb)
46145 {
46146 rtx_insn *insn;
46147 int insn_count = 0;
46148
46149 /* Count number of instructions in this block. Return 4 if the number
46150 of instructions >= 4. */
46151 FOR_BB_INSNS (bb, insn)
46152 {
46153 /* This only happens in exit blocks. */
46154 if (JUMP_P (insn)
46155 && ANY_RETURN_P (PATTERN (insn)))
46156 break;
46157
46158 if (NONDEBUG_INSN_P (insn)
46159 && GET_CODE (PATTERN (insn)) != USE
46160 && GET_CODE (PATTERN (insn)) != CLOBBER)
46161 {
46162 insn_count++;
46163 if (insn_count >= 4)
46164 return insn_count;
46165 }
46166 }
46167
46168 return insn_count;
46169 }
46170
46171
46172 /* Count the minimum number of instructions in a code path ending in BB.
46173 Return 4 if the number of instructions >= 4. */
46174
46175 static int
46176 ix86_count_insn (basic_block bb)
46177 {
46178 edge e;
46179 edge_iterator ei;
46180 int min_prev_count;
46181
46182 /* Only bother counting instructions along paths with no
46183 more than 2 basic blocks between entry and exit. Given
46184 that BB has an edge to exit, determine if a predecessor
46185 of BB has an edge from entry. If so, compute the number
46186 of instructions in the predecessor block. If there
46187 happen to be multiple such blocks, compute the minimum. */
46188 min_prev_count = 4;
46189 FOR_EACH_EDGE (e, ei, bb->preds)
46190 {
46191 edge prev_e;
46192 edge_iterator prev_ei;
46193
46194 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
46195 {
46196 min_prev_count = 0;
46197 break;
46198 }
46199 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
46200 {
46201 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
46202 {
46203 int count = ix86_count_insn_bb (e->src);
46204 if (count < min_prev_count)
46205 min_prev_count = count;
46206 break;
46207 }
46208 }
46209 }
46210
46211 if (min_prev_count < 4)
46212 min_prev_count += ix86_count_insn_bb (bb);
46213
46214 return min_prev_count;
46215 }
46216
46217 /* Pad short function to 4 instructions. */
46218
46219 static void
46220 ix86_pad_short_function (void)
46221 {
46222 edge e;
46223 edge_iterator ei;
46224
46225 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
46226 {
46227 rtx_insn *ret = BB_END (e->src);
46228 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
46229 {
46230 int insn_count = ix86_count_insn (e->src);
46231
46232 /* Pad short function. */
46233 if (insn_count < 4)
46234 {
46235 rtx_insn *insn = ret;
46236
46237 /* Find epilogue. */
46238 while (insn
46239 && (!NOTE_P (insn)
46240 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
46241 insn = PREV_INSN (insn);
46242
46243 if (!insn)
46244 insn = ret;
46245
46246 /* Two NOPs count as one instruction. */
46247 insn_count = 2 * (4 - insn_count);
46248 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
46249 }
46250 }
46251 }
46252 }
46253
46254 /* Fix up a Windows system unwinder issue. If an EH region falls through into
46255 the epilogue, the Windows system unwinder will apply epilogue logic and
46256 produce incorrect offsets. This can be avoided by adding a nop between
46257 the last insn that can throw and the first insn of the epilogue. */
46258
46259 static void
46260 ix86_seh_fixup_eh_fallthru (void)
46261 {
46262 edge e;
46263 edge_iterator ei;
46264
46265 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
46266 {
46267 rtx_insn *insn, *next;
46268
46269 /* Find the beginning of the epilogue. */
46270 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
46271 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
46272 break;
46273 if (insn == NULL)
46274 continue;
46275
46276 /* We only care about preceding insns that can throw. */
46277 insn = prev_active_insn (insn);
46278 if (insn == NULL || !can_throw_internal (insn))
46279 continue;
46280
46281 /* Do not separate calls from their debug information. */
46282 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
46283 if (NOTE_P (next)
46284 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
46285 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
46286 insn = next;
46287 else
46288 break;
46289
46290 emit_insn_after (gen_nops (const1_rtx), insn);
46291 }
46292 }
46293
46294 /* Given a register number BASE, the lowest of a group of registers, update
46295 regsets IN and OUT with the registers that should be avoided in input
46296 and output operands respectively when trying to avoid generating a modr/m
46297 byte for -fmitigate-rop. */
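/* The registers flagged here appear to be those whose reg or r/m field
   encodings, combined with mod == 3, produce modr/m bytes 0xc2, 0xc3,
   0xca or 0xcb -- byte values that double as RET opcodes and therefore
   make convenient ROP gadgets; ix86_rop_should_change_byte_p contains the
   actual byte test.  */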
46298
46299 static void
46300 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
46301 {
46302 SET_HARD_REG_BIT (out, base);
46303 SET_HARD_REG_BIT (out, base + 1);
46304 SET_HARD_REG_BIT (in, base + 2);
46305 SET_HARD_REG_BIT (in, base + 3);
46306 }
46307
46308 /* Called if -fmitigate-rop is in effect. Try to rewrite instructions so
46309 that certain encodings of modr/m bytes do not occur. */
46310 static void
46311 ix86_mitigate_rop (void)
46312 {
46313 HARD_REG_SET input_risky;
46314 HARD_REG_SET output_risky;
46315 HARD_REG_SET inout_risky;
46316
46317 CLEAR_HARD_REG_SET (output_risky);
46318 CLEAR_HARD_REG_SET (input_risky);
46319 SET_HARD_REG_BIT (output_risky, AX_REG);
46320 SET_HARD_REG_BIT (output_risky, CX_REG);
46321 SET_HARD_REG_BIT (input_risky, BX_REG);
46322 SET_HARD_REG_BIT (input_risky, DX_REG);
46323 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
46324 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
46325 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
46326 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
46327 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
46328 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
46329 COPY_HARD_REG_SET (inout_risky, input_risky);
46330 IOR_HARD_REG_SET (inout_risky, output_risky);
46331
46332 df_note_add_problem ();
46333 /* Fix up what stack-regs did. */
46334 df_insn_rescan_all ();
46335 df_analyze ();
46336
46337 regrename_init (true);
46338 regrename_analyze (NULL);
46339
46340 auto_vec<du_head_p> cands;
46341
46342 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
46343 {
46344 if (!NONDEBUG_INSN_P (insn))
46345 continue;
46346
46347 if (GET_CODE (PATTERN (insn)) == USE
46348 || GET_CODE (PATTERN (insn)) == CLOBBER)
46349 continue;
46350
46351 extract_insn (insn);
46352
46353 int opno0, opno1;
46354 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
46355 recog_data.n_operands, &opno0,
46356 &opno1);
46357
46358 if (!ix86_rop_should_change_byte_p (modrm))
46359 continue;
46360
46361 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
46362
46363 /* This happens when regrename has to fail a block. */
46364 if (!info->op_info)
46365 continue;
46366
46367 if (info->op_info[opno0].n_chains != 0)
46368 {
46369 gcc_assert (info->op_info[opno0].n_chains == 1);
46370 du_head_p op0c;
46371 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
46372 if (op0c->target_data_1 + op0c->target_data_2 == 0
46373 && !op0c->cannot_rename)
46374 cands.safe_push (op0c);
46375
46376 op0c->target_data_1++;
46377 }
46378 if (info->op_info[opno1].n_chains != 0)
46379 {
46380 gcc_assert (info->op_info[opno1].n_chains == 1);
46381 du_head_p op1c;
46382 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
46383 if (op1c->target_data_1 + op1c->target_data_2 == 0
46384 && !op1c->cannot_rename)
46385 cands.safe_push (op1c);
46386
46387 op1c->target_data_2++;
46388 }
46389 }
46390
46391 int i;
46392 du_head_p head;
46393 FOR_EACH_VEC_ELT (cands, i, head)
46394 {
46395 int old_reg, best_reg;
46396 HARD_REG_SET unavailable;
46397
46398 CLEAR_HARD_REG_SET (unavailable);
46399 if (head->target_data_1)
46400 IOR_HARD_REG_SET (unavailable, output_risky);
46401 if (head->target_data_2)
46402 IOR_HARD_REG_SET (unavailable, input_risky);
46403
46404 int n_uses;
46405 reg_class superclass = regrename_find_superclass (head, &n_uses,
46406 &unavailable);
46407 old_reg = head->regno;
46408 best_reg = find_rename_reg (head, superclass, &unavailable,
46409 old_reg, false);
46410 bool ok = regrename_do_replace (head, best_reg);
46411 gcc_assert (ok);
46412 if (dump_file)
46413 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
46414 reg_names[best_reg], reg_class_names[superclass]);
46415
46416 }
46417
46418 regrename_finish ();
46419
46420 df_analyze ();
46421
46422 basic_block bb;
46423 regset_head live;
46424
46425 INIT_REG_SET (&live);
46426
46427 FOR_EACH_BB_FN (bb, cfun)
46428 {
46429 rtx_insn *insn;
46430
46431 COPY_REG_SET (&live, DF_LR_OUT (bb));
46432 df_simulate_initialize_backwards (bb, &live);
46433
46434 FOR_BB_INSNS_REVERSE (bb, insn)
46435 {
46436 if (!NONDEBUG_INSN_P (insn))
46437 continue;
46438
46439 df_simulate_one_insn_backwards (bb, insn, &live);
46440
46441 if (GET_CODE (PATTERN (insn)) == USE
46442 || GET_CODE (PATTERN (insn)) == CLOBBER)
46443 continue;
46444
46445 extract_insn (insn);
46446 constrain_operands_cached (insn, reload_completed);
46447 int opno0, opno1;
46448 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
46449 recog_data.n_operands, &opno0,
46450 &opno1);
46451 if (modrm < 0
46452 || !ix86_rop_should_change_byte_p (modrm)
46453 || opno0 == opno1)
46454 continue;
46455
46456 rtx oldreg = recog_data.operand[opno1];
46457 preprocess_constraints (insn);
46458 const operand_alternative *alt = which_op_alt ();
46459
46460 int i;
46461 for (i = 0; i < recog_data.n_operands; i++)
46462 if (i != opno1
46463 && alt[i].earlyclobber
46464 && reg_overlap_mentioned_p (recog_data.operand[i],
46465 oldreg))
46466 break;
46467
46468 if (i < recog_data.n_operands)
46469 continue;
46470
46471 if (dump_file)
46472 fprintf (dump_file,
46473 "attempting to fix modrm byte in insn %d:"
46474 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
46475 reg_class_names[alt[opno1].cl]);
46476
46477 HARD_REG_SET unavailable;
46478 REG_SET_TO_HARD_REG_SET (unavailable, &live);
46479 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
46480 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
46481 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
46482 IOR_HARD_REG_SET (unavailable, output_risky);
46483 IOR_COMPL_HARD_REG_SET (unavailable,
46484 reg_class_contents[alt[opno1].cl]);
46485
46486 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
46487 if (!TEST_HARD_REG_BIT (unavailable, i))
46488 break;
46489 if (i == FIRST_PSEUDO_REGISTER)
46490 {
46491 if (dump_file)
46492 fprintf (dump_file, ", none available\n");
46493 continue;
46494 }
46495 if (dump_file)
46496 fprintf (dump_file, " -> %d\n", i);
46497 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
46498 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
46499 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
46500 }
46501 }
46502 }
46503
46504 /* Implement machine-specific optimizations. We implement padding of returns
46505 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
46506 static void
46507 ix86_reorg (void)
46508 {
46509 /* We are freeing block_for_insn in the toplev to keep compatibility
46510 with old MDEP_REORGS that are not CFG based. Recompute it now. */
46511 compute_bb_for_insn ();
46512
46513 if (flag_mitigate_rop)
46514 ix86_mitigate_rop ();
46515
46516 if (TARGET_SEH && current_function_has_exception_handlers ())
46517 ix86_seh_fixup_eh_fallthru ();
46518
46519 if (optimize && optimize_function_for_speed_p (cfun))
46520 {
46521 if (TARGET_PAD_SHORT_FUNCTION)
46522 ix86_pad_short_function ();
46523 else if (TARGET_PAD_RETURNS)
46524 ix86_pad_returns ();
46525 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
46526 if (TARGET_FOUR_JUMP_LIMIT)
46527 ix86_avoid_jump_mispredicts ();
46528 #endif
46529 }
46530 }
46531
46532 /* Return nonzero when a QImode register that must be represented via a REX
46533 prefix is used. */
46534 bool
46535 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
46536 {
46537 int i;
46538 extract_insn_cached (insn);
46539 for (i = 0; i < recog_data.n_operands; i++)
46540 if (GENERAL_REG_P (recog_data.operand[i])
46541 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
46542 return true;
46543 return false;
46544 }
46545
46546 /* Return true when INSN mentions a register that must be encoded using a
46547 REX prefix. */
46548 bool
46549 x86_extended_reg_mentioned_p (rtx insn)
46550 {
46551 subrtx_iterator::array_type array;
46552 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
46553 {
46554 const_rtx x = *iter;
46555 if (REG_P (x)
46556 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
46557 return true;
46558 }
46559 return false;
46560 }
46561
46562 /* If profitable, negate (without causing overflow) integer constant
46563 of mode MODE at location LOC. Return true in this case. */
46564 bool
46565 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
46566 {
46567 HOST_WIDE_INT val;
46568
46569 if (!CONST_INT_P (*loc))
46570 return false;
46571
46572 switch (mode)
46573 {
46574 case DImode:
46575 /* DImode x86_64 constants must fit in 32 bits. */
46576 gcc_assert (x86_64_immediate_operand (*loc, mode));
46577
46578 mode = SImode;
46579 break;
46580
46581 case SImode:
46582 case HImode:
46583 case QImode:
46584 break;
46585
46586 default:
46587 gcc_unreachable ();
46588 }
46589
46590 /* Avoid overflows. */
46591 if (mode_signbit_p (mode, *loc))
46592 return false;
46593
46594 val = INTVAL (*loc);
46595
46596 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
46597 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
46598 if ((val < 0 && val != -128)
46599 || val == 128)
46600 {
46601 *loc = GEN_INT (-val);
46602 return true;
46603 }
46604
46605 return false;
46606 }
46607
46608 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
46609 optabs would emit if we didn't have TFmode patterns. */
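/* Roughly, in C-like pseudocode (a sketch of the logic expanded below,
   not of the exact RTL):

     if ((signed) in >= 0)
       out = (FLOAT) in;                   -- plain signed conversion
     else
       {
         tmp = (in >> 1) | (in & 1);       -- halve, keeping the low bit
         out = (FLOAT) tmp;
         out = out + out;                  -- double to compensate
       }  */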
46610
46611 void
46612 x86_emit_floatuns (rtx operands[2])
46613 {
46614 rtx_code_label *neglab, *donelab;
46615 rtx i0, i1, f0, in, out;
46616 machine_mode mode, inmode;
46617
46618 inmode = GET_MODE (operands[1]);
46619 gcc_assert (inmode == SImode || inmode == DImode);
46620
46621 out = operands[0];
46622 in = force_reg (inmode, operands[1]);
46623 mode = GET_MODE (out);
46624 neglab = gen_label_rtx ();
46625 donelab = gen_label_rtx ();
46626 f0 = gen_reg_rtx (mode);
46627
46628 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
46629
46630 expand_float (out, in, 0);
46631
46632 emit_jump_insn (gen_jump (donelab));
46633 emit_barrier ();
46634
46635 emit_label (neglab);
46636
46637 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
46638 1, OPTAB_DIRECT);
46639 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
46640 1, OPTAB_DIRECT);
46641 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
46642
46643 expand_float (f0, i0, 0);
46644
46645 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
46646
46647 emit_label (donelab);
46648 }
46649 \f
46650 static bool canonicalize_perm (struct expand_vec_perm_d *d);
46651 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
46652 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
46653 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
46654
46655 /* Get a vector mode of the same size as the original but with elements
46656 twice as wide. This is only guaranteed to apply to integral vectors. */
46657
46658 static inline machine_mode
46659 get_mode_wider_vector (machine_mode o)
46660 {
46661 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
46662 machine_mode n = GET_MODE_WIDER_MODE (o);
46663 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
46664 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
46665 return n;
46666 }
46667
46668 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
46669 fill target with val via vec_duplicate. */
46670
46671 static bool
46672 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
46673 {
46674 bool ok;
46675 rtx_insn *insn;
46676 rtx dup;
46677
46678 /* First attempt to recognize VAL as-is. */
46679 dup = gen_rtx_VEC_DUPLICATE (mode, val);
46680 insn = emit_insn (gen_rtx_SET (target, dup));
46681 if (recog_memoized (insn) < 0)
46682 {
46683 rtx_insn *seq;
46684 /* If that fails, force VAL into a register. */
46685
46686 start_sequence ();
46687 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
46688 seq = get_insns ();
46689 end_sequence ();
46690 if (seq)
46691 emit_insn_before (seq, insn);
46692
46693 ok = recog_memoized (insn) >= 0;
46694 gcc_assert (ok);
46695 }
46696 return true;
46697 }
46698
46699 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
46700 with all elements equal to VAR. Return true if successful. */
46701
46702 static bool
46703 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
46704 rtx target, rtx val)
46705 {
46706 bool ok;
46707
46708 switch (mode)
46709 {
46710 case V2SImode:
46711 case V2SFmode:
46712 if (!mmx_ok)
46713 return false;
46714 /* FALLTHRU */
46715
46716 case V4DFmode:
46717 case V4DImode:
46718 case V8SFmode:
46719 case V8SImode:
46720 case V2DFmode:
46721 case V2DImode:
46722 case V4SFmode:
46723 case V4SImode:
46724 case V16SImode:
46725 case V8DImode:
46726 case V16SFmode:
46727 case V8DFmode:
46728 return ix86_vector_duplicate_value (mode, target, val);
46729
46730 case V4HImode:
46731 if (!mmx_ok)
46732 return false;
46733 if (TARGET_SSE || TARGET_3DNOW_A)
46734 {
46735 rtx x;
46736
46737 val = gen_lowpart (SImode, val);
46738 x = gen_rtx_TRUNCATE (HImode, val);
46739 x = gen_rtx_VEC_DUPLICATE (mode, x);
46740 emit_insn (gen_rtx_SET (target, x));
46741 return true;
46742 }
46743 goto widen;
46744
46745 case V8QImode:
46746 if (!mmx_ok)
46747 return false;
46748 goto widen;
46749
46750 case V8HImode:
46751 if (TARGET_AVX2)
46752 return ix86_vector_duplicate_value (mode, target, val);
46753
46754 if (TARGET_SSE2)
46755 {
46756 struct expand_vec_perm_d dperm;
46757 rtx tmp1, tmp2;
46758
46759 permute:
46760 memset (&dperm, 0, sizeof (dperm));
46761 dperm.target = target;
46762 dperm.vmode = mode;
46763 dperm.nelt = GET_MODE_NUNITS (mode);
46764 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
46765 dperm.one_operand_p = true;
46766
46767 /* Extend to SImode using a paradoxical SUBREG. */
46768 tmp1 = gen_reg_rtx (SImode);
46769 emit_move_insn (tmp1, gen_lowpart (SImode, val));
46770
46771 /* Insert the SImode value as low element of a V4SImode vector. */
46772 tmp2 = gen_reg_rtx (V4SImode);
46773 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
46774 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
46775
46776 ok = (expand_vec_perm_1 (&dperm)
46777 || expand_vec_perm_broadcast_1 (&dperm));
46778 gcc_assert (ok);
46779 return ok;
46780 }
46781 goto widen;
46782
46783 case V16QImode:
46784 if (TARGET_AVX2)
46785 return ix86_vector_duplicate_value (mode, target, val);
46786
46787 if (TARGET_SSE2)
46788 goto permute;
46789 goto widen;
46790
46791 widen:
46792 /* Replicate the value once into the next wider mode and recurse. */
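/* For instance (illustratively), a V8QImode broadcast of VAL becomes a
   V4HImode broadcast of (VAL << 8) | VAL, and so on until a mode that one
   of the cases above handles directly is reached.  */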
46793 {
46794 machine_mode smode, wsmode, wvmode;
46795 rtx x;
46796
46797 smode = GET_MODE_INNER (mode);
46798 wvmode = get_mode_wider_vector (mode);
46799 wsmode = GET_MODE_INNER (wvmode);
46800
46801 val = convert_modes (wsmode, smode, val, true);
46802 x = expand_simple_binop (wsmode, ASHIFT, val,
46803 GEN_INT (GET_MODE_BITSIZE (smode)),
46804 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46805 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
46806
46807 x = gen_reg_rtx (wvmode);
46808 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
46809 gcc_assert (ok);
46810 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
46811 return ok;
46812 }
46813
46814 case V16HImode:
46815 case V32QImode:
46816 if (TARGET_AVX2)
46817 return ix86_vector_duplicate_value (mode, target, val);
46818 else
46819 {
46820 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
46821 rtx x = gen_reg_rtx (hvmode);
46822
46823 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
46824 gcc_assert (ok);
46825
46826 x = gen_rtx_VEC_CONCAT (mode, x, x);
46827 emit_insn (gen_rtx_SET (target, x));
46828 }
46829 return true;
46830
46831 case V64QImode:
46832 case V32HImode:
46833 if (TARGET_AVX512BW)
46834 return ix86_vector_duplicate_value (mode, target, val);
46835 else
46836 {
46837 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
46838 rtx x = gen_reg_rtx (hvmode);
46839
46840 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
46841 gcc_assert (ok);
46842
46843 x = gen_rtx_VEC_CONCAT (mode, x, x);
46844 emit_insn (gen_rtx_SET (target, x));
46845 }
46846 return true;
46847
46848 default:
46849 return false;
46850 }
46851 }
46852
46853 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
46854 whose ONE_VAR element is VAR, and other elements are zero. Return true
46855 if successful. */
46856
46857 static bool
46858 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
46859 rtx target, rtx var, int one_var)
46860 {
46861 machine_mode vsimode;
46862 rtx new_target;
46863 rtx x, tmp;
46864 bool use_vector_set = false;
46865
46866 switch (mode)
46867 {
46868 case V2DImode:
46869 /* For SSE4.1, we normally use vector set. But if the second
46870 element is zero and inter-unit moves are OK, we use movq
46871 instead. */
46872 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
46873 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
46874 && one_var == 0));
46875 break;
46876 case V16QImode:
46877 case V4SImode:
46878 case V4SFmode:
46879 use_vector_set = TARGET_SSE4_1;
46880 break;
46881 case V8HImode:
46882 use_vector_set = TARGET_SSE2;
46883 break;
46884 case V4HImode:
46885 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
46886 break;
46887 case V32QImode:
46888 case V16HImode:
46889 case V8SImode:
46890 case V8SFmode:
46891 case V4DFmode:
46892 use_vector_set = TARGET_AVX;
46893 break;
46894 case V4DImode:
46895 /* Use ix86_expand_vector_set in 64bit mode only. */
46896 use_vector_set = TARGET_AVX && TARGET_64BIT;
46897 break;
46898 default:
46899 break;
46900 }
46901
46902 if (use_vector_set)
46903 {
46904 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
46905 var = force_reg (GET_MODE_INNER (mode), var);
46906 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46907 return true;
46908 }
46909
46910 switch (mode)
46911 {
46912 case V2SFmode:
46913 case V2SImode:
46914 if (!mmx_ok)
46915 return false;
46916 /* FALLTHRU */
46917
46918 case V2DFmode:
46919 case V2DImode:
46920 if (one_var != 0)
46921 return false;
46922 var = force_reg (GET_MODE_INNER (mode), var);
46923 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
46924 emit_insn (gen_rtx_SET (target, x));
46925 return true;
46926
46927 case V4SFmode:
46928 case V4SImode:
46929 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
46930 new_target = gen_reg_rtx (mode);
46931 else
46932 new_target = target;
46933 var = force_reg (GET_MODE_INNER (mode), var);
46934 x = gen_rtx_VEC_DUPLICATE (mode, var);
46935 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
46936 emit_insn (gen_rtx_SET (new_target, x));
46937 if (one_var != 0)
46938 {
46939 /* We need to shuffle the value to the correct position, so
46940 create a new pseudo to store the intermediate result. */
46941
46942 /* With SSE2, we can use the integer shuffle insns. */
46943 if (mode != V4SFmode && TARGET_SSE2)
46944 {
46945 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
46946 const1_rtx,
46947 GEN_INT (one_var == 1 ? 0 : 1),
46948 GEN_INT (one_var == 2 ? 0 : 1),
46949 GEN_INT (one_var == 3 ? 0 : 1)));
46950 if (target != new_target)
46951 emit_move_insn (target, new_target);
46952 return true;
46953 }
46954
46955 /* Otherwise convert the intermediate result to V4SFmode and
46956 use the SSE1 shuffle instructions. */
46957 if (mode != V4SFmode)
46958 {
46959 tmp = gen_reg_rtx (V4SFmode);
46960 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
46961 }
46962 else
46963 tmp = new_target;
46964
46965 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
46966 const1_rtx,
46967 GEN_INT (one_var == 1 ? 0 : 1),
46968 GEN_INT (one_var == 2 ? 0+4 : 1+4),
46969 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
46970
46971 if (mode != V4SFmode)
46972 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
46973 else if (tmp != target)
46974 emit_move_insn (target, tmp);
46975 }
46976 else if (target != new_target)
46977 emit_move_insn (target, new_target);
46978 return true;
46979
46980 case V8HImode:
46981 case V16QImode:
46982 vsimode = V4SImode;
46983 goto widen;
46984 case V4HImode:
46985 case V8QImode:
46986 if (!mmx_ok)
46987 return false;
46988 vsimode = V2SImode;
46989 goto widen;
46990 widen:
46991 if (one_var != 0)
46992 return false;
46993
46994 /* Zero extend the variable element to SImode and recurse. */
46995 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
46996
46997 x = gen_reg_rtx (vsimode);
46998 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
46999 var, one_var))
47000 gcc_unreachable ();
47001
47002 emit_move_insn (target, gen_lowpart (mode, x));
47003 return true;
47004
47005 default:
47006 return false;
47007 }
47008 }
47009
47010 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
47011 consisting of the values in VALS. It is known that all elements
47012 except ONE_VAR are constants. Return true if successful. */
47013
47014 static bool
47015 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
47016 rtx target, rtx vals, int one_var)
47017 {
47018 rtx var = XVECEXP (vals, 0, one_var);
47019 machine_mode wmode;
47020 rtx const_vec, x;
47021
47022 const_vec = copy_rtx (vals);
47023 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
47024 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
47025
47026 switch (mode)
47027 {
47028 case V2DFmode:
47029 case V2DImode:
47030 case V2SFmode:
47031 case V2SImode:
47032 /* For the two element vectors, it's just as easy to use
47033 the general case. */
47034 return false;
47035
47036 case V4DImode:
47037 /* Use ix86_expand_vector_set in 64bit mode only. */
47038 if (!TARGET_64BIT)
47039 return false;
47040 /* FALLTHRU */
47041 case V4DFmode:
47042 case V8SFmode:
47043 case V8SImode:
47044 case V16HImode:
47045 case V32QImode:
47046 case V4SFmode:
47047 case V4SImode:
47048 case V8HImode:
47049 case V4HImode:
47050 break;
47051
47052 case V16QImode:
47053 if (TARGET_SSE4_1)
47054 break;
47055 wmode = V8HImode;
47056 goto widen;
47057 case V8QImode:
47058 wmode = V4HImode;
47059 goto widen;
47060 widen:
47061 /* There's no way to set one QImode entry easily. Combine
47062 the variable value with its adjacent constant value, and
47063 promote to an HImode set. */
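/* For example (illustrative values), with VALS = { 7, X, ... } in
   V16QImode and ONE_VAR == 1, the code below builds the HImode value
   (X << 8) | 7 and inserts it as element 0 of the V8HImode view of the
   vector.  */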
47064 x = XVECEXP (vals, 0, one_var ^ 1);
47065 if (one_var & 1)
47066 {
47067 var = convert_modes (HImode, QImode, var, true);
47068 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
47069 NULL_RTX, 1, OPTAB_LIB_WIDEN);
47070 x = GEN_INT (INTVAL (x) & 0xff);
47071 }
47072 else
47073 {
47074 var = convert_modes (HImode, QImode, var, true);
47075 x = gen_int_mode (INTVAL (x) << 8, HImode);
47076 }
47077 if (x != const0_rtx)
47078 var = expand_simple_binop (HImode, IOR, var, x, var,
47079 1, OPTAB_LIB_WIDEN);
47080
47081 x = gen_reg_rtx (wmode);
47082 emit_move_insn (x, gen_lowpart (wmode, const_vec));
47083 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
47084
47085 emit_move_insn (target, gen_lowpart (mode, x));
47086 return true;
47087
47088 default:
47089 return false;
47090 }
47091
47092 emit_move_insn (target, const_vec);
47093 ix86_expand_vector_set (mmx_ok, target, var, one_var);
47094 return true;
47095 }
47096
47097 /* A subroutine of ix86_expand_vector_init_general. Use vector
47098 concatenate to handle the most general case: all values variable,
47099 and none identical. */
47100
47101 static void
47102 ix86_expand_vector_init_concat (machine_mode mode,
47103 rtx target, rtx *ops, int n)
47104 {
47105 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
47106 rtx first[16], second[8], third[4];
47107 rtvec v;
47108 int i, j;
47109
47110 switch (n)
47111 {
47112 case 2:
47113 switch (mode)
47114 {
47115 case V16SImode:
47116 cmode = V8SImode;
47117 break;
47118 case V16SFmode:
47119 cmode = V8SFmode;
47120 break;
47121 case V8DImode:
47122 cmode = V4DImode;
47123 break;
47124 case V8DFmode:
47125 cmode = V4DFmode;
47126 break;
47127 case V8SImode:
47128 cmode = V4SImode;
47129 break;
47130 case V8SFmode:
47131 cmode = V4SFmode;
47132 break;
47133 case V4DImode:
47134 cmode = V2DImode;
47135 break;
47136 case V4DFmode:
47137 cmode = V2DFmode;
47138 break;
47139 case V4SImode:
47140 cmode = V2SImode;
47141 break;
47142 case V4SFmode:
47143 cmode = V2SFmode;
47144 break;
47145 case V2DImode:
47146 cmode = DImode;
47147 break;
47148 case V2SImode:
47149 cmode = SImode;
47150 break;
47151 case V2DFmode:
47152 cmode = DFmode;
47153 break;
47154 case V2SFmode:
47155 cmode = SFmode;
47156 break;
47157 default:
47158 gcc_unreachable ();
47159 }
47160
47161 if (!register_operand (ops[1], cmode))
47162 ops[1] = force_reg (cmode, ops[1]);
47163 if (!register_operand (ops[0], cmode))
47164 ops[0] = force_reg (cmode, ops[0]);
47165 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
47166 ops[1])));
47167 break;
47168
47169 case 4:
47170 switch (mode)
47171 {
47172 case V4DImode:
47173 cmode = V2DImode;
47174 break;
47175 case V4DFmode:
47176 cmode = V2DFmode;
47177 break;
47178 case V4SImode:
47179 cmode = V2SImode;
47180 break;
47181 case V4SFmode:
47182 cmode = V2SFmode;
47183 break;
47184 default:
47185 gcc_unreachable ();
47186 }
47187 goto half;
47188
47189 case 8:
47190 switch (mode)
47191 {
47192 case V8DImode:
47193 cmode = V2DImode;
47194 hmode = V4DImode;
47195 break;
47196 case V8DFmode:
47197 cmode = V2DFmode;
47198 hmode = V4DFmode;
47199 break;
47200 case V8SImode:
47201 cmode = V2SImode;
47202 hmode = V4SImode;
47203 break;
47204 case V8SFmode:
47205 cmode = V2SFmode;
47206 hmode = V4SFmode;
47207 break;
47208 default:
47209 gcc_unreachable ();
47210 }
47211 goto half;
47212
47213 case 16:
47214 switch (mode)
47215 {
47216 case V16SImode:
47217 cmode = V2SImode;
47218 hmode = V4SImode;
47219 gmode = V8SImode;
47220 break;
47221 case V16SFmode:
47222 cmode = V2SFmode;
47223 hmode = V4SFmode;
47224 gmode = V8SFmode;
47225 break;
47226 default:
47227 gcc_unreachable ();
47228 }
47229 goto half;
47230
47231 half:
47232 /* FIXME: We process inputs backward to help RA. PR 36222. */
47233 i = n - 1;
47234 j = (n >> 1) - 1;
47235 for (; i > 0; i -= 2, j--)
47236 {
47237 first[j] = gen_reg_rtx (cmode);
47238 v = gen_rtvec (2, ops[i - 1], ops[i]);
47239 ix86_expand_vector_init (false, first[j],
47240 gen_rtx_PARALLEL (cmode, v));
47241 }
47242
47243 n >>= 1;
47244 if (n > 4)
47245 {
47246 gcc_assert (hmode != VOIDmode);
47247 gcc_assert (gmode != VOIDmode);
47248 for (i = j = 0; i < n; i += 2, j++)
47249 {
47250 second[j] = gen_reg_rtx (hmode);
47251 ix86_expand_vector_init_concat (hmode, second [j],
47252 &first [i], 2);
47253 }
47254 n >>= 1;
47255 for (i = j = 0; i < n; i += 2, j++)
47256 {
47257 third[j] = gen_reg_rtx (gmode);
47258 ix86_expand_vector_init_concat (gmode, third[j],
47259 &second[i], 2);
47260 }
47261 n >>= 1;
47262 ix86_expand_vector_init_concat (mode, target, third, n);
47263 }
47264 else if (n > 2)
47265 {
47266 gcc_assert (hmode != VOIDmode);
47267 for (i = j = 0; i < n; i += 2, j++)
47268 {
47269 second[j] = gen_reg_rtx (hmode);
47270 ix86_expand_vector_init_concat (hmode, second [j],
47271 &first [i], 2);
47272 }
47273 n >>= 1;
47274 ix86_expand_vector_init_concat (mode, target, second, n);
47275 }
47276 else
47277 ix86_expand_vector_init_concat (mode, target, first, n);
47278 break;
47279
47280 default:
47281 gcc_unreachable ();
47282 }
47283 }
47284
47285 /* A subroutine of ix86_expand_vector_init_general. Use vector
47286 interleave to handle the most general case: all values variable,
47287 and none identical. */
47288
47289 static void
47290 ix86_expand_vector_init_interleave (machine_mode mode,
47291 rtx target, rtx *ops, int n)
47292 {
47293 machine_mode first_imode, second_imode, third_imode, inner_mode;
47294 int i, j;
47295 rtx op0, op1;
47296 rtx (*gen_load_even) (rtx, rtx, rtx);
47297 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
47298 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
47299
47300 switch (mode)
47301 {
47302 case V8HImode:
47303 gen_load_even = gen_vec_setv8hi;
47304 gen_interleave_first_low = gen_vec_interleave_lowv4si;
47305 gen_interleave_second_low = gen_vec_interleave_lowv2di;
47306 inner_mode = HImode;
47307 first_imode = V4SImode;
47308 second_imode = V2DImode;
47309 third_imode = VOIDmode;
47310 break;
47311 case V16QImode:
47312 gen_load_even = gen_vec_setv16qi;
47313 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
47314 gen_interleave_second_low = gen_vec_interleave_lowv4si;
47315 inner_mode = QImode;
47316 first_imode = V8HImode;
47317 second_imode = V4SImode;
47318 third_imode = V2DImode;
47319 break;
47320 default:
47321 gcc_unreachable ();
47322 }
47323
47324 for (i = 0; i < n; i++)
47325 {
47326 /* Extend the odd element to SImode using a paradoxical SUBREG. */
47327 op0 = gen_reg_rtx (SImode);
47328 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
47329
47330 /* Insert the SImode value as low element of V4SImode vector. */
47331 op1 = gen_reg_rtx (V4SImode);
47332 op0 = gen_rtx_VEC_MERGE (V4SImode,
47333 gen_rtx_VEC_DUPLICATE (V4SImode,
47334 op0),
47335 CONST0_RTX (V4SImode),
47336 const1_rtx);
47337 emit_insn (gen_rtx_SET (op1, op0));
47338
47339 /* Cast the V4SImode vector back to a vector in the original mode. */
47340 op0 = gen_reg_rtx (mode);
47341 emit_move_insn (op0, gen_lowpart (mode, op1));
47342
47343 /* Load even elements into the second position. */
47344 emit_insn (gen_load_even (op0,
47345 force_reg (inner_mode,
47346 ops [i + i + 1]),
47347 const1_rtx));
47348
47349 /* Cast vector to FIRST_IMODE vector. */
47350 ops[i] = gen_reg_rtx (first_imode);
47351 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
47352 }
47353
47354 /* Interleave low FIRST_IMODE vectors. */
47355 for (i = j = 0; i < n; i += 2, j++)
47356 {
47357 op0 = gen_reg_rtx (first_imode);
47358 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
47359
47360 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
47361 ops[j] = gen_reg_rtx (second_imode);
47362 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
47363 }
47364
47365 /* Interleave low SECOND_IMODE vectors. */
47366 switch (second_imode)
47367 {
47368 case V4SImode:
47369 for (i = j = 0; i < n / 2; i += 2, j++)
47370 {
47371 op0 = gen_reg_rtx (second_imode);
47372 emit_insn (gen_interleave_second_low (op0, ops[i],
47373 ops[i + 1]));
47374
47375 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
47376 vector. */
47377 ops[j] = gen_reg_rtx (third_imode);
47378 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
47379 }
47380 second_imode = V2DImode;
47381 gen_interleave_second_low = gen_vec_interleave_lowv2di;
47382 /* FALLTHRU */
47383
47384 case V2DImode:
47385 op0 = gen_reg_rtx (second_imode);
47386 emit_insn (gen_interleave_second_low (op0, ops[0],
47387 ops[1]));
47388
47389 /* Cast the SECOND_IMODE vector back to a vector in the original
47390 mode. */
47391 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
47392 break;
47393
47394 default:
47395 gcc_unreachable ();
47396 }
47397 }
47398
47399 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
47400 all values variable, and none identical. */
47401
47402 static void
47403 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
47404 rtx target, rtx vals)
47405 {
47406 rtx ops[64], op0, op1, op2, op3, op4, op5;
47407 machine_mode half_mode = VOIDmode;
47408 machine_mode quarter_mode = VOIDmode;
47409 int n, i;
47410
47411 switch (mode)
47412 {
47413 case V2SFmode:
47414 case V2SImode:
47415 if (!mmx_ok && !TARGET_SSE)
47416 break;
47417 /* FALLTHRU */
47418
47419 case V16SImode:
47420 case V16SFmode:
47421 case V8DFmode:
47422 case V8DImode:
47423 case V8SFmode:
47424 case V8SImode:
47425 case V4DFmode:
47426 case V4DImode:
47427 case V4SFmode:
47428 case V4SImode:
47429 case V2DFmode:
47430 case V2DImode:
47431 n = GET_MODE_NUNITS (mode);
47432 for (i = 0; i < n; i++)
47433 ops[i] = XVECEXP (vals, 0, i);
47434 ix86_expand_vector_init_concat (mode, target, ops, n);
47435 return;
47436
47437 case V32QImode:
47438 half_mode = V16QImode;
47439 goto half;
47440
47441 case V16HImode:
47442 half_mode = V8HImode;
47443 goto half;
47444
47445 half:
47446 n = GET_MODE_NUNITS (mode);
47447 for (i = 0; i < n; i++)
47448 ops[i] = XVECEXP (vals, 0, i);
47449 op0 = gen_reg_rtx (half_mode);
47450 op1 = gen_reg_rtx (half_mode);
47451 ix86_expand_vector_init_interleave (half_mode, op0, ops,
47452 n >> 2);
47453 ix86_expand_vector_init_interleave (half_mode, op1,
47454 &ops [n >> 1], n >> 2);
47455 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
47456 return;
47457
47458 case V64QImode:
47459 quarter_mode = V16QImode;
47460 half_mode = V32QImode;
47461 goto quarter;
47462
47463 case V32HImode:
47464 quarter_mode = V8HImode;
47465 half_mode = V16HImode;
47466 goto quarter;
47467
47468 quarter:
47469 n = GET_MODE_NUNITS (mode);
47470 for (i = 0; i < n; i++)
47471 ops[i] = XVECEXP (vals, 0, i);
47472 op0 = gen_reg_rtx (quarter_mode);
47473 op1 = gen_reg_rtx (quarter_mode);
47474 op2 = gen_reg_rtx (quarter_mode);
47475 op3 = gen_reg_rtx (quarter_mode);
47476 op4 = gen_reg_rtx (half_mode);
47477 op5 = gen_reg_rtx (half_mode);
47478 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
47479 n >> 3);
47480 ix86_expand_vector_init_interleave (quarter_mode, op1,
47481 &ops [n >> 2], n >> 3);
47482 ix86_expand_vector_init_interleave (quarter_mode, op2,
47483 &ops [n >> 1], n >> 3);
47484 ix86_expand_vector_init_interleave (quarter_mode, op3,
47485 &ops [(n >> 1) | (n >> 2)], n >> 3);
47486 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
47487 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
47488 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
47489 return;
47490
47491 case V16QImode:
47492 if (!TARGET_SSE4_1)
47493 break;
47494 /* FALLTHRU */
47495
47496 case V8HImode:
47497 if (!TARGET_SSE2)
47498 break;
47499
47500 /* Don't use ix86_expand_vector_init_interleave if we can't
47501 move from GPR to SSE register directly. */
47502 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
47503 break;
47504
47505 n = GET_MODE_NUNITS (mode);
47506 for (i = 0; i < n; i++)
47507 ops[i] = XVECEXP (vals, 0, i);
47508 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
47509 return;
47510
47511 case V4HImode:
47512 case V8QImode:
47513 break;
47514
47515 default:
47516 gcc_unreachable ();
47517 }
47518
47519 {
47520 int i, j, n_elts, n_words, n_elt_per_word;
47521 machine_mode inner_mode;
47522 rtx words[4], shift;
47523
47524 inner_mode = GET_MODE_INNER (mode);
47525 n_elts = GET_MODE_NUNITS (mode);
47526 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47527 n_elt_per_word = n_elts / n_words;
47528 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
47529
47530 for (i = 0; i < n_words; ++i)
47531 {
47532 rtx word = NULL_RTX;
47533
47534 for (j = 0; j < n_elt_per_word; ++j)
47535 {
47536 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
47537 elt = convert_modes (word_mode, inner_mode, elt, true);
47538
47539 if (j == 0)
47540 word = elt;
47541 else
47542 {
47543 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
47544 word, 1, OPTAB_LIB_WIDEN);
47545 word = expand_simple_binop (word_mode, IOR, word, elt,
47546 word, 1, OPTAB_LIB_WIDEN);
47547 }
47548 }
47549
47550 words[i] = word;
47551 }
47552
47553 if (n_words == 1)
47554 emit_move_insn (target, gen_lowpart (mode, words[0]));
47555 else if (n_words == 2)
47556 {
47557 rtx tmp = gen_reg_rtx (mode);
47558 emit_clobber (tmp);
47559 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
47560 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
47561 emit_move_insn (target, tmp);
47562 }
47563 else if (n_words == 4)
47564 {
47565 rtx tmp = gen_reg_rtx (V4SImode);
47566 gcc_assert (word_mode == SImode);
47567 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
47568 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
47569 emit_move_insn (target, gen_lowpart (mode, tmp));
47570 }
47571 else
47572 gcc_unreachable ();
47573 }
47574 }
47575
47576 /* Initialize vector TARGET via VALS. Suppress the use of MMX
47577 instructions unless MMX_OK is true. */
47578
47579 void
47580 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
47581 {
47582 machine_mode mode = GET_MODE (target);
47583 machine_mode inner_mode = GET_MODE_INNER (mode);
47584 int n_elts = GET_MODE_NUNITS (mode);
47585 int n_var = 0, one_var = -1;
47586 bool all_same = true, all_const_zero = true;
47587 int i;
47588 rtx x;
47589
47590 for (i = 0; i < n_elts; ++i)
47591 {
47592 x = XVECEXP (vals, 0, i);
47593 if (!(CONST_SCALAR_INT_P (x)
47594 || CONST_DOUBLE_P (x)
47595 || CONST_FIXED_P (x)))
47596 n_var++, one_var = i;
47597 else if (x != CONST0_RTX (inner_mode))
47598 all_const_zero = false;
47599 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
47600 all_same = false;
47601 }
47602
47603 /* Constants are best loaded from the constant pool. */
47604 if (n_var == 0)
47605 {
47606 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
47607 return;
47608 }
47609
47610 /* If all values are identical, broadcast the value. */
47611 if (all_same
47612 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
47613 XVECEXP (vals, 0, 0)))
47614 return;
47615
47616 /* Values where only one field is non-constant are best loaded from
47617 the pool and overwritten via move later. */
47618 if (n_var == 1)
47619 {
47620 if (all_const_zero
47621 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
47622 XVECEXP (vals, 0, one_var),
47623 one_var))
47624 return;
47625
47626 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
47627 return;
47628 }
47629
47630 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
47631 }
47632
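/* Set element ELT of vector register TARGET to VAL.  Suppress the use of
   MMX instructions unless MMX_OK is true.  */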
47633 void
47634 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
47635 {
47636 machine_mode mode = GET_MODE (target);
47637 machine_mode inner_mode = GET_MODE_INNER (mode);
47638 machine_mode half_mode;
47639 bool use_vec_merge = false;
47640 rtx tmp;
47641 static rtx (*gen_extract[6][2]) (rtx, rtx)
47642 = {
47643 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
47644 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
47645 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
47646 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
47647 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
47648 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
47649 };
47650 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
47651 = {
47652 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
47653 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
47654 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
47655 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
47656 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
47657 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
47658 };
47659 int i, j, n;
47660 machine_mode mmode = VOIDmode;
47661 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
47662
47663 switch (mode)
47664 {
47665 case V2SFmode:
47666 case V2SImode:
47667 if (mmx_ok)
47668 {
47669 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
47670 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
47671 if (elt == 0)
47672 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
47673 else
47674 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
47675 emit_insn (gen_rtx_SET (target, tmp));
47676 return;
47677 }
47678 break;
47679
47680 case V2DImode:
47681 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
47682 if (use_vec_merge)
47683 break;
47684
47685 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
47686 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
47687 if (elt == 0)
47688 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
47689 else
47690 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
47691 emit_insn (gen_rtx_SET (target, tmp));
47692 return;
47693
47694 case V2DFmode:
47695 {
47696 rtx op0, op1;
47697
47698 /* For the two element vectors, we implement a VEC_CONCAT with
47699 the extraction of the other element. */
47700
47701 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
47702 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
47703
47704 if (elt == 0)
47705 op0 = val, op1 = tmp;
47706 else
47707 op0 = tmp, op1 = val;
47708
47709 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
47710 emit_insn (gen_rtx_SET (target, tmp));
47711 }
47712 return;
47713
47714 case V4SFmode:
47715 use_vec_merge = TARGET_SSE4_1;
47716 if (use_vec_merge)
47717 break;
47718
47719 switch (elt)
47720 {
47721 case 0:
47722 use_vec_merge = true;
47723 break;
47724
47725 case 1:
47726 /* tmp = target = A B C D */
47727 tmp = copy_to_reg (target);
47728 /* target = A A B B */
47729 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
47730 /* target = X A B B */
47731 ix86_expand_vector_set (false, target, val, 0);
47732 /* target = A X C D */
47733 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
47734 const1_rtx, const0_rtx,
47735 GEN_INT (2+4), GEN_INT (3+4)));
47736 return;
47737
47738 case 2:
47739 /* tmp = target = A B C D */
47740 tmp = copy_to_reg (target);
47741 /* tmp = X B C D */
47742 ix86_expand_vector_set (false, tmp, val, 0);
47743 /* target = A B X D */
47744 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
47745 const0_rtx, const1_rtx,
47746 GEN_INT (0+4), GEN_INT (3+4)));
47747 return;
47748
47749 case 3:
47750 /* tmp = target = A B C D */
47751 tmp = copy_to_reg (target);
47752 /* tmp = X B C D */
47753 ix86_expand_vector_set (false, tmp, val, 0);
47754 /* target = A B C X */
47755 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
47756 const0_rtx, const1_rtx,
47757 GEN_INT (2+4), GEN_INT (0+4)));
47758 return;
47759
47760 default:
47761 gcc_unreachable ();
47762 }
47763 break;
47764
47765 case V4SImode:
47766 use_vec_merge = TARGET_SSE4_1;
47767 if (use_vec_merge)
47768 break;
47769
47770 /* Element 0 handled by vec_merge below. */
47771 if (elt == 0)
47772 {
47773 use_vec_merge = true;
47774 break;
47775 }
47776
47777 if (TARGET_SSE2)
47778 {
47779 /* With SSE2, use integer shuffles to swap element 0 and ELT,
47780 store into element 0, then shuffle them back. */
47781
47782 rtx order[4];
47783
47784 order[0] = GEN_INT (elt);
47785 order[1] = const1_rtx;
47786 order[2] = const2_rtx;
47787 order[3] = GEN_INT (3);
47788 order[elt] = const0_rtx;
47789
47790 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
47791 order[1], order[2], order[3]));
47792
47793 ix86_expand_vector_set (false, target, val, 0);
47794
47795 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
47796 order[1], order[2], order[3]));
47797 }
47798 else
47799 {
47800 /* For SSE1, we have to reuse the V4SF code. */
47801 rtx t = gen_reg_rtx (V4SFmode);
47802 emit_move_insn (t, gen_lowpart (V4SFmode, target));
47803 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
47804 emit_move_insn (target, gen_lowpart (mode, t));
47805 }
47806 return;
47807
47808 case V8HImode:
47809 use_vec_merge = TARGET_SSE2;
47810 break;
47811 case V4HImode:
47812 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47813 break;
47814
47815 case V16QImode:
47816 use_vec_merge = TARGET_SSE4_1;
47817 break;
47818
47819 case V8QImode:
47820 break;
47821
47822 case V32QImode:
47823 half_mode = V16QImode;
47824 j = 0;
47825 n = 16;
47826 goto half;
47827
47828 case V16HImode:
47829 half_mode = V8HImode;
47830 j = 1;
47831 n = 8;
47832 goto half;
47833
47834 case V8SImode:
47835 half_mode = V4SImode;
47836 j = 2;
47837 n = 4;
47838 goto half;
47839
47840 case V4DImode:
47841 half_mode = V2DImode;
47842 j = 3;
47843 n = 2;
47844 goto half;
47845
47846 case V8SFmode:
47847 half_mode = V4SFmode;
47848 j = 4;
47849 n = 4;
47850 goto half;
47851
47852 case V4DFmode:
47853 half_mode = V2DFmode;
47854 j = 5;
47855 n = 2;
47856 goto half;
47857
47858 half:
47859 /* Compute offset. */
47860 i = elt / n;
47861 elt %= n;
47862
47863 gcc_assert (i <= 1);
47864
47865 /* Extract the half. */
47866 tmp = gen_reg_rtx (half_mode);
47867 emit_insn (gen_extract[j][i] (tmp, target));
47868
47869 /* Put val in tmp at elt. */
47870 ix86_expand_vector_set (false, tmp, val, elt);
47871
47872 /* Put it back. */
47873 emit_insn (gen_insert[j][i] (target, target, tmp));
47874 return;
47875
47876 case V8DFmode:
47877 if (TARGET_AVX512F)
47878 {
47879 mmode = QImode;
47880 gen_blendm = gen_avx512f_blendmv8df;
47881 }
47882 break;
47883
47884 case V8DImode:
47885 if (TARGET_AVX512F)
47886 {
47887 mmode = QImode;
47888 gen_blendm = gen_avx512f_blendmv8di;
47889 }
47890 break;
47891
47892 case V16SFmode:
47893 if (TARGET_AVX512F)
47894 {
47895 mmode = HImode;
47896 gen_blendm = gen_avx512f_blendmv16sf;
47897 }
47898 break;
47899
47900 case V16SImode:
47901 if (TARGET_AVX512F)
47902 {
47903 mmode = HImode;
47904 gen_blendm = gen_avx512f_blendmv16si;
47905 }
47906 break;
47907
47908 case V32HImode:
47909 if (TARGET_AVX512F && TARGET_AVX512BW)
47910 {
47911 mmode = SImode;
47912 gen_blendm = gen_avx512bw_blendmv32hi;
47913 }
47914 break;
47915
47916 case V64QImode:
47917 if (TARGET_AVX512F && TARGET_AVX512BW)
47918 {
47919 mmode = DImode;
47920 gen_blendm = gen_avx512bw_blendmv64qi;
47921 }
47922 break;
47923
47924 default:
47925 break;
47926 }
47927
47928 if (mmode != VOIDmode)
47929 {
47930 tmp = gen_reg_rtx (mode);
47931 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
47932 /* The avx512*_blendm<mode> expanders have different operand order
47933 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
47934 elements where the mask is set and second input operand otherwise,
47935 in {sse,avx}*_*blend* the first input operand is used for elements
47936 where the mask is clear and second input operand otherwise. */
47937 emit_insn (gen_blendm (target, target, tmp,
47938 force_reg (mmode,
47939 gen_int_mode (1 << elt, mmode))));
47940 }
47941 else if (use_vec_merge)
47942 {
47943 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
47944 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
47945 emit_insn (gen_rtx_SET (target, tmp));
47946 }
47947 else
47948 {
47949 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47950
47951 emit_move_insn (mem, target);
47952
47953 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47954 emit_move_insn (tmp, val);
47955
47956 emit_move_insn (target, mem);
47957 }
47958 }
47959
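/* Extract element ELT of vector VEC into TARGET, which has the vector's
   inner mode.  Suppress the use of MMX instructions unless MMX_OK is
   true.  */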
47960 void
47961 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
47962 {
47963 machine_mode mode = GET_MODE (vec);
47964 machine_mode inner_mode = GET_MODE_INNER (mode);
47965 bool use_vec_extr = false;
47966 rtx tmp;
47967
47968 switch (mode)
47969 {
47970 case V2SImode:
47971 case V2SFmode:
47972 if (!mmx_ok)
47973 break;
47974 /* FALLTHRU */
47975
47976 case V2DFmode:
47977 case V2DImode:
47978 use_vec_extr = true;
47979 break;
47980
47981 case V4SFmode:
47982 use_vec_extr = TARGET_SSE4_1;
47983 if (use_vec_extr)
47984 break;
47985
47986 switch (elt)
47987 {
47988 case 0:
47989 tmp = vec;
47990 break;
47991
47992 case 1:
47993 case 3:
47994 tmp = gen_reg_rtx (mode);
47995 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
47996 GEN_INT (elt), GEN_INT (elt),
47997 GEN_INT (elt+4), GEN_INT (elt+4)));
47998 break;
47999
48000 case 2:
48001 tmp = gen_reg_rtx (mode);
48002 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
48003 break;
48004
48005 default:
48006 gcc_unreachable ();
48007 }
48008 vec = tmp;
48009 use_vec_extr = true;
48010 elt = 0;
48011 break;
48012
48013 case V4SImode:
48014 use_vec_extr = TARGET_SSE4_1;
48015 if (use_vec_extr)
48016 break;
48017
48018 if (TARGET_SSE2)
48019 {
48020 switch (elt)
48021 {
48022 case 0:
48023 tmp = vec;
48024 break;
48025
48026 case 1:
48027 case 3:
48028 tmp = gen_reg_rtx (mode);
48029 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
48030 GEN_INT (elt), GEN_INT (elt),
48031 GEN_INT (elt), GEN_INT (elt)));
48032 break;
48033
48034 case 2:
48035 tmp = gen_reg_rtx (mode);
48036 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
48037 break;
48038
48039 default:
48040 gcc_unreachable ();
48041 }
48042 vec = tmp;
48043 use_vec_extr = true;
48044 elt = 0;
48045 }
48046 else
48047 {
48048 /* For SSE1, we have to reuse the V4SF code. */
48049 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
48050 gen_lowpart (V4SFmode, vec), elt);
48051 return;
48052 }
48053 break;
48054
48055 case V8HImode:
48056 use_vec_extr = TARGET_SSE2;
48057 break;
48058 case V4HImode:
48059 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
48060 break;
48061
48062 case V16QImode:
48063 use_vec_extr = TARGET_SSE4_1;
48064 break;
48065
48066 case V8SFmode:
48067 if (TARGET_AVX)
48068 {
48069 tmp = gen_reg_rtx (V4SFmode);
48070 if (elt < 4)
48071 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
48072 else
48073 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
48074 ix86_expand_vector_extract (false, target, tmp, elt & 3);
48075 return;
48076 }
48077 break;
48078
48079 case V4DFmode:
48080 if (TARGET_AVX)
48081 {
48082 tmp = gen_reg_rtx (V2DFmode);
48083 if (elt < 2)
48084 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
48085 else
48086 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
48087 ix86_expand_vector_extract (false, target, tmp, elt & 1);
48088 return;
48089 }
48090 break;
48091
48092 case V32QImode:
48093 if (TARGET_AVX)
48094 {
48095 tmp = gen_reg_rtx (V16QImode);
48096 if (elt < 16)
48097 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
48098 else
48099 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
48100 ix86_expand_vector_extract (false, target, tmp, elt & 15);
48101 return;
48102 }
48103 break;
48104
48105 case V16HImode:
48106 if (TARGET_AVX)
48107 {
48108 tmp = gen_reg_rtx (V8HImode);
48109 if (elt < 8)
48110 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
48111 else
48112 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
48113 ix86_expand_vector_extract (false, target, tmp, elt & 7);
48114 return;
48115 }
48116 break;
48117
48118 case V8SImode:
48119 if (TARGET_AVX)
48120 {
48121 tmp = gen_reg_rtx (V4SImode);
48122 if (elt < 4)
48123 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
48124 else
48125 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
48126 ix86_expand_vector_extract (false, target, tmp, elt & 3);
48127 return;
48128 }
48129 break;
48130
48131 case V4DImode:
48132 if (TARGET_AVX)
48133 {
48134 tmp = gen_reg_rtx (V2DImode);
48135 if (elt < 2)
48136 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
48137 else
48138 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
48139 ix86_expand_vector_extract (false, target, tmp, elt & 1);
48140 return;
48141 }
48142 break;
48143
48144 case V32HImode:
48145 if (TARGET_AVX512BW)
48146 {
48147 tmp = gen_reg_rtx (V16HImode);
48148 if (elt < 16)
48149 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
48150 else
48151 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
48152 ix86_expand_vector_extract (false, target, tmp, elt & 15);
48153 return;
48154 }
48155 break;
48156
48157 case V64QImode:
48158 if (TARGET_AVX512BW)
48159 {
48160 tmp = gen_reg_rtx (V32QImode);
48161 if (elt < 32)
48162 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
48163 else
48164 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
48165 ix86_expand_vector_extract (false, target, tmp, elt & 31);
48166 return;
48167 }
48168 break;
48169
48170 case V16SFmode:
48171 tmp = gen_reg_rtx (V8SFmode);
48172 if (elt < 8)
48173 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
48174 else
48175 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
48176 ix86_expand_vector_extract (false, target, tmp, elt & 7);
48177 return;
48178
48179 case V8DFmode:
48180 tmp = gen_reg_rtx (V4DFmode);
48181 if (elt < 4)
48182 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
48183 else
48184 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
48185 ix86_expand_vector_extract (false, target, tmp, elt & 3);
48186 return;
48187
48188 case V16SImode:
48189 tmp = gen_reg_rtx (V8SImode);
48190 if (elt < 8)
48191 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
48192 else
48193 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
48194 ix86_expand_vector_extract (false, target, tmp, elt & 7);
48195 return;
48196
48197 case V8DImode:
48198 tmp = gen_reg_rtx (V4DImode);
48199 if (elt < 4)
48200 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
48201 else
48202 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
48203 ix86_expand_vector_extract (false, target, tmp, elt & 3);
48204 return;
48205
48206 case V8QImode:
48207 /* ??? Could extract the appropriate HImode element and shift. */
48208 default:
48209 break;
48210 }
48211
48212 if (use_vec_extr)
48213 {
48214 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
48215 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
48216
48217 /* Let the rtl optimizers know about the zero extension performed. */
48218 if (inner_mode == QImode || inner_mode == HImode)
48219 {
48220 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
48221 target = gen_lowpart (SImode, target);
48222 }
48223
48224 emit_insn (gen_rtx_SET (target, tmp));
48225 }
48226 else
48227 {
48228 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
48229
48230 emit_move_insn (mem, vec);
48231
48232 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
48233 emit_move_insn (target, tmp);
48234 }
48235 }
48236
48237 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
48238 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
48239 The upper bits of DEST are undefined, though they shouldn't cause
48240 exceptions (some bits from src or all zeros are ok). */
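/* For example, with SRC a V4SImode vector { a, b, c, d } and I == 128,
   the V1TImode logical shift below moves bits 64..127 down to bits
   0..63, so DEST ends up as { c, d, 0, 0 }; the caller then combines
   DEST with SRC element-wise, reducing four live values to two.  */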
48241
48242 static void
48243 emit_reduc_half (rtx dest, rtx src, int i)
48244 {
48245 rtx tem, d = dest;
48246 switch (GET_MODE (src))
48247 {
48248 case V4SFmode:
48249 if (i == 128)
48250 tem = gen_sse_movhlps (dest, src, src);
48251 else
48252 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
48253 GEN_INT (1 + 4), GEN_INT (1 + 4));
48254 break;
48255 case V2DFmode:
48256 tem = gen_vec_interleave_highv2df (dest, src, src);
48257 break;
48258 case V16QImode:
48259 case V8HImode:
48260 case V4SImode:
48261 case V2DImode:
48262 d = gen_reg_rtx (V1TImode);
48263 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
48264 GEN_INT (i / 2));
48265 break;
48266 case V8SFmode:
48267 if (i == 256)
48268 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
48269 else
48270 tem = gen_avx_shufps256 (dest, src, src,
48271 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
48272 break;
48273 case V4DFmode:
48274 if (i == 256)
48275 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
48276 else
48277 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
48278 break;
48279 case V32QImode:
48280 case V16HImode:
48281 case V8SImode:
48282 case V4DImode:
48283 if (i == 256)
48284 {
48285 if (GET_MODE (dest) != V4DImode)
48286 d = gen_reg_rtx (V4DImode);
48287 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
48288 gen_lowpart (V4DImode, src),
48289 const1_rtx);
48290 }
48291 else
48292 {
48293 d = gen_reg_rtx (V2TImode);
48294 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
48295 GEN_INT (i / 2));
48296 }
48297 break;
48298 case V64QImode:
48299 case V32HImode:
48300 case V16SImode:
48301 case V16SFmode:
48302 case V8DImode:
48303 case V8DFmode:
48304 if (i > 128)
48305 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
48306 gen_lowpart (V16SImode, src),
48307 gen_lowpart (V16SImode, src),
48308 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
48309 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
48310 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
48311 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
48312 GEN_INT (0xC), GEN_INT (0xD),
48313 GEN_INT (0xE), GEN_INT (0xF),
48314 GEN_INT (0x10), GEN_INT (0x11),
48315 GEN_INT (0x12), GEN_INT (0x13),
48316 GEN_INT (0x14), GEN_INT (0x15),
48317 GEN_INT (0x16), GEN_INT (0x17));
48318 else
48319 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
48320 gen_lowpart (V16SImode, src),
48321 GEN_INT (i == 128 ? 0x2 : 0x1),
48322 GEN_INT (0x3),
48323 GEN_INT (0x3),
48324 GEN_INT (0x3),
48325 GEN_INT (i == 128 ? 0x6 : 0x5),
48326 GEN_INT (0x7),
48327 GEN_INT (0x7),
48328 GEN_INT (0x7),
48329 GEN_INT (i == 128 ? 0xA : 0x9),
48330 GEN_INT (0xB),
48331 GEN_INT (0xB),
48332 GEN_INT (0xB),
48333 GEN_INT (i == 128 ? 0xE : 0xD),
48334 GEN_INT (0xF),
48335 GEN_INT (0xF),
48336 GEN_INT (0xF));
48337 break;
48338 default:
48339 gcc_unreachable ();
48340 }
48341 emit_insn (tem);
48342 if (d != dest)
48343 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
48344 }
48345
48346 /* Expand a vector reduction. FN is the binary pattern to reduce;
48347 DEST is the destination; IN is the input vector. */
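/* For instance, reducing a V4SImode vector { a, b, c, d } with an
   addition pattern takes two rounds: first half = { c, d, 0, 0 } and
   vec = { a+c, b+d, ... }, then half = { b+d, ... } and
   DEST = { a+b+c+d, ... }, i.e. the result lands in element 0.  */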
48348
48349 void
48350 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
48351 {
48352 rtx half, dst, vec = in;
48353 machine_mode mode = GET_MODE (in);
48354 int i;
48355
48356 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
48357 if (TARGET_SSE4_1
48358 && mode == V8HImode
48359 && fn == gen_uminv8hi3)
48360 {
48361 emit_insn (gen_sse4_1_phminposuw (dest, in));
48362 return;
48363 }
48364
48365 for (i = GET_MODE_BITSIZE (mode);
48366 i > GET_MODE_UNIT_BITSIZE (mode);
48367 i >>= 1)
48368 {
48369 half = gen_reg_rtx (mode);
48370 emit_reduc_half (half, vec, i);
48371 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
48372 dst = dest;
48373 else
48374 dst = gen_reg_rtx (mode);
48375 emit_insn (fn (dst, half, vec));
48376 vec = dst;
48377 }
48378 }
48379 \f
48380 /* Target hook for scalar_mode_supported_p. */
48381 static bool
48382 ix86_scalar_mode_supported_p (machine_mode mode)
48383 {
48384 if (DECIMAL_FLOAT_MODE_P (mode))
48385 return default_decimal_float_supported_p ();
48386 else if (mode == TFmode)
48387 return true;
48388 else
48389 return default_scalar_mode_supported_p (mode);
48390 }
48391
48392 /* Implements target hook vector_mode_supported_p. */
48393 static bool
48394 ix86_vector_mode_supported_p (machine_mode mode)
48395 {
48396 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
48397 return true;
48398 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
48399 return true;
48400 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
48401 return true;
48402 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
48403 return true;
48404 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
48405 return true;
48406 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
48407 return true;
48408 return false;
48409 }
48410
48411 /* Implement target hook libgcc_floating_mode_supported_p. */
48412 static bool
48413 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
48414 {
48415 switch (mode)
48416 {
48417 case SFmode:
48418 case DFmode:
48419 case XFmode:
48420 return true;
48421
48422 case TFmode:
48423 #ifdef IX86_NO_LIBGCC_TFMODE
48424 return false;
48425 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
48426 return TARGET_LONG_DOUBLE_128;
48427 #else
48428 return true;
48429 #endif
48430
48431 default:
48432 return false;
48433 }
48434 }
48435
48436 /* Target hook for c_mode_for_suffix. */
48437 static machine_mode
48438 ix86_c_mode_for_suffix (char suffix)
48439 {
48440 if (suffix == 'q')
48441 return TFmode;
48442 if (suffix == 'w')
48443 return XFmode;
48444
48445 return VOIDmode;
48446 }
48447
48448 /* Worker function for TARGET_MD_ASM_ADJUST.
48449
48450 We implement asm flag outputs, and maintain source compatibility
48451 with the old cc0-based compiler. */
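/* As an illustrative example, user code such as

     int carry;
     asm ("btl %2, %1" : "=@ccc" (carry) : "r" (word), "r" (bit));

   declares an output bound to the carry flag; the loop below rewrites
   the "=@cc..." constraint to target the flags register and emits a
   setcc-style extraction into the user's variable.  */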
48452
48453 static rtx_insn *
48454 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
48455 vec<const char *> &constraints,
48456 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
48457 {
48458 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
48459 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
48460
48461 bool saw_asm_flag = false;
48462
48463 start_sequence ();
48464 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
48465 {
48466 const char *con = constraints[i];
48467 if (strncmp (con, "=@cc", 4) != 0)
48468 continue;
48469 con += 4;
48470 if (strchr (con, ',') != NULL)
48471 {
48472 error ("alternatives not allowed in asm flag output");
48473 continue;
48474 }
48475
48476 bool invert = false;
48477 if (con[0] == 'n')
48478 invert = true, con++;
48479
48480 machine_mode mode = CCmode;
48481 rtx_code code = UNKNOWN;
48482
48483 switch (con[0])
48484 {
48485 case 'a':
48486 if (con[1] == 0)
48487 mode = CCAmode, code = EQ;
48488 else if (con[1] == 'e' && con[2] == 0)
48489 mode = CCCmode, code = NE;
48490 break;
48491 case 'b':
48492 if (con[1] == 0)
48493 mode = CCCmode, code = EQ;
48494 else if (con[1] == 'e' && con[2] == 0)
48495 mode = CCAmode, code = NE;
48496 break;
48497 case 'c':
48498 if (con[1] == 0)
48499 mode = CCCmode, code = EQ;
48500 break;
48501 case 'e':
48502 if (con[1] == 0)
48503 mode = CCZmode, code = EQ;
48504 break;
48505 case 'g':
48506 if (con[1] == 0)
48507 mode = CCGCmode, code = GT;
48508 else if (con[1] == 'e' && con[2] == 0)
48509 mode = CCGCmode, code = GE;
48510 break;
48511 case 'l':
48512 if (con[1] == 0)
48513 mode = CCGCmode, code = LT;
48514 else if (con[1] == 'e' && con[2] == 0)
48515 mode = CCGCmode, code = LE;
48516 break;
48517 case 'o':
48518 if (con[1] == 0)
48519 mode = CCOmode, code = EQ;
48520 break;
48521 case 'p':
48522 if (con[1] == 0)
48523 mode = CCPmode, code = EQ;
48524 break;
48525 case 's':
48526 if (con[1] == 0)
48527 mode = CCSmode, code = EQ;
48528 break;
48529 case 'z':
48530 if (con[1] == 0)
48531 mode = CCZmode, code = EQ;
48532 break;
48533 }
48534 if (code == UNKNOWN)
48535 {
48536 error ("unknown asm flag output %qs", constraints[i]);
48537 continue;
48538 }
48539 if (invert)
48540 code = reverse_condition (code);
48541
48542 rtx dest = outputs[i];
48543 if (!saw_asm_flag)
48544 {
48545 /* This is the first asm flag output. Here we put the flags
48546 register in as the real output and adjust the condition to
48547 allow it. */
48548 constraints[i] = "=Bf";
48549 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
48550 saw_asm_flag = true;
48551 }
48552 else
48553 {
48554 /* We don't need the flags register as output twice. */
48555 constraints[i] = "=X";
48556 outputs[i] = gen_rtx_SCRATCH (SImode);
48557 }
48558
48559 rtx x = gen_rtx_REG (mode, FLAGS_REG);
48560 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
48561
48562 machine_mode dest_mode = GET_MODE (dest);
48563 if (!SCALAR_INT_MODE_P (dest_mode))
48564 {
48565 error ("invalid type for asm flag output");
48566 continue;
48567 }
48568
48569 if (dest_mode == DImode && !TARGET_64BIT)
48570 dest_mode = SImode;
48571
48572 if (dest_mode != QImode)
48573 {
48574 rtx destqi = gen_reg_rtx (QImode);
48575 emit_insn (gen_rtx_SET (destqi, x));
48576
48577 if (TARGET_ZERO_EXTEND_WITH_AND
48578 && optimize_function_for_speed_p (cfun))
48579 {
48580 x = force_reg (dest_mode, const0_rtx);
48581
48582 emit_insn (gen_movstrictqi
48583 (gen_lowpart (QImode, x), destqi));
48584 }
48585 else
48586 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
48587 }
48588
48589 if (dest_mode != GET_MODE (dest))
48590 {
48591 rtx tmp = gen_reg_rtx (SImode);
48592
48593 emit_insn (gen_rtx_SET (tmp, x));
48594 emit_insn (gen_zero_extendsidi2 (dest, tmp));
48595 }
48596 else
48597 emit_insn (gen_rtx_SET (dest, x));
48598 }
48599 rtx_insn *seq = get_insns ();
48600 end_sequence ();
48601
48602 if (saw_asm_flag)
48603 return seq;
48604 else
48605 {
48606 /* If we had no asm flag outputs, clobber the flags. */
48607 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
48608 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
48609 return NULL;
48610 }
48611 }
48612
48613 /* Implements the targetm.asm.encode_section_info target hook. */
48614
48615 static void ATTRIBUTE_UNUSED
48616 ix86_encode_section_info (tree decl, rtx rtl, int first)
48617 {
48618 default_encode_section_info (decl, rtl, first);
48619
48620 if (ix86_in_large_data_p (decl))
48621 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
48622 }
48623
48624 /* Worker function for REVERSE_CONDITION. */
48625
48626 enum rtx_code
48627 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
48628 {
48629 return (mode != CCFPmode && mode != CCFPUmode
48630 ? reverse_condition (code)
48631 : reverse_condition_maybe_unordered (code));
48632 }
48633
48634 /* Output code to perform an x87 FP register move, from OPERANDS[1]
48635 to OPERANDS[0]. */
48636
48637 const char *
48638 output_387_reg_move (rtx insn, rtx *operands)
48639 {
48640 if (REG_P (operands[0]))
48641 {
48642 if (REG_P (operands[1])
48643 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
48644 {
48645 if (REGNO (operands[0]) == FIRST_STACK_REG)
48646 return output_387_ffreep (operands, 0);
48647 return "fstp\t%y0";
48648 }
48649 if (STACK_TOP_P (operands[0]))
48650 return "fld%Z1\t%y1";
48651 return "fst\t%y0";
48652 }
48653 else if (MEM_P (operands[0]))
48654 {
48655 gcc_assert (REG_P (operands[1]));
48656 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
48657 return "fstp%Z0\t%y0";
48658 else
48659 {
48660 /* There is no non-popping store to memory for XFmode.
48661 So if we need one, follow the store with a load. */
48662 if (GET_MODE (operands[0]) == XFmode)
48663 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
48664 else
48665 return "fst%Z0\t%y0";
48666 }
48667 }
48668 else
48669 gcc_unreachable ();
48670 }
48671
48672 /* Output code to perform a conditional jump to LABEL, if C2 flag in
48673 FP status register is set. */
48674
48675 void
48676 ix86_emit_fp_unordered_jump (rtx label)
48677 {
48678 rtx reg = gen_reg_rtx (HImode);
48679 rtx temp;
48680
48681 emit_insn (gen_x86_fnstsw_1 (reg));
48682
48683 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
48684 {
48685 emit_insn (gen_x86_sahf_1 (reg));
48686
48687 temp = gen_rtx_REG (CCmode, FLAGS_REG);
48688 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
48689 }
48690 else
48691 {
48692 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
48693
48694 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
48695 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
48696 }
48697
48698 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
48699 gen_rtx_LABEL_REF (VOIDmode, label),
48700 pc_rtx);
48701 temp = gen_rtx_SET (pc_rtx, temp);
48702
48703 emit_jump_insn (temp);
48704 predict_jump (REG_BR_PROB_BASE * 10 / 100);
48705 }
48706
48707 /* Output code to perform a log1p XFmode calculation. */
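/* The threshold 0.29289... used below is 1 - sqrt(2)/2.  fyl2xp1 is only
   specified for arguments of small magnitude (roughly within that bound),
   so for |op1| below the threshold we compute fldln2 * fyl2xp1 (op1)
   directly, and otherwise fall back to fldln2 * fyl2x (1 + op1).  */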
48708
48709 void ix86_emit_i387_log1p (rtx op0, rtx op1)
48710 {
48711 rtx_code_label *label1 = gen_label_rtx ();
48712 rtx_code_label *label2 = gen_label_rtx ();
48713
48714 rtx tmp = gen_reg_rtx (XFmode);
48715 rtx tmp2 = gen_reg_rtx (XFmode);
48716 rtx test;
48717
48718 emit_insn (gen_absxf2 (tmp, op1));
48719 test = gen_rtx_GE (VOIDmode, tmp,
48720 const_double_from_real_value (
48721 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
48722 XFmode));
48723 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
48724
48725 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
48726 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
48727 emit_jump (label2);
48728
48729 emit_label (label1);
48730 emit_move_insn (tmp, CONST1_RTX (XFmode));
48731 emit_insn (gen_addxf3 (tmp, op1, tmp));
48732 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
48733 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
48734
48735 emit_label (label2);
48736 }
48737
48738 /* Emit code for round calculation. */
48739 void ix86_emit_i387_round (rtx op0, rtx op1)
48740 {
48741 machine_mode inmode = GET_MODE (op1);
48742 machine_mode outmode = GET_MODE (op0);
48743 rtx e1, e2, res, tmp, tmp1, half;
48744 rtx scratch = gen_reg_rtx (HImode);
48745 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
48746 rtx_code_label *jump_label = gen_label_rtx ();
48747 rtx insn;
48748 rtx (*gen_abs) (rtx, rtx);
48749 rtx (*gen_neg) (rtx, rtx);
48750
48751 switch (inmode)
48752 {
48753 case SFmode:
48754 gen_abs = gen_abssf2;
48755 break;
48756 case DFmode:
48757 gen_abs = gen_absdf2;
48758 break;
48759 case XFmode:
48760 gen_abs = gen_absxf2;
48761 break;
48762 default:
48763 gcc_unreachable ();
48764 }
48765
48766 switch (outmode)
48767 {
48768 case SFmode:
48769 gen_neg = gen_negsf2;
48770 break;
48771 case DFmode:
48772 gen_neg = gen_negdf2;
48773 break;
48774 case XFmode:
48775 gen_neg = gen_negxf2;
48776 break;
48777 case HImode:
48778 gen_neg = gen_neghi2;
48779 break;
48780 case SImode:
48781 gen_neg = gen_negsi2;
48782 break;
48783 case DImode:
48784 gen_neg = gen_negdi2;
48785 break;
48786 default:
48787 gcc_unreachable ();
48788 }
48789
48790 e1 = gen_reg_rtx (inmode);
48791 e2 = gen_reg_rtx (inmode);
48792 res = gen_reg_rtx (outmode);
48793
48794 half = const_double_from_real_value (dconsthalf, inmode);
48795
48796 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
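/* For example, round (2.5) = floor (2.5 + 0.5) = 3 and round (-2.5)
   = -floor (2.5 + 0.5) = -3, so halfway cases round away from zero as
   required.  The sign is taken from the C1 bit of the FXAM result and
   reapplied by negating RES at the end.  */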
48797
48798 /* scratch = fxam(op1) */
48799 emit_insn (gen_rtx_SET (scratch,
48800 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
48801 UNSPEC_FXAM)));
48802 /* e1 = fabs(op1) */
48803 emit_insn (gen_abs (e1, op1));
48804
48805 /* e2 = e1 + 0.5 */
48806 half = force_reg (inmode, half);
48807 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
48808
48809 /* res = floor(e2) */
48810 if (inmode != XFmode)
48811 {
48812 tmp1 = gen_reg_rtx (XFmode);
48813
48814 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
48815 }
48816 else
48817 tmp1 = e2;
48818
48819 switch (outmode)
48820 {
48821 case SFmode:
48822 case DFmode:
48823 {
48824 rtx tmp0 = gen_reg_rtx (XFmode);
48825
48826 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
48827
48828 emit_insn (gen_rtx_SET (res,
48829 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
48830 UNSPEC_TRUNC_NOOP)));
48831 }
48832 break;
48833 case XFmode:
48834 emit_insn (gen_frndintxf2_floor (res, tmp1));
48835 break;
48836 case HImode:
48837 emit_insn (gen_lfloorxfhi2 (res, tmp1));
48838 break;
48839 case SImode:
48840 emit_insn (gen_lfloorxfsi2 (res, tmp1));
48841 break;
48842 case DImode:
48843 emit_insn (gen_lfloorxfdi2 (res, tmp1));
48844 break;
48845 default:
48846 gcc_unreachable ();
48847 }
48848
48849 /* flags = signbit(a) */
48850 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
48851
48852 /* if (flags) then res = -res */
48853 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
48854 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
48855 gen_rtx_LABEL_REF (VOIDmode, jump_label),
48856 pc_rtx);
48857 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48858 predict_jump (REG_BR_PROB_BASE * 50 / 100);
48859 JUMP_LABEL (insn) = jump_label;
48860
48861 emit_insn (gen_neg (res, res));
48862
48863 emit_label (jump_label);
48864 LABEL_NUSES (jump_label) = 1;
48865
48866 emit_move_insn (op0, res);
48867 }
48868
48869 /* Output code to perform a Newton-Raphson approximation of a single precision
48870 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
48871
48872 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
48873 {
48874 rtx x0, x1, e0, e1;
48875
48876 x0 = gen_reg_rtx (mode);
48877 e0 = gen_reg_rtx (mode);
48878 e1 = gen_reg_rtx (mode);
48879 x1 = gen_reg_rtx (mode);
48880
48881 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
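/* This is one Newton-Raphson step x1 = x0 * (2 - b * x0) applied to the
   hardware estimate x0 = rcp(b), rewritten as (x0 + x0) - b * x0 * x0 so
   it maps directly onto the mul/add/sub patterns emitted below; one step
   roughly doubles the number of correct bits in the estimate.  */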
48882
48883 b = force_reg (mode, b);
48884
48885 /* x0 = rcp(b) estimate */
48886 if (mode == V16SFmode || mode == V8DFmode)
48887 {
48888 if (TARGET_AVX512ER)
48889 {
48890 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
48891 UNSPEC_RCP28)));
48892 /* res = a * x0 */
48893 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
48894 return;
48895 }
48896 else
48897 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
48898 UNSPEC_RCP14)));
48899 }
48900 else
48901 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
48902 UNSPEC_RCP)));
48903
48904 /* e0 = x0 * b */
48905 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
48906
48907 /* e0 = x0 * e0 */
48908 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
48909
48910 /* e1 = x0 + x0 */
48911 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
48912
48913 /* x1 = e1 - e0 */
48914 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
48915
48916 /* res = a * x1 */
48917 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
48918 }
48919
48920 /* Output code to perform a Newton-Raphson approximation of a
48921 single precision floating point [reciprocal] square root. */
48922
48923 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
48924 {
48925 rtx x0, e0, e1, e2, e3, mthree, mhalf;
48926 REAL_VALUE_TYPE r;
48927 int unspec;
48928
48929 x0 = gen_reg_rtx (mode);
48930 e0 = gen_reg_rtx (mode);
48931 e1 = gen_reg_rtx (mode);
48932 e2 = gen_reg_rtx (mode);
48933 e3 = gen_reg_rtx (mode);
48934
48935 if (TARGET_AVX512ER && mode == V16SFmode)
48936 {
48937 if (recip)
48938 /* res = rsqrt28(a) estimate */
48939 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
48940 UNSPEC_RSQRT28)));
48941 else
48942 {
48943 /* x0 = rsqrt28(a) estimate */
48944 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
48945 UNSPEC_RSQRT28)));
48946 /* res = rcp28(x0) estimate */
48947 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
48948 UNSPEC_RCP28)));
48949 }
48950 return;
48951 }
48952
48953 real_from_integer (&r, VOIDmode, -3, SIGNED);
48954 mthree = const_double_from_real_value (r, SFmode);
48955
48956 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
48957 mhalf = const_double_from_real_value (r, SFmode);
48958 unspec = UNSPEC_RSQRT;
48959
48960 if (VECTOR_MODE_P (mode))
48961 {
48962 mthree = ix86_build_const_vector (mode, true, mthree);
48963 mhalf = ix86_build_const_vector (mode, true, mhalf);
48964 /* There is no 512-bit rsqrt. There is however rsqrt14. */
48965 if (GET_MODE_SIZE (mode) == 64)
48966 unspec = UNSPEC_RSQRT14;
48967 }
48968
48969 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
48970 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
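/* Both forms are the Newton-Raphson step y1 = 0.5 * y0 * (3 - a * y0^2)
   for 1/sqrt(a), refactored with the constants -3 and -0.5 so that only
   multiplies and one add are needed; the sqrt variant multiplies by A
   once more, since a / sqrt(a) == sqrt(a).  */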
48971
48972 a = force_reg (mode, a);
48973
48974 /* x0 = rsqrt(a) estimate */
48975 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
48976 unspec)));
48977
48978 /* If a == 0.0, filter out the infinite rsqrt result to prevent a NaN for sqrt(0.0). */
48979 if (!recip)
48980 {
48981 rtx zero = force_reg (mode, CONST0_RTX(mode));
48982 rtx mask;
48983
48984 /* Handle masked compare. */
48985 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
48986 {
48987 mask = gen_reg_rtx (HImode);
48988 /* Imm value 0x4 corresponds to not-equal comparison. */
48989 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
48990 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
48991 }
48992 else
48993 {
48994 mask = gen_reg_rtx (mode);
48995 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
48996 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
48997 }
48998 }
48999
49000 /* e0 = x0 * a */
49001 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
49002 /* e1 = e0 * x0 */
49003 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
49004
49005 /* e2 = e1 - 3. */
49006 mthree = force_reg (mode, mthree);
49007 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
49008
49009 mhalf = force_reg (mode, mhalf);
49010 if (recip)
49011 /* e3 = -.5 * x0 */
49012 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
49013 else
49014 /* e3 = -.5 * e0 */
49015 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
49016 /* ret = e2 * e3 */
49017 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
49018 }
49019
49020 #ifdef TARGET_SOLARIS
49021 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
49022
49023 static void
49024 i386_solaris_elf_named_section (const char *name, unsigned int flags,
49025 tree decl)
49026 {
49027 /* With Binutils 2.15, the "@unwind" marker must be specified on
49028 every occurrence of the ".eh_frame" section, not just the first
49029 one. */
49030 if (TARGET_64BIT
49031 && strcmp (name, ".eh_frame") == 0)
49032 {
49033 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
49034 flags & SECTION_WRITE ? "aw" : "a");
49035 return;
49036 }
49037
49038 #ifndef USE_GAS
49039 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
49040 {
49041 solaris_elf_asm_comdat_section (name, flags, decl);
49042 return;
49043 }
49044 #endif
49045
49046 default_elf_asm_named_section (name, flags, decl);
49047 }
49048 #endif /* TARGET_SOLARIS */
49049
49050 /* Return the mangling of TYPE if it is an extended fundamental type. */
49051
49052 static const char *
49053 ix86_mangle_type (const_tree type)
49054 {
49055 type = TYPE_MAIN_VARIANT (type);
49056
49057 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
49058 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
49059 return NULL;
49060
49061 switch (TYPE_MODE (type))
49062 {
49063 case TFmode:
49064 /* __float128 is "g". */
49065 return "g";
49066 case XFmode:
49067 /* "long double" or __float80 is "e". */
49068 return "e";
49069 default:
49070 return NULL;
49071 }
49072 }
49073
49074 /* For 32-bit code we can save PIC register setup by using
49075 __stack_chk_fail_local hidden function instead of calling
49076 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
49077 register, so it is better to call __stack_chk_fail directly. */
49078
49079 static tree ATTRIBUTE_UNUSED
49080 ix86_stack_protect_fail (void)
49081 {
49082 return TARGET_64BIT
49083 ? default_external_stack_protect_fail ()
49084 : default_hidden_stack_protect_fail ();
49085 }
49086
49087 /* Select a format to encode pointers in exception handling data. CODE
49088 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
49089 true if the symbol may be affected by dynamic relocations.
49090
49091 ??? All x86 object file formats are capable of representing this.
49092 After all, the relocation needed is the same as for the call insn.
49093 Whether or not a particular assembler allows us to enter such, I
49094 guess we'll have to see. */
49095 int
49096 asm_preferred_eh_data_format (int code, int global)
49097 {
49098 if (flag_pic)
49099 {
49100 int type = DW_EH_PE_sdata8;
49101 if (!TARGET_64BIT
49102 || ix86_cmodel == CM_SMALL_PIC
49103 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
49104 type = DW_EH_PE_sdata4;
49105 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
49106 }
49107 if (ix86_cmodel == CM_SMALL
49108 || (ix86_cmodel == CM_MEDIUM && code))
49109 return DW_EH_PE_udata4;
49110 return DW_EH_PE_absptr;
49111 }
49112 \f
49113 /* Expand copysign from SIGN to the positive value ABS_VALUE
49114 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
49115 the sign-bit. */
49116 static void
49117 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
49118 {
49119 machine_mode mode = GET_MODE (sign);
49120 rtx sgn = gen_reg_rtx (mode);
49121 if (mask == NULL_RTX)
49122 {
49123 machine_mode vmode;
49124
49125 if (mode == SFmode)
49126 vmode = V4SFmode;
49127 else if (mode == DFmode)
49128 vmode = V2DFmode;
49129 else
49130 vmode = mode;
49131
49132 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
49133 if (!VECTOR_MODE_P (mode))
49134 {
49135 /* We need to generate a scalar mode mask in this case. */
49136 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
49137 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
49138 mask = gen_reg_rtx (mode);
49139 emit_insn (gen_rtx_SET (mask, tmp));
49140 }
49141 }
49142 else
49143 mask = gen_rtx_NOT (mode, mask);
49144 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
49145 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
49146 }
49147
49148 /* Expand fabs (OP0) and return a new rtx that holds the result. The
49149 mask for masking out the sign-bit is stored in *SMASK, if that is
49150 non-null. */
49151 static rtx
49152 ix86_expand_sse_fabs (rtx op0, rtx *smask)
49153 {
49154 machine_mode vmode, mode = GET_MODE (op0);
49155 rtx xa, mask;
49156
49157 xa = gen_reg_rtx (mode);
49158 if (mode == SFmode)
49159 vmode = V4SFmode;
49160 else if (mode == DFmode)
49161 vmode = V2DFmode;
49162 else
49163 vmode = mode;
49164 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
49165 if (!VECTOR_MODE_P (mode))
49166 {
49167 /* We need to generate a scalar mode mask in this case. */
49168 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
49169 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
49170 mask = gen_reg_rtx (mode);
49171 emit_insn (gen_rtx_SET (mask, tmp));
49172 }
49173 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
49174
49175 if (smask)
49176 *smask = mask;
49177
49178 return xa;
49179 }
49180
49181 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
49182 swapping the operands if SWAP_OPERANDS is true. The expanded
49183 code is a forward jump to a newly created label in case the
49184 comparison is true. The generated label rtx is returned. */
49185 static rtx_code_label *
49186 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
49187 bool swap_operands)
49188 {
49189 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
49190 rtx_code_label *label;
49191 rtx tmp;
49192
49193 if (swap_operands)
49194 std::swap (op0, op1);
49195
49196 label = gen_label_rtx ();
49197 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
49198 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
49199 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
49200 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
49201 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
49202 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
49203 JUMP_LABEL (tmp) = label;
49204
49205 return label;
49206 }
49207
49208 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
49209 using comparison code CODE. Operands are swapped for the comparison if
49210 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
49211 static rtx
49212 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
49213 bool swap_operands)
49214 {
49215 rtx (*insn)(rtx, rtx, rtx, rtx);
49216 machine_mode mode = GET_MODE (op0);
49217 rtx mask = gen_reg_rtx (mode);
49218
49219 if (swap_operands)
49220 std::swap (op0, op1);
49221
49222 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
49223
49224 emit_insn (insn (mask, op0, op1,
49225 gen_rtx_fmt_ee (code, mode, op0, op1)));
49226 return mask;
49227 }
49228
49229 /* Generate and return a rtx of mode MODE for 2**n where n is the number
49230 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
49231 static rtx
49232 ix86_gen_TWO52 (machine_mode mode)
49233 {
49234 REAL_VALUE_TYPE TWO52r;
49235 rtx TWO52;
49236
49237 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
49238 TWO52 = const_double_from_real_value (TWO52r, mode);
49239 TWO52 = force_reg (mode, TWO52);
49240
49241 return TWO52;
49242 }
49243
49244 /* Expand SSE sequence for computing lround from OP1 storing
49245 into OP0. */
49246 void
49247 ix86_expand_lround (rtx op0, rtx op1)
49248 {
49249 /* C code for the stuff we're doing below:
49250 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
49251 return (long)tmp;
49252 */
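/* The adjustment constant is nextafter (0.5, 0.0), the largest value
   strictly below 0.5, rather than 0.5 itself: adding a full 0.5 to the
   largest double below 0.5 would round up to 1.0 and make lround return
   1 instead of 0, while true halfway cases such as 2.5 still round away
   from zero because 2.5 + pred (0.5) rounds back up to 3.0.  */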
49253 machine_mode mode = GET_MODE (op1);
49254 const struct real_format *fmt;
49255 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
49256 rtx adj;
49257
49258 /* load nextafter (0.5, 0.0) */
49259 fmt = REAL_MODE_FORMAT (mode);
49260 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
49261 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
49262
49263 /* adj = copysign (0.5, op1) */
49264 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
49265 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
49266
49267 /* adj = op1 + adj */
49268 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
49269
49270 /* op0 = (imode)adj */
49271 expand_fix (op0, adj, 0);
49272 }
49273
49274 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
49275 into OPERAND0. */
49276 void
49277 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
49278 {
49279 /* C code for the stuff we're doing below (for do_floor):
49280 xi = (long)op1;
49281 xi -= (double)xi > op1 ? 1 : 0;
49282 return xi;
49283 */
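/* For the ceil case (!do_floor) the same sequence is emitted with the
   comparison operands swapped and PLUS instead of MINUS, i.e.
   xi += (double) xi < op1 ? 1 : 0.  */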
49284 machine_mode fmode = GET_MODE (op1);
49285 machine_mode imode = GET_MODE (op0);
49286 rtx ireg, freg, tmp;
49287 rtx_code_label *label;
49288
49289 /* reg = (long)op1 */
49290 ireg = gen_reg_rtx (imode);
49291 expand_fix (ireg, op1, 0);
49292
49293 /* freg = (double)reg */
49294 freg = gen_reg_rtx (fmode);
49295 expand_float (freg, ireg, 0);
49296
49297 /* ireg = (freg > op1) ? ireg - 1 : ireg */
49298 label = ix86_expand_sse_compare_and_jump (UNLE,
49299 freg, op1, !do_floor);
49300 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
49301 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
49302 emit_move_insn (ireg, tmp);
49303
49304 emit_label (label);
49305 LABEL_NUSES (label) = 1;
49306
49307 emit_move_insn (op0, ireg);
49308 }
49309
49310 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
49311 result in OPERAND0. */
49312 void
49313 ix86_expand_rint (rtx operand0, rtx operand1)
49314 {
49315 /* C code for the stuff we're doing below:
49316 xa = fabs (operand1);
49317 if (!isless (xa, 2**52))
49318 return operand1;
49319 xa = xa + 2**52 - 2**52;
49320 return copysign (xa, operand1);
49321 */
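/* The xa + 2**52 - 2**52 trick works because for 0 <= xa < 2**52 the sum
   lies in [2**52, 2**53), where the spacing between representable
   doubles is exactly 1.0, so the fractional bits are rounded off in the
   current rounding mode; subtracting 2**52 then leaves the rounded
   integer.  (For SFmode the constant is 2**23, see ix86_gen_TWO52.)  */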
49322 machine_mode mode = GET_MODE (operand0);
49323 rtx res, xa, TWO52, mask;
49324 rtx_code_label *label;
49325
49326 res = gen_reg_rtx (mode);
49327 emit_move_insn (res, operand1);
49328
49329 /* xa = abs (operand1) */
49330 xa = ix86_expand_sse_fabs (res, &mask);
49331
49332 /* if (!isless (xa, TWO52)) goto label; */
49333 TWO52 = ix86_gen_TWO52 (mode);
49334 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49335
49336 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
49337 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
49338
49339 ix86_sse_copysign_to_positive (res, xa, res, mask);
49340
49341 emit_label (label);
49342 LABEL_NUSES (label) = 1;
49343
49344 emit_move_insn (operand0, res);
49345 }
49346
49347 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
49348 into OPERAND0. */
49349 void
49350 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
49351 {
49352 /* C code for the stuff we expand below.
49353 double xa = fabs (x), x2;
49354 if (!isless (xa, TWO52))
49355 return x;
49356 xa = xa + TWO52 - TWO52;
49357 x2 = copysign (xa, x);
49358 Compensate. Floor:
49359 if (x2 > x)
49360 x2 -= 1;
49361 Compensate. Ceil:
49362 if (x2 < x)
49363 x2 -= -1;
49364 return x2;
49365 */
49366 machine_mode mode = GET_MODE (operand0);
49367 rtx xa, TWO52, tmp, one, res, mask;
49368 rtx_code_label *label;
49369
49370 TWO52 = ix86_gen_TWO52 (mode);
49371
49372 /* Temporary for holding the result, initialized to the input
49373 operand to ease control flow. */
49374 res = gen_reg_rtx (mode);
49375 emit_move_insn (res, operand1);
49376
49377 /* xa = abs (operand1) */
49378 xa = ix86_expand_sse_fabs (res, &mask);
49379
49380 /* if (!isless (xa, TWO52)) goto label; */
49381 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49382
49383 /* xa = xa + TWO52 - TWO52; */
49384 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
49385 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
49386
49387 /* xa = copysign (xa, operand1) */
49388 ix86_sse_copysign_to_positive (xa, xa, res, mask);
49389
49390 /* generate 1.0 or -1.0 */
49391 one = force_reg (mode,
49392 const_double_from_real_value (do_floor
49393 ? dconst1 : dconstm1, mode));
49394
49395 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
49396 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
49397 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
49398 /* We always need to subtract here to preserve signed zero. */
49399 tmp = expand_simple_binop (mode, MINUS,
49400 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
49401 emit_move_insn (res, tmp);
49402
49403 emit_label (label);
49404 LABEL_NUSES (label) = 1;
49405
49406 emit_move_insn (operand0, res);
49407 }
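
/* Side note on the "always subtract" compensation above (an
   illustration, assuming IEEE semantics): in the default rounding
   mode -0.0 - 0.0 evaluates to -0.0 while -0.0 + 0.0 evaluates to
   +0.0, so subtracting either 1.0 (floor) or -1.0 (ceil) keeps the
   sign of a zero result.  E.g. for ceil (-0.5): x2 is -0.0, the
   comparison x2 < x is false, the masked constant is +0.0, and
   -0.0 - 0.0 == -0.0, which is the required result.  */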
49408
49409 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
49410 into OPERAND0. */
49411 void
49412 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
49413 {
49414 /* C code for the stuff we expand below.
49415 double xa = fabs (x), x2;
49416 if (!isless (xa, TWO52))
49417 return x;
49418 x2 = (double)(long)x;
49419 Compensate. Floor:
49420 if (x2 > x)
49421 x2 -= 1;
49422 Compensate. Ceil:
49423 if (x2 < x)
49424 x2 += 1;
49425 if (HONOR_SIGNED_ZEROS (mode))
49426 return copysign (x2, x);
49427 return x2;
49428 */
49429 machine_mode mode = GET_MODE (operand0);
49430 rtx xa, xi, TWO52, tmp, one, res, mask;
49431 rtx_code_label *label;
49432
49433 TWO52 = ix86_gen_TWO52 (mode);
49434
49435 /* Temporary for holding the result, initialized to the input
49436 operand to ease control flow. */
49437 res = gen_reg_rtx (mode);
49438 emit_move_insn (res, operand1);
49439
49440 /* xa = abs (operand1) */
49441 xa = ix86_expand_sse_fabs (res, &mask);
49442
49443 /* if (!isless (xa, TWO52)) goto label; */
49444 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49445
49446 /* xa = (double)(long)x */
49447 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
49448 expand_fix (xi, res, 0);
49449 expand_float (xa, xi, 0);
49450
49451 /* generate 1.0 */
49452 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
49453
49454 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
49455 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
49456 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
49457 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
49458 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
49459 emit_move_insn (res, tmp);
49460
49461 if (HONOR_SIGNED_ZEROS (mode))
49462 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
49463
49464 emit_label (label);
49465 LABEL_NUSES (label) = 1;
49466
49467 emit_move_insn (operand0, res);
49468 }
49469
49470 /* Expand SSE sequence for computing round from OPERAND1 storing
49471 into OPERAND0. This sequence works without relying on DImode truncation
49472 via cvttsd2siq, which is only available on 64-bit targets. */
49473 void
49474 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
49475 {
49476 /* C code for the stuff we expand below.
49477 double xa = fabs (x), xa2, x2;
49478 if (!isless (xa, TWO52))
49479 return x;
49480 Using the absolute value and copying back sign makes
49481 -0.0 -> -0.0 correct.
49482 xa2 = xa + TWO52 - TWO52;
49483 Compensate.
49484 dxa = xa2 - xa;
49485 if (dxa <= -0.5)
49486 xa2 += 1;
49487 else if (dxa > 0.5)
49488 xa2 -= 1;
49489 x2 = copysign (xa2, x);
49490 return x2;
49491 */
49492 machine_mode mode = GET_MODE (operand0);
49493 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
49494 rtx_code_label *label;
49495
49496 TWO52 = ix86_gen_TWO52 (mode);
49497
49498 /* Temporary for holding the result, initialized to the input
49499 operand to ease control flow. */
49500 res = gen_reg_rtx (mode);
49501 emit_move_insn (res, operand1);
49502
49503 /* xa = abs (operand1) */
49504 xa = ix86_expand_sse_fabs (res, &mask);
49505
49506 /* if (!isless (xa, TWO52)) goto label; */
49507 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49508
49509 /* xa2 = xa + TWO52 - TWO52; */
49510 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
49511 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
49512
49513 /* dxa = xa2 - xa; */
49514 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
49515
49516 /* generate 0.5, 1.0 and -0.5 */
49517 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
49518 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
49519 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
49520 0, OPTAB_DIRECT);
49521
49522 /* Compensate. */
49523 tmp = gen_reg_rtx (mode);
49524 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
49525 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
49526 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
49527 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
49528 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
49529 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
49530 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
49531 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
49532
49533 /* res = copysign (xa2, operand1) */
49534 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
49535
49536 emit_label (label);
49537 LABEL_NUSES (label) = 1;
49538
49539 emit_move_insn (operand0, res);
49540 }
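
/* Worked example of the compensation above (illustrative only,
   assuming IEEE double and round-to-nearest): for x = 2.5,
     xa  = 2.5
     xa2 = 2.5 + 2**52 - 2**52 = 2.0   (the tie rounds to even)
     dxa = xa2 - xa = -0.5
     dxa <= -0.5, so xa2 += 1, giving 3.0
     copysign (3.0, 2.5) = 3.0
   which matches round (2.5) == 3.0 (halfway cases away from zero),
   whereas the bare TWO52 step alone would have produced 2.0.  */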
49541
49542 /* Expand SSE sequence for computing trunc from OPERAND1 storing
49543 into OPERAND0. */
49544 void
49545 ix86_expand_trunc (rtx operand0, rtx operand1)
49546 {
49547 /* C code for SSE variant we expand below.
49548 double xa = fabs (x), x2;
49549 if (!isless (xa, TWO52))
49550 return x;
49551 x2 = (double)(long)x;
49552 if (HONOR_SIGNED_ZEROS (mode))
49553 return copysign (x2, x);
49554 return x2;
49555 */
49556 machine_mode mode = GET_MODE (operand0);
49557 rtx xa, xi, TWO52, res, mask;
49558 rtx_code_label *label;
49559
49560 TWO52 = ix86_gen_TWO52 (mode);
49561
49562 /* Temporary for holding the result, initialized to the input
49563 operand to ease control flow. */
49564 res = gen_reg_rtx (mode);
49565 emit_move_insn (res, operand1);
49566
49567 /* xa = abs (operand1) */
49568 xa = ix86_expand_sse_fabs (res, &mask);
49569
49570 /* if (!isless (xa, TWO52)) goto label; */
49571 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49572
49573 /* x = (double)(long)x */
49574 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
49575 expand_fix (xi, res, 0);
49576 expand_float (res, xi, 0);
49577
49578 if (HONOR_SIGNED_ZEROS (mode))
49579 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
49580
49581 emit_label (label);
49582 LABEL_NUSES (label) = 1;
49583
49584 emit_move_insn (operand0, res);
49585 }
49586
49587 /* Expand SSE sequence for computing trunc from OPERAND1 storing
49588 into OPERAND0. */
49589 void
49590 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
49591 {
49592 machine_mode mode = GET_MODE (operand0);
49593 rtx xa, mask, TWO52, one, res, smask, tmp;
49594 rtx_code_label *label;
49595
49596 /* C code for SSE variant we expand below.
49597 double xa = fabs (x), xa2, x2;
49598 if (!isless (xa, TWO52))
49599 return x;
49600 xa2 = xa + TWO52 - TWO52;
49601 Compensate:
49602 if (xa2 > xa)
49603 xa2 -= 1.0;
49604 x2 = copysign (xa2, x);
49605 return x2;
49606 */
49607
49608 TWO52 = ix86_gen_TWO52 (mode);
49609
49610 /* Temporary for holding the result, initialized to the input
49611 operand to ease control flow. */
49612 res = gen_reg_rtx (mode);
49613 emit_move_insn (res, operand1);
49614
49615 /* xa = abs (operand1) */
49616 xa = ix86_expand_sse_fabs (res, &smask);
49617
49618 /* if (!isless (xa, TWO52)) goto label; */
49619 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49620
49621 /* res = xa + TWO52 - TWO52; */
49622 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
49623 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
49624 emit_move_insn (res, tmp);
49625
49626 /* generate 1.0 */
49627 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
49628
49629 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
49630 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
49631 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
49632 tmp = expand_simple_binop (mode, MINUS,
49633 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
49634 emit_move_insn (res, tmp);
49635
49636 /* res = copysign (res, operand1) */
49637 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
49638
49639 emit_label (label);
49640 LABEL_NUSES (label) = 1;
49641
49642 emit_move_insn (operand0, res);
49643 }
49644
49645 /* Expand SSE sequence for computing round from OPERAND1 storing
49646 into OPERAND0. */
49647 void
49648 ix86_expand_round (rtx operand0, rtx operand1)
49649 {
49650 /* C code for the stuff we're doing below:
49651 double xa = fabs (x);
49652 if (!isless (xa, TWO52))
49653 return x;
49654 xa = (double)(long)(xa + nextafter (0.5, 0.0));
49655 return copysign (xa, x);
49656 */
49657 machine_mode mode = GET_MODE (operand0);
49658 rtx res, TWO52, xa, xi, half, mask;
49659 rtx_code_label *label;
49660 const struct real_format *fmt;
49661 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
49662
49663 /* Temporary for holding the result, initialized to the input
49664 operand to ease control flow. */
49665 res = gen_reg_rtx (mode);
49666 emit_move_insn (res, operand1);
49667
49668 TWO52 = ix86_gen_TWO52 (mode);
49669 xa = ix86_expand_sse_fabs (res, &mask);
49670 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
49671
49672 /* load nextafter (0.5, 0.0) */
49673 fmt = REAL_MODE_FORMAT (mode);
49674 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
49675 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
49676
49677 /* xa = xa + 0.5 */
49678 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
49679 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
49680
49681 /* xa = (double)(int64_t)xa */
49682 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
49683 expand_fix (xi, xa, 0);
49684 expand_float (xa, xi, 0);
49685
49686 /* res = copysign (xa, operand1) */
49687 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
49688
49689 emit_label (label);
49690 LABEL_NUSES (label) = 1;
49691
49692 emit_move_insn (operand0, res);
49693 }
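
/* Why nextafter (0.5, 0.0) rather than 0.5 (an illustration,
   assuming IEEE double): for x = 0.49999999999999994, the largest
   double below 0.5, the sum x + 0.5 rounds up to exactly 1.0, so
   trunc (x + 0.5) would yield 1 even though round (x) == 0.  With
   pred_half = 0.5 - 2**-54 the sum is 1 - 2**-53, which truncates
   to 0, while round (0.5) still yields 1 because 0.5 + pred_half
   rounds up to 1.0.  */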
49694
49695 /* Expand SSE sequence for computing round
49696 from OP1 storing into OP0 using sse4 round insn. */
49697 void
49698 ix86_expand_round_sse4 (rtx op0, rtx op1)
49699 {
49700 machine_mode mode = GET_MODE (op0);
49701 rtx e1, e2, res, half;
49702 const struct real_format *fmt;
49703 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
49704 rtx (*gen_copysign) (rtx, rtx, rtx);
49705 rtx (*gen_round) (rtx, rtx, rtx);
49706
49707 switch (mode)
49708 {
49709 case SFmode:
49710 gen_copysign = gen_copysignsf3;
49711 gen_round = gen_sse4_1_roundsf2;
49712 break;
49713 case DFmode:
49714 gen_copysign = gen_copysigndf3;
49715 gen_round = gen_sse4_1_rounddf2;
49716 break;
49717 default:
49718 gcc_unreachable ();
49719 }
49720
49721 /* round (a) = trunc (a + copysign (0.5, a)) */
49722
49723 /* load nextafter (0.5, 0.0) */
49724 fmt = REAL_MODE_FORMAT (mode);
49725 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
49726 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
49727 half = const_double_from_real_value (pred_half, mode);
49728
49729 /* e1 = copysign (0.5, op1) */
49730 e1 = gen_reg_rtx (mode);
49731 emit_insn (gen_copysign (e1, half, op1));
49732
49733 /* e2 = op1 + e1 */
49734 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
49735
49736 /* res = trunc (e2) */
49737 res = gen_reg_rtx (mode);
49738 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
49739
49740 emit_move_insn (op0, res);
49741 }
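
/* Rough intrinsics equivalent of the sequence above (a sketch, not
   what GCC emits; assumes SSE4.1 and <smmintrin.h>; the helper name
   is made up for the example):

     #include <smmintrin.h>

     static double round_sse4_sketch (double x)
     {
       __m128d vx   = _mm_set_sd (x);
       __m128d sign = _mm_and_pd (vx, _mm_set_sd (-0.0));
       __m128d half = _mm_or_pd (_mm_set_sd (0.49999999999999994), sign);
       __m128d sum  = _mm_add_sd (vx, half);
       __m128d res  = _mm_round_sd (sum, sum,
                                    _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
       return _mm_cvtsd_f64 (res);
     }
*/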
49742 \f
49743
49744 /* Table of valid machine attributes. */
49745 static const struct attribute_spec ix86_attribute_table[] =
49746 {
49747 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
49748 affects_type_identity } */
49749 /* Stdcall attribute says callee is responsible for popping arguments
49750 if they are not variable. */
49751 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
49752 true },
49753 /* Fastcall attribute says callee is responsible for popping arguments
49754 if they are not variable. */
49755 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
49756 true },
49757 /* Thiscall attribute says callee is responsible for popping arguments
49758 if they are not variable. */
49759 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
49760 true },
49761 /* Cdecl attribute says the callee is a normal C declaration */
49762 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
49763 true },
49764 /* Regparm attribute specifies how many integer arguments are to be
49765 passed in registers. */
49766 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
49767 true },
49768 /* Sseregparm attribute says we are using x86_64 calling conventions
49769 for FP arguments. */
49770 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
49771 true },
49772 /* The transactional memory builtins are implicitly regparm or fastcall
49773 depending on the ABI. Override the generic do-nothing attribute that
49774 these builtins were declared with. */
49775 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
49776 true },
49777 /* force_align_arg_pointer says this function realigns the stack at entry. */
49778 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
49779 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
49780 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49781 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
49782 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
49783 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
49784 false },
49785 #endif
49786 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
49787 false },
49788 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
49789 false },
49790 #ifdef SUBTARGET_ATTRIBUTE_TABLE
49791 SUBTARGET_ATTRIBUTE_TABLE,
49792 #endif
49793 /* ms_abi and sysv_abi calling convention function attributes. */
49794 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
49795 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
49796 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
49797 false },
49798 { "callee_pop_aggregate_return", 1, 1, false, true, true,
49799 ix86_handle_callee_pop_aggregate_return, true },
49800 { "interrupt", 0, 0, false, true, true,
49801 ix86_handle_interrupt_attribute, false },
49802 { "no_caller_saved_registers", 0, 0, false, true, true,
49803 ix86_handle_no_caller_saved_registers_attribute, false },
49804
49805 /* End element. */
49806 { NULL, 0, 0, false, false, false, NULL, false }
49807 };
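
/* Illustrative use of a few of the attributes above in user code
   (a sketch; the declarations and identifiers are made up for the
   example):

     int  __attribute__ ((regparm (3)))  madd3 (int a, int b, int c);
     int  __attribute__ ((fastcall))     fc_callee (int a, int b);
     void __attribute__ ((ms_abi))       win64_style_fn (void *p);
     struct __attribute__ ((ms_struct))  msvc_like_layout
     {
       char c;
       int i;
     };
*/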
49808
49809 /* Implement targetm.vectorize.builtin_vectorization_cost. */
49810 static int
49811 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
49812 tree vectype, int)
49813 {
49814 switch (type_of_cost)
49815 {
49816 case scalar_stmt:
49817 return ix86_cost->scalar_stmt_cost;
49818
49819 case scalar_load:
49820 return ix86_cost->scalar_load_cost;
49821
49822 case scalar_store:
49823 return ix86_cost->scalar_store_cost;
49824
49825 case vector_stmt:
49826 return ix86_cost->vec_stmt_cost;
49827
49828 case vector_load:
49829 return ix86_cost->vec_align_load_cost;
49830
49831 case vector_store:
49832 return ix86_cost->vec_store_cost;
49833
49834 case vec_to_scalar:
49835 return ix86_cost->vec_to_scalar_cost;
49836
49837 case scalar_to_vec:
49838 return ix86_cost->scalar_to_vec_cost;
49839
49840 case unaligned_load:
49841 case unaligned_store:
49842 return ix86_cost->vec_unalign_load_cost;
49843
49844 case cond_branch_taken:
49845 return ix86_cost->cond_taken_branch_cost;
49846
49847 case cond_branch_not_taken:
49848 return ix86_cost->cond_not_taken_branch_cost;
49849
49850 case vec_perm:
49851 case vec_promote_demote:
49852 return ix86_cost->vec_stmt_cost;
49853
49854 case vec_construct:
49855 return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1);
49856
49857 default:
49858 gcc_unreachable ();
49859 }
49860 }
49861
49862 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
49863 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
49864 insn every time. */
49865
49866 static GTY(()) rtx_insn *vselect_insn;
49867
49868 /* Initialize vselect_insn. */
49869
49870 static void
49871 init_vselect_insn (void)
49872 {
49873 unsigned i;
49874 rtx x;
49875
49876 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
49877 for (i = 0; i < MAX_VECT_LEN; ++i)
49878 XVECEXP (x, 0, i) = const0_rtx;
49879 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
49880 const0_rtx), x);
49881 x = gen_rtx_SET (const0_rtx, x);
49882 start_sequence ();
49883 vselect_insn = emit_insn (x);
49884 end_sequence ();
49885 }
49886
49887 /* Construct (set target (vec_select op0 (parallel perm))) and
49888 return true if that's a valid instruction in the active ISA. */
49889
49890 static bool
49891 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
49892 unsigned nelt, bool testing_p)
49893 {
49894 unsigned int i;
49895 rtx x, save_vconcat;
49896 int icode;
49897
49898 if (vselect_insn == NULL_RTX)
49899 init_vselect_insn ();
49900
49901 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
49902 PUT_NUM_ELEM (XVEC (x, 0), nelt);
49903 for (i = 0; i < nelt; ++i)
49904 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
49905 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
49906 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
49907 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
49908 SET_DEST (PATTERN (vselect_insn)) = target;
49909 icode = recog_memoized (vselect_insn);
49910
49911 if (icode >= 0 && !testing_p)
49912 emit_insn (copy_rtx (PATTERN (vselect_insn)));
49913
49914 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
49915 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
49916 INSN_CODE (vselect_insn) = -1;
49917
49918 return icode >= 0;
49919 }
49920
49921 /* Similar, but generate a vec_concat from op0 and op1 as well. */
49922
49923 static bool
49924 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
49925 const unsigned char *perm, unsigned nelt,
49926 bool testing_p)
49927 {
49928 machine_mode v2mode;
49929 rtx x;
49930 bool ok;
49931
49932 if (vselect_insn == NULL_RTX)
49933 init_vselect_insn ();
49934
49935 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
49936 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
49937 PUT_MODE (x, v2mode);
49938 XEXP (x, 0) = op0;
49939 XEXP (x, 1) = op1;
49940 ok = expand_vselect (target, x, perm, nelt, testing_p);
49941 XEXP (x, 0) = const0_rtx;
49942 XEXP (x, 1) = const0_rtx;
49943 return ok;
49944 }
49945
49946 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49947 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
49948
49949 static bool
49950 expand_vec_perm_blend (struct expand_vec_perm_d *d)
49951 {
49952 machine_mode mmode, vmode = d->vmode;
49953 unsigned i, mask, nelt = d->nelt;
49954 rtx target, op0, op1, maskop, x;
49955 rtx rperm[32], vperm;
49956
49957 if (d->one_operand_p)
49958 return false;
49959 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
49960 && (TARGET_AVX512BW
49961 || GET_MODE_UNIT_SIZE (vmode) >= 4))
49962 ;
49963 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49964 ;
49965 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49966 ;
49967 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49968 ;
49969 else
49970 return false;
49971
49972 /* This is a blend, not a permute. Elements must stay in their
49973 respective lanes. */
49974 for (i = 0; i < nelt; ++i)
49975 {
49976 unsigned e = d->perm[i];
49977 if (!(e == i || e == i + nelt))
49978 return false;
49979 }
49980
49981 if (d->testing_p)
49982 return true;
49983
49984 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
49985 decision should be extracted elsewhere, so that we only try that
49986 sequence once all budget==3 options have been tried. */
49987 target = d->target;
49988 op0 = d->op0;
49989 op1 = d->op1;
49990 mask = 0;
49991
49992 switch (vmode)
49993 {
49994 case V8DFmode:
49995 case V16SFmode:
49996 case V4DFmode:
49997 case V8SFmode:
49998 case V2DFmode:
49999 case V4SFmode:
50000 case V8HImode:
50001 case V8SImode:
50002 case V32HImode:
50003 case V64QImode:
50004 case V16SImode:
50005 case V8DImode:
50006 for (i = 0; i < nelt; ++i)
50007 mask |= (d->perm[i] >= nelt) << i;
50008 break;
50009
50010 case V2DImode:
50011 for (i = 0; i < 2; ++i)
50012 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
50013 vmode = V8HImode;
50014 goto do_subreg;
50015
50016 case V4SImode:
50017 for (i = 0; i < 4; ++i)
50018 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
50019 vmode = V8HImode;
50020 goto do_subreg;
50021
50022 case V16QImode:
50023 /* See if bytes move in pairs so we can use pblendw with
50024 an immediate argument, rather than pblendvb with a vector
50025 argument. */
50026 for (i = 0; i < 16; i += 2)
50027 if (d->perm[i] + 1 != d->perm[i + 1])
50028 {
50029 use_pblendvb:
50030 for (i = 0; i < nelt; ++i)
50031 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
50032
50033 finish_pblendvb:
50034 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
50035 vperm = force_reg (vmode, vperm);
50036
50037 if (GET_MODE_SIZE (vmode) == 16)
50038 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
50039 else
50040 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
50041 if (target != d->target)
50042 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
50043 return true;
50044 }
50045
50046 for (i = 0; i < 8; ++i)
50047 mask |= (d->perm[i * 2] >= 16) << i;
50048 vmode = V8HImode;
50049 /* FALLTHRU */
50050
50051 do_subreg:
50052 target = gen_reg_rtx (vmode);
50053 op0 = gen_lowpart (vmode, op0);
50054 op1 = gen_lowpart (vmode, op1);
50055 break;
50056
50057 case V32QImode:
50058 /* See if bytes move in pairs. If not, vpblendvb must be used. */
50059 for (i = 0; i < 32; i += 2)
50060 if (d->perm[i] + 1 != d->perm[i + 1])
50061 goto use_pblendvb;
50062 /* See if bytes move in quadruplets. If yes, vpblendd
50063 with immediate can be used. */
50064 for (i = 0; i < 32; i += 4)
50065 if (d->perm[i] + 2 != d->perm[i + 2])
50066 break;
50067 if (i < 32)
50068 {
50069 /* See if bytes move the same in both lanes. If yes,
50070 vpblendw with immediate can be used. */
50071 for (i = 0; i < 16; i += 2)
50072 if (d->perm[i] + 16 != d->perm[i + 16])
50073 goto use_pblendvb;
50074
50075 /* Use vpblendw. */
50076 for (i = 0; i < 16; ++i)
50077 mask |= (d->perm[i * 2] >= 32) << i;
50078 vmode = V16HImode;
50079 goto do_subreg;
50080 }
50081
50082 /* Use vpblendd. */
50083 for (i = 0; i < 8; ++i)
50084 mask |= (d->perm[i * 4] >= 32) << i;
50085 vmode = V8SImode;
50086 goto do_subreg;
50087
50088 case V16HImode:
50089 /* See if words move in pairs. If yes, vpblendd can be used. */
50090 for (i = 0; i < 16; i += 2)
50091 if (d->perm[i] + 1 != d->perm[i + 1])
50092 break;
50093 if (i < 16)
50094 {
50095 /* See if words move the same in both lanes. If not,
50096 vpblendvb must be used. */
50097 for (i = 0; i < 8; i++)
50098 if (d->perm[i] + 8 != d->perm[i + 8])
50099 {
50100 /* Use vpblendvb. */
50101 for (i = 0; i < 32; ++i)
50102 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
50103
50104 vmode = V32QImode;
50105 nelt = 32;
50106 target = gen_reg_rtx (vmode);
50107 op0 = gen_lowpart (vmode, op0);
50108 op1 = gen_lowpart (vmode, op1);
50109 goto finish_pblendvb;
50110 }
50111
50112 /* Use vpblendw. */
50113 for (i = 0; i < 16; ++i)
50114 mask |= (d->perm[i] >= 16) << i;
50115 break;
50116 }
50117
50118 /* Use vpblendd. */
50119 for (i = 0; i < 8; ++i)
50120 mask |= (d->perm[i * 2] >= 16) << i;
50121 vmode = V8SImode;
50122 goto do_subreg;
50123
50124 case V4DImode:
50125 /* Use vpblendd. */
50126 for (i = 0; i < 4; ++i)
50127 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
50128 vmode = V8SImode;
50129 goto do_subreg;
50130
50131 default:
50132 gcc_unreachable ();
50133 }
50134
50135 switch (vmode)
50136 {
50137 case V8DFmode:
50138 case V8DImode:
50139 mmode = QImode;
50140 break;
50141 case V16SFmode:
50142 case V16SImode:
50143 mmode = HImode;
50144 break;
50145 case V32HImode:
50146 mmode = SImode;
50147 break;
50148 case V64QImode:
50149 mmode = DImode;
50150 break;
50151 default:
50152 mmode = VOIDmode;
50153 }
50154
50155 if (mmode != VOIDmode)
50156 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
50157 else
50158 maskop = GEN_INT (mask);
50159
50160 /* This matches five different patterns with the different modes. */
50161 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
50162 x = gen_rtx_SET (target, x);
50163 emit_insn (x);
50164 if (target != d->target)
50165 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
50166
50167 return true;
50168 }
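
/* Worked example for the V4SImode case above (illustrative only): a
   blend selecting { op0[0], op1[1], op0[2], op1[3] } has
   d->perm = { 0, 5, 2, 7 }.  Elements 1 and 3 come from op1
   (perm[i] >= 4), so with two mask bits per element
   mask = (3 << 2) | (3 << 6) = 0xcc, and the operation is emitted as
   a V8HImode pblendw with that immediate.  */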
50169
50170 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
50171 in terms of the variable form of vpermilps.
50172
50173 Note that we will have already failed the immediate input vpermilps,
50174 which requires that the high and low part shuffle be identical; the
50175 variable form doesn't require that. */
50176
50177 static bool
50178 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
50179 {
50180 rtx rperm[8], vperm;
50181 unsigned i;
50182
50183 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
50184 return false;
50185
50186 /* We can only permute within the 128-bit lane. */
50187 for (i = 0; i < 8; ++i)
50188 {
50189 unsigned e = d->perm[i];
50190 if (i < 4 ? e >= 4 : e < 4)
50191 return false;
50192 }
50193
50194 if (d->testing_p)
50195 return true;
50196
50197 for (i = 0; i < 8; ++i)
50198 {
50199 unsigned e = d->perm[i];
50200
50201 /* Within each 128-bit lane, the elements of op0 are numbered
50202 from 0 and the elements of op1 are numbered from 4. */
50203 if (e >= 8 + 4)
50204 e -= 8;
50205 else if (e >= 4)
50206 e -= 4;
50207
50208 rperm[i] = GEN_INT (e);
50209 }
50210
50211 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
50212 vperm = force_reg (V8SImode, vperm);
50213 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
50214
50215 return true;
50216 }
50217
50218 /* Return true if permutation D can be performed as VMODE permutation
50219 instead. */
50220
50221 static bool
50222 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
50223 {
50224 unsigned int i, j, chunk;
50225
50226 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
50227 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
50228 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
50229 return false;
50230
50231 if (GET_MODE_NUNITS (vmode) >= d->nelt)
50232 return true;
50233
50234 chunk = d->nelt / GET_MODE_NUNITS (vmode);
50235 for (i = 0; i < d->nelt; i += chunk)
50236 if (d->perm[i] & (chunk - 1))
50237 return false;
50238 else
50239 for (j = 1; j < chunk; ++j)
50240 if (d->perm[i] + j != d->perm[i + j])
50241 return false;
50242
50243 return true;
50244 }
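
/* Example (illustrative): the V16QImode permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves bytes in aligned
   groups of four, so it is also valid as the V4SImode permutation
   { 1, 0, 3, 2 } and a 32-bit element shuffle can be used instead of
   a byte shuffle.  */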
50245
50246 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
50247 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
50248
50249 static bool
50250 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
50251 {
50252 unsigned i, nelt, eltsz, mask;
50253 unsigned char perm[64];
50254 machine_mode vmode = V16QImode;
50255 rtx rperm[64], vperm, target, op0, op1;
50256
50257 nelt = d->nelt;
50258
50259 if (!d->one_operand_p)
50260 {
50261 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
50262 {
50263 if (TARGET_AVX2
50264 && valid_perm_using_mode_p (V2TImode, d))
50265 {
50266 if (d->testing_p)
50267 return true;
50268
50269 /* Use vperm2i128 insn. The pattern uses
50270 V4DImode instead of V2TImode. */
50271 target = d->target;
50272 if (d->vmode != V4DImode)
50273 target = gen_reg_rtx (V4DImode);
50274 op0 = gen_lowpart (V4DImode, d->op0);
50275 op1 = gen_lowpart (V4DImode, d->op1);
50276 rperm[0]
50277 = GEN_INT ((d->perm[0] / (nelt / 2))
50278 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
50279 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
50280 if (target != d->target)
50281 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
50282 return true;
50283 }
50284 return false;
50285 }
50286 }
50287 else
50288 {
50289 if (GET_MODE_SIZE (d->vmode) == 16)
50290 {
50291 if (!TARGET_SSSE3)
50292 return false;
50293 }
50294 else if (GET_MODE_SIZE (d->vmode) == 32)
50295 {
50296 if (!TARGET_AVX2)
50297 return false;
50298
50299 /* V4DImode should be already handled through
50300 expand_vselect by vpermq instruction. */
50301 gcc_assert (d->vmode != V4DImode);
50302
50303 vmode = V32QImode;
50304 if (d->vmode == V8SImode
50305 || d->vmode == V16HImode
50306 || d->vmode == V32QImode)
50307 {
50308 /* First see if vpermq can be used for
50309 V8SImode/V16HImode/V32QImode. */
50310 if (valid_perm_using_mode_p (V4DImode, d))
50311 {
50312 for (i = 0; i < 4; i++)
50313 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
50314 if (d->testing_p)
50315 return true;
50316 target = gen_reg_rtx (V4DImode);
50317 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
50318 perm, 4, false))
50319 {
50320 emit_move_insn (d->target,
50321 gen_lowpart (d->vmode, target));
50322 return true;
50323 }
50324 return false;
50325 }
50326
50327 /* Next see if vpermd can be used. */
50328 if (valid_perm_using_mode_p (V8SImode, d))
50329 vmode = V8SImode;
50330 }
50331 /* Or if vpermps can be used. */
50332 else if (d->vmode == V8SFmode)
50333 vmode = V8SImode;
50334
50335 if (vmode == V32QImode)
50336 {
50337 /* vpshufb only works within lanes; it is not
50338 possible to shuffle bytes between the lanes. */
50339 for (i = 0; i < nelt; ++i)
50340 if ((d->perm[i] ^ i) & (nelt / 2))
50341 return false;
50342 }
50343 }
50344 else if (GET_MODE_SIZE (d->vmode) == 64)
50345 {
50346 if (!TARGET_AVX512BW)
50347 return false;
50348
50349 /* If vpermq didn't work, vpshufb won't work either. */
50350 if (d->vmode == V8DFmode || d->vmode == V8DImode)
50351 return false;
50352
50353 vmode = V64QImode;
50354 if (d->vmode == V16SImode
50355 || d->vmode == V32HImode
50356 || d->vmode == V64QImode)
50357 {
50358 /* First see if vpermq can be used for
50359 V16SImode/V32HImode/V64QImode. */
50360 if (valid_perm_using_mode_p (V8DImode, d))
50361 {
50362 for (i = 0; i < 8; i++)
50363 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
50364 if (d->testing_p)
50365 return true;
50366 target = gen_reg_rtx (V8DImode);
50367 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
50368 perm, 8, false))
50369 {
50370 emit_move_insn (d->target,
50371 gen_lowpart (d->vmode, target));
50372 return true;
50373 }
50374 return false;
50375 }
50376
50377 /* Next see if vpermd can be used. */
50378 if (valid_perm_using_mode_p (V16SImode, d))
50379 vmode = V16SImode;
50380 }
50381 /* Or if vpermps can be used. */
50382 else if (d->vmode == V16SFmode)
50383 vmode = V16SImode;
50384 if (vmode == V64QImode)
50385 {
50386 /* vpshufb only works within lanes; it is not
50387 possible to shuffle bytes between the lanes. */
50388 for (i = 0; i < nelt; ++i)
50389 if ((d->perm[i] ^ i) & (nelt / 4))
50390 return false;
50391 }
50392 }
50393 else
50394 return false;
50395 }
50396
50397 if (d->testing_p)
50398 return true;
50399
50400 if (vmode == V8SImode)
50401 for (i = 0; i < 8; ++i)
50402 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
50403 else if (vmode == V16SImode)
50404 for (i = 0; i < 16; ++i)
50405 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
50406 else
50407 {
50408 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50409 if (!d->one_operand_p)
50410 mask = 2 * nelt - 1;
50411 else if (vmode == V16QImode)
50412 mask = nelt - 1;
50413 else if (vmode == V64QImode)
50414 mask = nelt / 4 - 1;
50415 else
50416 mask = nelt / 2 - 1;
50417
50418 for (i = 0; i < nelt; ++i)
50419 {
50420 unsigned j, e = d->perm[i] & mask;
50421 for (j = 0; j < eltsz; ++j)
50422 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
50423 }
50424 }
50425
50426 vperm = gen_rtx_CONST_VECTOR (vmode,
50427 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
50428 vperm = force_reg (vmode, vperm);
50429
50430 target = d->target;
50431 if (d->vmode != vmode)
50432 target = gen_reg_rtx (vmode);
50433 op0 = gen_lowpart (vmode, d->op0);
50434 if (d->one_operand_p)
50435 {
50436 if (vmode == V16QImode)
50437 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
50438 else if (vmode == V32QImode)
50439 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
50440 else if (vmode == V64QImode)
50441 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
50442 else if (vmode == V8SFmode)
50443 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
50444 else if (vmode == V8SImode)
50445 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
50446 else if (vmode == V16SFmode)
50447 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
50448 else if (vmode == V16SImode)
50449 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
50450 else
50451 gcc_unreachable ();
50452 }
50453 else
50454 {
50455 op1 = gen_lowpart (vmode, d->op1);
50456 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
50457 }
50458 if (target != d->target)
50459 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
50460
50461 return true;
50462 }
50463
50464 /* For V*[QHS]Imode permutations, check if the same permutation
50465 can't be performed in a 2x, 4x or 8x wider inner mode. */
50466
50467 static bool
50468 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
50469 struct expand_vec_perm_d *nd)
50470 {
50471 int i;
50472 enum machine_mode mode = VOIDmode;
50473
50474 switch (d->vmode)
50475 {
50476 case V16QImode: mode = V8HImode; break;
50477 case V32QImode: mode = V16HImode; break;
50478 case V64QImode: mode = V32HImode; break;
50479 case V8HImode: mode = V4SImode; break;
50480 case V16HImode: mode = V8SImode; break;
50481 case V32HImode: mode = V16SImode; break;
50482 case V4SImode: mode = V2DImode; break;
50483 case V8SImode: mode = V4DImode; break;
50484 case V16SImode: mode = V8DImode; break;
50485 default: return false;
50486 }
50487 for (i = 0; i < d->nelt; i += 2)
50488 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
50489 return false;
50490 nd->vmode = mode;
50491 nd->nelt = d->nelt / 2;
50492 for (i = 0; i < nd->nelt; i++)
50493 nd->perm[i] = d->perm[2 * i] / 2;
50494 if (GET_MODE_INNER (mode) != DImode)
50495 canonicalize_vector_int_perm (nd, nd);
50496 if (nd != d)
50497 {
50498 nd->one_operand_p = d->one_operand_p;
50499 nd->testing_p = d->testing_p;
50500 if (d->op0 == d->op1)
50501 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
50502 else
50503 {
50504 nd->op0 = gen_lowpart (nd->vmode, d->op0);
50505 nd->op1 = gen_lowpart (nd->vmode, d->op1);
50506 }
50507 if (d->testing_p)
50508 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
50509 else
50510 nd->target = gen_reg_rtx (nd->vmode);
50511 }
50512 return true;
50513 }
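
/* Example (illustrative): the V16QImode permutation
   { 2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13 } keeps byte pairs
   together, so it is narrowed to the V8HImode permutation
   { 1, 0, 3, 2, 5, 4, 7, 6 }.  That permutation does not keep 16-bit
   pairs together, so the recursion stops there.  */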
50514
50515 /* Try to expand one-operand permutation with constant mask. */
50516
50517 static bool
50518 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
50519 {
50520 machine_mode mode = GET_MODE (d->op0);
50521 machine_mode maskmode = mode;
50522 rtx (*gen) (rtx, rtx, rtx) = NULL;
50523 rtx target, op0, mask;
50524 rtx vec[64];
50525
50526 if (!rtx_equal_p (d->op0, d->op1))
50527 return false;
50528
50529 if (!TARGET_AVX512F)
50530 return false;
50531
50532 switch (mode)
50533 {
50534 case V16SImode:
50535 gen = gen_avx512f_permvarv16si;
50536 break;
50537 case V16SFmode:
50538 gen = gen_avx512f_permvarv16sf;
50539 maskmode = V16SImode;
50540 break;
50541 case V8DImode:
50542 gen = gen_avx512f_permvarv8di;
50543 break;
50544 case V8DFmode:
50545 gen = gen_avx512f_permvarv8df;
50546 maskmode = V8DImode;
50547 break;
50548 default:
50549 return false;
50550 }
50551
50552 target = d->target;
50553 op0 = d->op0;
50554 for (int i = 0; i < d->nelt; ++i)
50555 vec[i] = GEN_INT (d->perm[i]);
50556 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
50557 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
50558 return true;
50559 }
50560
50561 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
50562 in a single instruction. */
50563
50564 static bool
50565 expand_vec_perm_1 (struct expand_vec_perm_d *d)
50566 {
50567 unsigned i, nelt = d->nelt;
50568 struct expand_vec_perm_d nd;
50569
50570 /* Check plain VEC_SELECT first, because AVX has instructions that could
50571 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
50572 input where SEL+CONCAT may not. */
50573 if (d->one_operand_p)
50574 {
50575 int mask = nelt - 1;
50576 bool identity_perm = true;
50577 bool broadcast_perm = true;
50578
50579 for (i = 0; i < nelt; i++)
50580 {
50581 nd.perm[i] = d->perm[i] & mask;
50582 if (nd.perm[i] != i)
50583 identity_perm = false;
50584 if (nd.perm[i])
50585 broadcast_perm = false;
50586 }
50587
50588 if (identity_perm)
50589 {
50590 if (!d->testing_p)
50591 emit_move_insn (d->target, d->op0);
50592 return true;
50593 }
50594 else if (broadcast_perm && TARGET_AVX2)
50595 {
50596 /* Use vpbroadcast{b,w,d}. */
50597 rtx (*gen) (rtx, rtx) = NULL;
50598 switch (d->vmode)
50599 {
50600 case V64QImode:
50601 if (TARGET_AVX512BW)
50602 gen = gen_avx512bw_vec_dupv64qi_1;
50603 break;
50604 case V32QImode:
50605 gen = gen_avx2_pbroadcastv32qi_1;
50606 break;
50607 case V32HImode:
50608 if (TARGET_AVX512BW)
50609 gen = gen_avx512bw_vec_dupv32hi_1;
50610 break;
50611 case V16HImode:
50612 gen = gen_avx2_pbroadcastv16hi_1;
50613 break;
50614 case V16SImode:
50615 if (TARGET_AVX512F)
50616 gen = gen_avx512f_vec_dupv16si_1;
50617 break;
50618 case V8SImode:
50619 gen = gen_avx2_pbroadcastv8si_1;
50620 break;
50621 case V16QImode:
50622 gen = gen_avx2_pbroadcastv16qi;
50623 break;
50624 case V8HImode:
50625 gen = gen_avx2_pbroadcastv8hi;
50626 break;
50627 case V16SFmode:
50628 if (TARGET_AVX512F)
50629 gen = gen_avx512f_vec_dupv16sf_1;
50630 break;
50631 case V8SFmode:
50632 gen = gen_avx2_vec_dupv8sf_1;
50633 break;
50634 case V8DFmode:
50635 if (TARGET_AVX512F)
50636 gen = gen_avx512f_vec_dupv8df_1;
50637 break;
50638 case V8DImode:
50639 if (TARGET_AVX512F)
50640 gen = gen_avx512f_vec_dupv8di_1;
50641 break;
50642 /* For other modes prefer other shuffles this function creates. */
50643 default: break;
50644 }
50645 if (gen != NULL)
50646 {
50647 if (!d->testing_p)
50648 emit_insn (gen (d->target, d->op0));
50649 return true;
50650 }
50651 }
50652
50653 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
50654 return true;
50655
50656 /* There are plenty of patterns in sse.md that are written for
50657 SEL+CONCAT and are not replicated for a single op. Perhaps
50658 that should be changed, to avoid the nastiness here. */
50659
50660 /* Recognize interleave style patterns, which means incrementing
50661 every other permutation operand. */
50662 for (i = 0; i < nelt; i += 2)
50663 {
50664 nd.perm[i] = d->perm[i] & mask;
50665 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
50666 }
50667 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
50668 d->testing_p))
50669 return true;
50670
50671 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
50672 if (nelt >= 4)
50673 {
50674 for (i = 0; i < nelt; i += 4)
50675 {
50676 nd.perm[i + 0] = d->perm[i + 0] & mask;
50677 nd.perm[i + 1] = d->perm[i + 1] & mask;
50678 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
50679 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
50680 }
50681
50682 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
50683 d->testing_p))
50684 return true;
50685 }
50686 }
50687
50688 /* Finally, try the fully general two operand permute. */
50689 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
50690 d->testing_p))
50691 return true;
50692
50693 /* Recognize interleave style patterns with reversed operands. */
50694 if (!d->one_operand_p)
50695 {
50696 for (i = 0; i < nelt; ++i)
50697 {
50698 unsigned e = d->perm[i];
50699 if (e >= nelt)
50700 e -= nelt;
50701 else
50702 e += nelt;
50703 nd.perm[i] = e;
50704 }
50705
50706 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
50707 d->testing_p))
50708 return true;
50709 }
50710
50711 /* Try the SSE4.1 blend variable merge instructions. */
50712 if (expand_vec_perm_blend (d))
50713 return true;
50714
50715 /* Try one of the AVX vpermil variable permutations. */
50716 if (expand_vec_perm_vpermil (d))
50717 return true;
50718
50719 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
50720 vpshufb, vpermd, vpermps or vpermq variable permutation. */
50721 if (expand_vec_perm_pshufb (d))
50722 return true;
50723
50724 /* Try the AVX2 vpalignr instruction. */
50725 if (expand_vec_perm_palignr (d, true))
50726 return true;
50727
50728 /* Try the AVX512F vperm{s,d} instructions. */
50729 if (ix86_expand_vec_one_operand_perm_avx512 (d))
50730 return true;
50731
50732 /* Try the AVX512F vpermi2 instructions. */
50733 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
50734 return true;
50735
50736 /* See if we can get the same permutation in different vector integer
50737 mode. */
50738 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
50739 {
50740 if (!d->testing_p)
50741 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
50742 return true;
50743 }
50744 return false;
50745 }
50746
50747 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
50748 in terms of a pair of pshuflw + pshufhw instructions. */
50749
50750 static bool
50751 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
50752 {
50753 unsigned char perm2[MAX_VECT_LEN];
50754 unsigned i;
50755 bool ok;
50756
50757 if (d->vmode != V8HImode || !d->one_operand_p)
50758 return false;
50759
50760 /* The two permutations only operate in 64-bit lanes. */
50761 for (i = 0; i < 4; ++i)
50762 if (d->perm[i] >= 4)
50763 return false;
50764 for (i = 4; i < 8; ++i)
50765 if (d->perm[i] < 4)
50766 return false;
50767
50768 if (d->testing_p)
50769 return true;
50770
50771 /* Emit the pshuflw. */
50772 memcpy (perm2, d->perm, 4);
50773 for (i = 4; i < 8; ++i)
50774 perm2[i] = i;
50775 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
50776 gcc_assert (ok);
50777
50778 /* Emit the pshufhw. */
50779 memcpy (perm2 + 4, d->perm + 4, 4);
50780 for (i = 0; i < 4; ++i)
50781 perm2[i] = i;
50782 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
50783 gcc_assert (ok);
50784
50785 return true;
50786 }
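
/* Rough intrinsics equivalent of the shuffle pair above (a sketch,
   not the expander's output; assumes SSE2 and <emmintrin.h>).  For
   d->perm = { 2,0,3,1, 7,5,6,4 }:

     #include <emmintrin.h>

     static __m128i shuf_low_then_high (__m128i x)
     {
       x = _mm_shufflelo_epi16 (x, _MM_SHUFFLE (1, 3, 0, 2));
       x = _mm_shufflehi_epi16 (x, _MM_SHUFFLE (0, 2, 1, 3));
       return x;
     }
*/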
50787
50788 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50789 the permutation using the SSSE3 palignr instruction. This succeeds
50790 when all of the elements in PERM fit within one vector and we merely
50791 need to shift them down so that a single vector permutation has a
50792 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
50793 the vpalignr instruction by itself can perform the requested permutation. */
50794
50795 static bool
50796 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
50797 {
50798 unsigned i, nelt = d->nelt;
50799 unsigned min, max, minswap, maxswap;
50800 bool in_order, ok, swap = false;
50801 rtx shift, target;
50802 struct expand_vec_perm_d dcopy;
50803
50804 /* Even with AVX, palignr only operates on 128-bit vectors;
50805 in AVX2 palignr operates on both 128-bit lanes. */
50806 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50807 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
50808 return false;
50809
50810 min = 2 * nelt;
50811 max = 0;
50812 minswap = 2 * nelt;
50813 maxswap = 0;
50814 for (i = 0; i < nelt; ++i)
50815 {
50816 unsigned e = d->perm[i];
50817 unsigned eswap = d->perm[i] ^ nelt;
50818 if (GET_MODE_SIZE (d->vmode) == 32)
50819 {
50820 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
50821 eswap = e ^ (nelt / 2);
50822 }
50823 if (e < min)
50824 min = e;
50825 if (e > max)
50826 max = e;
50827 if (eswap < minswap)
50828 minswap = eswap;
50829 if (eswap > maxswap)
50830 maxswap = eswap;
50831 }
50832 if (min == 0
50833 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
50834 {
50835 if (d->one_operand_p
50836 || minswap == 0
50837 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
50838 ? nelt / 2 : nelt))
50839 return false;
50840 swap = true;
50841 min = minswap;
50842 max = maxswap;
50843 }
50844
50845 /* Given that we have SSSE3, we know we'll be able to implement the
50846 single operand permutation after the palignr with pshufb for
50847 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
50848 first. */
50849 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
50850 return true;
50851
50852 dcopy = *d;
50853 if (swap)
50854 {
50855 dcopy.op0 = d->op1;
50856 dcopy.op1 = d->op0;
50857 for (i = 0; i < nelt; ++i)
50858 dcopy.perm[i] ^= nelt;
50859 }
50860
50861 in_order = true;
50862 for (i = 0; i < nelt; ++i)
50863 {
50864 unsigned e = dcopy.perm[i];
50865 if (GET_MODE_SIZE (d->vmode) == 32
50866 && e >= nelt
50867 && (e & (nelt / 2 - 1)) < min)
50868 e = e - min - (nelt / 2);
50869 else
50870 e = e - min;
50871 if (e != i)
50872 in_order = false;
50873 dcopy.perm[i] = e;
50874 }
50875 dcopy.one_operand_p = true;
50876
50877 if (single_insn_only_p && !in_order)
50878 return false;
50879
50880 /* For AVX2, test whether we can permute the result in one instruction. */
50881 if (d->testing_p)
50882 {
50883 if (in_order)
50884 return true;
50885 dcopy.op1 = dcopy.op0;
50886 return expand_vec_perm_1 (&dcopy);
50887 }
50888
50889 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
50890 if (GET_MODE_SIZE (d->vmode) == 16)
50891 {
50892 target = gen_reg_rtx (TImode);
50893 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
50894 gen_lowpart (TImode, dcopy.op0), shift));
50895 }
50896 else
50897 {
50898 target = gen_reg_rtx (V2TImode);
50899 emit_insn (gen_avx2_palignrv2ti (target,
50900 gen_lowpart (V2TImode, dcopy.op1),
50901 gen_lowpart (V2TImode, dcopy.op0),
50902 shift));
50903 }
50904
50905 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
50906
50907 /* Test for the degenerate case where the alignment by itself
50908 produces the desired permutation. */
50909 if (in_order)
50910 {
50911 emit_move_insn (d->target, dcopy.op0);
50912 return true;
50913 }
50914
50915 ok = expand_vec_perm_1 (&dcopy);
50916 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
50917
50918 return ok;
50919 }
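
/* Sketch of the shift-down idea above using SSSE3 intrinsics
   (illustrative; assumes <tmmintrin.h>).  If every selected element
   lies in the byte window [4, 20) of the op1:op0 concatenation (op0
   supplying elements 0..nelt-1), a single alignr moves that window
   down to offset 0, after which a one-operand shuffle can finish the
   permutation:

     #include <tmmintrin.h>

     static __m128i shift_window_down (__m128i op0, __m128i op1)
     {
       return _mm_alignr_epi8 (op1, op0, 4);  /* bytes 4..19 */
     }
*/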
50920
50921 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
50922 the permutation using the SSE4_1 pblendv instruction. Potentially
50923 reduces the permutation from 2 pshufbs plus an or to 1 pshufb plus a pblendv. */
50924
50925 static bool
50926 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
50927 {
50928 unsigned i, which, nelt = d->nelt;
50929 struct expand_vec_perm_d dcopy, dcopy1;
50930 machine_mode vmode = d->vmode;
50931 bool ok;
50932
50933 /* Use the same checks as in expand_vec_perm_blend. */
50934 if (d->one_operand_p)
50935 return false;
50936 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
50937 ;
50938 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
50939 ;
50940 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
50941 ;
50942 else
50943 return false;
50944
50945 /* Figure out which permutation elements do not stay in their
50946 respective lanes. */
50947 for (i = 0, which = 0; i < nelt; ++i)
50948 {
50949 unsigned e = d->perm[i];
50950 if (e != i)
50951 which |= (e < nelt ? 1 : 2);
50952 }
50953 /* We can pblend the part whose elements do not stay in their
50954 respective lanes only when these elements all come from the same
50955 half of the permutation.
50956 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
50957 lanes, but both are >= 8 (i.e. both come from op1).
50958 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
50959 respective lanes, and 8 >= 8 but 2 is not. */
50960 if (which != 1 && which != 2)
50961 return false;
50962 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
50963 return true;
50964
50965 /* First we apply a one-operand permutation to the part whose
50966 elements do not stay in their respective lanes. */
50967 dcopy = *d;
50968 if (which == 2)
50969 dcopy.op0 = dcopy.op1 = d->op1;
50970 else
50971 dcopy.op0 = dcopy.op1 = d->op0;
50972 if (!d->testing_p)
50973 dcopy.target = gen_reg_rtx (vmode);
50974 dcopy.one_operand_p = true;
50975
50976 for (i = 0; i < nelt; ++i)
50977 dcopy.perm[i] = d->perm[i] & (nelt - 1);
50978
50979 ok = expand_vec_perm_1 (&dcopy);
50980 if (GET_MODE_SIZE (vmode) != 16 && !ok)
50981 return false;
50982 else
50983 gcc_assert (ok);
50984 if (d->testing_p)
50985 return true;
50986
50987 /* Next we put permuted elements into their positions. */
50988 dcopy1 = *d;
50989 if (which == 2)
50990 dcopy1.op1 = dcopy.target;
50991 else
50992 dcopy1.op0 = dcopy.target;
50993
50994 for (i = 0; i < nelt; ++i)
50995 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
50996
50997 ok = expand_vec_perm_blend (&dcopy1);
50998 gcc_assert (ok);
50999
51000 return true;
51001 }
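
/* Example of the two-step decomposition above (illustrative): for
   V4SImode d->perm = { 0, 1, 7, 3 }, only element 2 leaves its lane
   and it comes from op1, so which == 2.  Step one shuffles op1 alone
   with { 0, 1, 3, 3 } into a temporary T (so T[2] == op1[3]); step
   two blends with { 0, 1, 6, 3 }, taking element 2 from T and the
   rest from op0.  */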
51002
51003 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
51004
51005 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
51006 a two vector permutation into a single vector permutation by using
51007 an interleave operation to merge the vectors. */
51008
51009 static bool
51010 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
51011 {
51012 struct expand_vec_perm_d dremap, dfinal;
51013 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
51014 unsigned HOST_WIDE_INT contents;
51015 unsigned char remap[2 * MAX_VECT_LEN];
51016 rtx_insn *seq;
51017 bool ok, same_halves = false;
51018
51019 if (GET_MODE_SIZE (d->vmode) == 16)
51020 {
51021 if (d->one_operand_p)
51022 return false;
51023 }
51024 else if (GET_MODE_SIZE (d->vmode) == 32)
51025 {
51026 if (!TARGET_AVX)
51027 return false;
51028 /* For 32-byte modes allow even d->one_operand_p.
51029 The lack of cross-lane shuffling in some instructions
51030 might prevent a single insn shuffle. */
51031 dfinal = *d;
51032 dfinal.testing_p = true;
51033 /* If expand_vec_perm_interleave3 can expand this into
51034 a 3 insn sequence, give up and let it be expanded as
51035 a 3 insn sequence. While that is one insn longer,
51036 it doesn't need a memory operand, and in the common
51037 case where both the interleave low and interleave high
51038 permutations with the same operands are adjacent, the
51039 two together need only 4 insns after CSE. */
51040 if (expand_vec_perm_interleave3 (&dfinal))
51041 return false;
51042 }
51043 else
51044 return false;
51045
51046 /* Examine from whence the elements come. */
51047 contents = 0;
51048 for (i = 0; i < nelt; ++i)
51049 contents |= HOST_WIDE_INT_1U << d->perm[i];
51050
51051 memset (remap, 0xff, sizeof (remap));
51052 dremap = *d;
51053
51054 if (GET_MODE_SIZE (d->vmode) == 16)
51055 {
51056 unsigned HOST_WIDE_INT h1, h2, h3, h4;
51057
51058 /* Split the two input vectors into 4 halves. */
51059 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
51060 h2 = h1 << nelt2;
51061 h3 = h2 << nelt2;
51062 h4 = h3 << nelt2;
51063
51064 /* If all the elements come from the low halves, use interleave low;
51065 similarly for interleave high. If the elements come from mis-matched
51066 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
51067 if ((contents & (h1 | h3)) == contents)
51068 {
51069 /* punpckl* */
51070 for (i = 0; i < nelt2; ++i)
51071 {
51072 remap[i] = i * 2;
51073 remap[i + nelt] = i * 2 + 1;
51074 dremap.perm[i * 2] = i;
51075 dremap.perm[i * 2 + 1] = i + nelt;
51076 }
51077 if (!TARGET_SSE2 && d->vmode == V4SImode)
51078 dremap.vmode = V4SFmode;
51079 }
51080 else if ((contents & (h2 | h4)) == contents)
51081 {
51082 /* punpckh* */
51083 for (i = 0; i < nelt2; ++i)
51084 {
51085 remap[i + nelt2] = i * 2;
51086 remap[i + nelt + nelt2] = i * 2 + 1;
51087 dremap.perm[i * 2] = i + nelt2;
51088 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
51089 }
51090 if (!TARGET_SSE2 && d->vmode == V4SImode)
51091 dremap.vmode = V4SFmode;
51092 }
51093 else if ((contents & (h1 | h4)) == contents)
51094 {
51095 /* shufps */
51096 for (i = 0; i < nelt2; ++i)
51097 {
51098 remap[i] = i;
51099 remap[i + nelt + nelt2] = i + nelt2;
51100 dremap.perm[i] = i;
51101 dremap.perm[i + nelt2] = i + nelt + nelt2;
51102 }
51103 if (nelt != 4)
51104 {
51105 /* shufpd */
51106 dremap.vmode = V2DImode;
51107 dremap.nelt = 2;
51108 dremap.perm[0] = 0;
51109 dremap.perm[1] = 3;
51110 }
51111 }
51112 else if ((contents & (h2 | h3)) == contents)
51113 {
51114 /* shufps */
51115 for (i = 0; i < nelt2; ++i)
51116 {
51117 remap[i + nelt2] = i;
51118 remap[i + nelt] = i + nelt2;
51119 dremap.perm[i] = i + nelt2;
51120 dremap.perm[i + nelt2] = i + nelt;
51121 }
51122 if (nelt != 4)
51123 {
51124 /* shufpd */
51125 dremap.vmode = V2DImode;
51126 dremap.nelt = 2;
51127 dremap.perm[0] = 1;
51128 dremap.perm[1] = 2;
51129 }
51130 }
51131 else
51132 return false;
51133 }
51134 else
51135 {
51136 unsigned int nelt4 = nelt / 4, nzcnt = 0;
51137 unsigned HOST_WIDE_INT q[8];
51138 unsigned int nonzero_halves[4];
51139
51140 /* Split the two input vectors into 8 quarters. */
51141 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
51142 for (i = 1; i < 8; ++i)
51143 q[i] = q[0] << (nelt4 * i);
51144 for (i = 0; i < 4; ++i)
51145 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
51146 {
51147 nonzero_halves[nzcnt] = i;
51148 ++nzcnt;
51149 }
51150
51151 if (nzcnt == 1)
51152 {
51153 gcc_assert (d->one_operand_p);
51154 nonzero_halves[1] = nonzero_halves[0];
51155 same_halves = true;
51156 }
51157 else if (d->one_operand_p)
51158 {
51159 gcc_assert (nonzero_halves[0] == 0);
51160 gcc_assert (nonzero_halves[1] == 1);
51161 }
51162
51163 if (nzcnt <= 2)
51164 {
51165 if (d->perm[0] / nelt2 == nonzero_halves[1])
51166 {
51167 /* Attempt to increase the likelihood that dfinal
51168 shuffle will be intra-lane. */
51169 std::swap (nonzero_halves[0], nonzero_halves[1]);
51170 }
51171
51172 /* vperm2f128 or vperm2i128. */
51173 for (i = 0; i < nelt2; ++i)
51174 {
51175 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
51176 remap[i + nonzero_halves[0] * nelt2] = i;
51177 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
51178 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
51179 }
51180
51181 if (d->vmode != V8SFmode
51182 && d->vmode != V4DFmode
51183 && d->vmode != V8SImode)
51184 {
51185 dremap.vmode = V8SImode;
51186 dremap.nelt = 8;
51187 for (i = 0; i < 4; ++i)
51188 {
51189 dremap.perm[i] = i + nonzero_halves[0] * 4;
51190 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
51191 }
51192 }
51193 }
51194 else if (d->one_operand_p)
51195 return false;
51196 else if (TARGET_AVX2
51197 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
51198 {
51199 /* vpunpckl* */
51200 for (i = 0; i < nelt4; ++i)
51201 {
51202 remap[i] = i * 2;
51203 remap[i + nelt] = i * 2 + 1;
51204 remap[i + nelt2] = i * 2 + nelt2;
51205 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
51206 dremap.perm[i * 2] = i;
51207 dremap.perm[i * 2 + 1] = i + nelt;
51208 dremap.perm[i * 2 + nelt2] = i + nelt2;
51209 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
51210 }
51211 }
51212 else if (TARGET_AVX2
51213 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
51214 {
51215 /* vpunpckh* */
51216 for (i = 0; i < nelt4; ++i)
51217 {
51218 remap[i + nelt4] = i * 2;
51219 remap[i + nelt + nelt4] = i * 2 + 1;
51220 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
51221 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
51222 dremap.perm[i * 2] = i + nelt4;
51223 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
51224 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
51225 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
51226 }
51227 }
51228 else
51229 return false;
51230 }
51231
51232 /* Use the remapping array set up above to move the elements from their
51233 swizzled locations into their final destinations. */
51234 dfinal = *d;
51235 for (i = 0; i < nelt; ++i)
51236 {
51237 unsigned e = remap[d->perm[i]];
51238 gcc_assert (e < nelt);
51239 /* If same_halves is true, both halves of the remapped vector are the
51240 same. Avoid cross-lane accesses if possible. */
51241 if (same_halves && i >= nelt2)
51242 {
51243 gcc_assert (e < nelt2);
51244 dfinal.perm[i] = e + nelt2;
51245 }
51246 else
51247 dfinal.perm[i] = e;
51248 }
51249 if (!d->testing_p)
51250 {
51251 dremap.target = gen_reg_rtx (dremap.vmode);
51252 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
51253 }
51254 dfinal.op1 = dfinal.op0;
51255 dfinal.one_operand_p = true;
51256
51257 /* Test if the final remap can be done with a single insn. For V4SFmode or
51258 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
51259 start_sequence ();
51260 ok = expand_vec_perm_1 (&dfinal);
51261 seq = get_insns ();
51262 end_sequence ();
51263
51264 if (!ok)
51265 return false;
51266
51267 if (d->testing_p)
51268 return true;
51269
51270 if (dremap.vmode != dfinal.vmode)
51271 {
51272 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
51273 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
51274 }
51275
51276 ok = expand_vec_perm_1 (&dremap);
51277 gcc_assert (ok);
51278
51279 emit_insn (seq);
51280 return true;
51281 }
51282
51283 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
51284 a single vector cross-lane permutation into vpermq followed
51285 by any of the single insn permutations. */
51286
51287 static bool
51288 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
51289 {
51290 struct expand_vec_perm_d dremap, dfinal;
51291 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
51292 unsigned contents[2];
51293 bool ok;
51294
51295 if (!(TARGET_AVX2
51296 && (d->vmode == V32QImode || d->vmode == V16HImode)
51297 && d->one_operand_p))
51298 return false;
51299
51300 contents[0] = 0;
51301 contents[1] = 0;
51302 for (i = 0; i < nelt2; ++i)
51303 {
51304 contents[0] |= 1u << (d->perm[i] / nelt4);
51305 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
51306 }
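/* For instance, with V32QImode nelt4 is 8, so d->perm[i] / nelt4 identifies
   which of the four 64-bit quarters of the single input element i is taken
   from; contents[0] and contents[1] record the quarters needed by the low
   and high half of the result.  */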
51307
51308 for (i = 0; i < 2; ++i)
51309 {
51310 unsigned int cnt = 0;
51311 for (j = 0; j < 4; ++j)
51312 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
51313 return false;
51314 }
51315
51316 if (d->testing_p)
51317 return true;
51318
51319 dremap = *d;
51320 dremap.vmode = V4DImode;
51321 dremap.nelt = 4;
51322 dremap.target = gen_reg_rtx (V4DImode);
51323 dremap.op0 = gen_lowpart (V4DImode, d->op0);
51324 dremap.op1 = dremap.op0;
51325 dremap.one_operand_p = true;
51326 for (i = 0; i < 2; ++i)
51327 {
51328 unsigned int cnt = 0;
51329 for (j = 0; j < 4; ++j)
51330 if ((contents[i] & (1u << j)) != 0)
51331 dremap.perm[2 * i + cnt++] = j;
51332 for (; cnt < 2; ++cnt)
51333 dremap.perm[2 * i + cnt] = 0;
51334 }
51335
51336 dfinal = *d;
51337 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
51338 dfinal.op1 = dfinal.op0;
51339 dfinal.one_operand_p = true;
51340 for (i = 0, j = 0; i < nelt; ++i)
51341 {
51342 if (i == nelt2)
51343 j = 2;
51344 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
51345 if ((d->perm[i] / nelt4) == dremap.perm[j])
51346 ;
51347 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
51348 dfinal.perm[i] |= nelt4;
51349 else
51350 gcc_unreachable ();
51351 }
51352
51353 ok = expand_vec_perm_1 (&dremap);
51354 gcc_assert (ok);
51355
51356 ok = expand_vec_perm_1 (&dfinal);
51357 gcc_assert (ok);
51358
51359 return true;
51360 }
51361
51362 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
51363 a vector permutation using two instructions, vperm2f128 resp.
51364 vperm2i128 followed by any single in-lane permutation. */
51365
51366 static bool
51367 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
51368 {
51369 struct expand_vec_perm_d dfirst, dsecond;
51370 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
51371 bool ok;
51372
51373 if (!TARGET_AVX
51374 || GET_MODE_SIZE (d->vmode) != 32
51375 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
51376 return false;
51377
51378 dsecond = *d;
51379 dsecond.one_operand_p = false;
51380 dsecond.testing_p = true;
51381
51382 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
51383 immediate. For perm < 16 the second permutation uses
51384 d->op0 as first operand, for perm >= 16 it uses d->op1
51385 as first operand. The second operand is the result of
51386 vperm2[fi]128. */
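/* For example, with V4DFmode and perm == 0x12 the vperm2f128 result is
   { 4, 5, 0, 1 } (the low lane of d->op1 followed by the low lane of
   d->op0), and since perm >= 16 the single-insn second shuffle combines
   d->op1 with that result.  */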
51387 for (perm = 0; perm < 32; perm++)
51388 {
51389 /* Ignore permutations which do not move anything cross-lane. */
51390 if (perm < 16)
51391 {
51392 /* The second shuffle for e.g. V4DFmode has
51393 0123 and ABCD operands.
51394 Ignore AB23, as 23 is already in the second lane
51395 of the first operand. */
51396 if ((perm & 0xc) == (1 << 2)) continue;
51397 /* And 01CD, as 01 is in the first lane of the first
51398 operand. */
51399 if ((perm & 3) == 0) continue;
51400 /* And 4567, as then the vperm2[fi]128 doesn't change
51401 anything on the original 4567 second operand. */
51402 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
51403 }
51404 else
51405 {
51406 /* The second shuffle for e.g. V4DFmode has
51407 4567 and ABCD operands.
51408 Ignore AB67, as 67 is already in the second lane
51409 of the first operand. */
51410 if ((perm & 0xc) == (3 << 2)) continue;
51411 /* And 45CD, as 45 is in the first lane of the first
51412 operand. */
51413 if ((perm & 3) == 2) continue;
51414 /* And 0123, as then the vperm2[fi]128 doesn't change
51415 anything on the original 0123 first operand. */
51416 if ((perm & 0xf) == (1 << 2)) continue;
51417 }
51418
51419 for (i = 0; i < nelt; i++)
51420 {
51421 j = d->perm[i] / nelt2;
51422 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
51423 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
51424 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
51425 dsecond.perm[i] = d->perm[i] & (nelt - 1);
51426 else
51427 break;
51428 }
51429
51430 if (i == nelt)
51431 {
51432 start_sequence ();
51433 ok = expand_vec_perm_1 (&dsecond);
51434 end_sequence ();
51435 }
51436 else
51437 ok = false;
51438
51439 if (ok)
51440 {
51441 if (d->testing_p)
51442 return true;
51443
51444 /* Found a usable second shuffle. dfirst will be
51445 vperm2f128 on d->op0 and d->op1. */
51446 dsecond.testing_p = false;
51447 dfirst = *d;
51448 dfirst.target = gen_reg_rtx (d->vmode);
51449 for (i = 0; i < nelt; i++)
51450 dfirst.perm[i] = (i & (nelt2 - 1))
51451 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
51452
51453 canonicalize_perm (&dfirst);
51454 ok = expand_vec_perm_1 (&dfirst);
51455 gcc_assert (ok);
51456
51457 /* And dsecond is some single insn shuffle, taking
51458 d->op0 and result of vperm2f128 (if perm < 16) or
51459 d->op1 and result of vperm2f128 (otherwise). */
51460 if (perm >= 16)
51461 dsecond.op0 = dsecond.op1;
51462 dsecond.op1 = dfirst.target;
51463
51464 ok = expand_vec_perm_1 (&dsecond);
51465 gcc_assert (ok);
51466
51467 return true;
51468 }
51469
51470 /* For one operand, the only useful vperm2f128 permutation is 0x01
51471 aka lanes swap. */
51472 if (d->one_operand_p)
51473 return false;
51474 }
51475
51476 return false;
51477 }
51478
51479 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
51480 a two vector permutation using 2 intra-lane interleave insns
51481 and cross-lane shuffle for 32-byte vectors. */
51482
51483 static bool
51484 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
51485 {
51486 unsigned i, nelt;
51487 rtx (*gen) (rtx, rtx, rtx);
51488
51489 if (d->one_operand_p)
51490 return false;
51491 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
51492 ;
51493 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
51494 ;
51495 else
51496 return false;
51497
51498 nelt = d->nelt;
51499 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
51500 return false;
51501 for (i = 0; i < nelt; i += 2)
51502 if (d->perm[i] != d->perm[0] + i / 2
51503 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
51504 return false;
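/* E.g. for V8SImode this matches only the full low interleave
   { 0, 8, 1, 9, 2, 10, 3, 11 } and the full high interleave
   { 4, 12, 5, 13, 6, 14, 7, 15 }.  */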
51505
51506 if (d->testing_p)
51507 return true;
51508
51509 switch (d->vmode)
51510 {
51511 case V32QImode:
51512 if (d->perm[0])
51513 gen = gen_vec_interleave_highv32qi;
51514 else
51515 gen = gen_vec_interleave_lowv32qi;
51516 break;
51517 case V16HImode:
51518 if (d->perm[0])
51519 gen = gen_vec_interleave_highv16hi;
51520 else
51521 gen = gen_vec_interleave_lowv16hi;
51522 break;
51523 case V8SImode:
51524 if (d->perm[0])
51525 gen = gen_vec_interleave_highv8si;
51526 else
51527 gen = gen_vec_interleave_lowv8si;
51528 break;
51529 case V4DImode:
51530 if (d->perm[0])
51531 gen = gen_vec_interleave_highv4di;
51532 else
51533 gen = gen_vec_interleave_lowv4di;
51534 break;
51535 case V8SFmode:
51536 if (d->perm[0])
51537 gen = gen_vec_interleave_highv8sf;
51538 else
51539 gen = gen_vec_interleave_lowv8sf;
51540 break;
51541 case V4DFmode:
51542 if (d->perm[0])
51543 gen = gen_vec_interleave_highv4df;
51544 else
51545 gen = gen_vec_interleave_lowv4df;
51546 break;
51547 default:
51548 gcc_unreachable ();
51549 }
51550
51551 emit_insn (gen (d->target, d->op0, d->op1));
51552 return true;
51553 }
51554
51555 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
51556 a single vector permutation using a single intra-lane vector
51557 permutation, vperm2f128 swapping the lanes and vblend* insn blending
51558 the non-swapped and swapped vectors together. */
51559
51560 static bool
51561 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
51562 {
51563 struct expand_vec_perm_d dfirst, dsecond;
51564 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
51565 rtx_insn *seq;
51566 bool ok;
51567 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
51568
51569 if (!TARGET_AVX
51570 || TARGET_AVX2
51571 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
51572 || !d->one_operand_p)
51573 return false;
51574
51575 dfirst = *d;
51576 for (i = 0; i < nelt; i++)
51577 dfirst.perm[i] = 0xff;
51578 for (i = 0, msk = 0; i < nelt; i++)
51579 {
51580 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
51581 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
51582 return false;
51583 dfirst.perm[j] = d->perm[i];
51584 if (j != i)
51585 msk |= (1 << i);
51586 }
51587 for (i = 0; i < nelt; i++)
51588 if (dfirst.perm[i] == 0xff)
51589 dfirst.perm[i] = i;
51590
51591 if (!d->testing_p)
51592 dfirst.target = gen_reg_rtx (dfirst.vmode);
51593
51594 start_sequence ();
51595 ok = expand_vec_perm_1 (&dfirst);
51596 seq = get_insns ();
51597 end_sequence ();
51598
51599 if (!ok)
51600 return false;
51601
51602 if (d->testing_p)
51603 return true;
51604
51605 emit_insn (seq);
51606
51607 dsecond = *d;
51608 dsecond.op0 = dfirst.target;
51609 dsecond.op1 = dfirst.target;
51610 dsecond.one_operand_p = true;
51611 dsecond.target = gen_reg_rtx (dsecond.vmode);
51612 for (i = 0; i < nelt; i++)
51613 dsecond.perm[i] = i ^ nelt2;
51614
51615 ok = expand_vec_perm_1 (&dsecond);
51616 gcc_assert (ok);
51617
51618 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
51619 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
51620 return true;
51621 }
51622
51623 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
51624 permutation using two vperm2f128, followed by a vshufpd insn blending
51625 the two vectors together. */
51626
51627 static bool
51628 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
51629 {
51630 struct expand_vec_perm_d dfirst, dsecond, dthird;
51631 bool ok;
51632
51633 if (!TARGET_AVX || (d->vmode != V4DFmode))
51634 return false;
51635
51636 if (d->testing_p)
51637 return true;
51638
51639 dfirst = *d;
51640 dsecond = *d;
51641 dthird = *d;
51642
51643 dfirst.perm[0] = (d->perm[0] & ~1);
51644 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
51645 dfirst.perm[2] = (d->perm[2] & ~1);
51646 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
51647 dsecond.perm[0] = (d->perm[1] & ~1);
51648 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
51649 dsecond.perm[2] = (d->perm[3] & ~1);
51650 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
51651 dthird.perm[0] = (d->perm[0] % 2);
51652 dthird.perm[1] = (d->perm[1] % 2) + 4;
51653 dthird.perm[2] = (d->perm[2] % 2) + 2;
51654 dthird.perm[3] = (d->perm[3] % 2) + 6;
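/* As an example, for d->perm == { 2, 5, 1, 6 } this builds
   dfirst == { 2, 3, 0, 1 }, dsecond == { 4, 5, 6, 7 } and
   dthird == { 0, 5, 3, 6 }, and the final vshufpd recombines the two
   vperm2f128 results into the requested { 2, 5, 1, 6 }.  */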
51655
51656 dfirst.target = gen_reg_rtx (dfirst.vmode);
51657 dsecond.target = gen_reg_rtx (dsecond.vmode);
51658 dthird.op0 = dfirst.target;
51659 dthird.op1 = dsecond.target;
51660 dthird.one_operand_p = false;
51661
51662 canonicalize_perm (&dfirst);
51663 canonicalize_perm (&dsecond);
51664
51665 ok = expand_vec_perm_1 (&dfirst)
51666 && expand_vec_perm_1 (&dsecond)
51667 && expand_vec_perm_1 (&dthird);
51668
51669 gcc_assert (ok);
51670
51671 return true;
51672 }
51673
51674 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
51675 permutation with two pshufb insns and an ior. We should have already
51676 failed all two instruction sequences. */
51677
51678 static bool
51679 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
51680 {
51681 rtx rperm[2][16], vperm, l, h, op, m128;
51682 unsigned int i, nelt, eltsz;
51683
51684 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
51685 return false;
51686 gcc_assert (!d->one_operand_p);
51687
51688 if (d->testing_p)
51689 return true;
51690
51691 nelt = d->nelt;
51692 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51693
51694 /* Generate two permutation masks. If the required element is within
51695 the given vector it is shuffled into the proper lane. If the required
51696 element is in the other vector, force a zero into the lane by setting
51697 bit 7 in the permutation mask. */
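/* For instance, in V16QImode with d->perm[0] == 3 and d->perm[1] == 18,
   rperm[0] starts { 3, -128, ... } and rperm[1] starts { -128, 2, ... };
   each pshufb supplies the bytes its operand owns and zeros the rest, and
   the final ior merges the two results.  */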
51698 m128 = GEN_INT (-128);
51699 for (i = 0; i < nelt; ++i)
51700 {
51701 unsigned j, e = d->perm[i];
51702 unsigned which = (e >= nelt);
51703 if (e >= nelt)
51704 e -= nelt;
51705
51706 for (j = 0; j < eltsz; ++j)
51707 {
51708 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
51709 rperm[1-which][i*eltsz + j] = m128;
51710 }
51711 }
51712
51713 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
51714 vperm = force_reg (V16QImode, vperm);
51715
51716 l = gen_reg_rtx (V16QImode);
51717 op = gen_lowpart (V16QImode, d->op0);
51718 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
51719
51720 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
51721 vperm = force_reg (V16QImode, vperm);
51722
51723 h = gen_reg_rtx (V16QImode);
51724 op = gen_lowpart (V16QImode, d->op1);
51725 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
51726
51727 op = d->target;
51728 if (d->vmode != V16QImode)
51729 op = gen_reg_rtx (V16QImode);
51730 emit_insn (gen_iorv16qi3 (op, l, h));
51731 if (op != d->target)
51732 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51733
51734 return true;
51735 }
51736
51737 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
51738 with two vpshufb insns, vpermq and vpor. We should have already failed
51739 all two or three instruction sequences. */
51740
51741 static bool
51742 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
51743 {
51744 rtx rperm[2][32], vperm, l, h, hp, op, m128;
51745 unsigned int i, nelt, eltsz;
51746
51747 if (!TARGET_AVX2
51748 || !d->one_operand_p
51749 || (d->vmode != V32QImode && d->vmode != V16HImode))
51750 return false;
51751
51752 if (d->testing_p)
51753 return true;
51754
51755 nelt = d->nelt;
51756 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51757
51758 /* Generate two permutation masks. If the required element is within
51759 the same lane, it is shuffled in. If the required element is from the
51760 other lane, force a zero by setting bit 7 in the permutation mask.
51761 The other mask has non-negative elements where an element is
51762 requested from the other lane; those elements are also moved to the
51763 other lane, so that the result of vpshufb can have its two
51764 V2TImode halves swapped. */
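/* For instance, in V32QImode a request for byte 20 at result position 0
   crosses the 128-bit lane boundary: rperm[1][16] is set to 4 so the first
   vpshufb places byte 20 at position 16, the vpermq lane swap then moves it
   to position 0, and rperm[0][0] is -128 so the final vpor keeps it.  */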
51765 m128 = GEN_INT (-128);
51766 for (i = 0; i < nelt; ++i)
51767 {
51768 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51769 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51770
51771 for (j = 0; j < eltsz; ++j)
51772 {
51773 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
51774 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
51775 }
51776 }
51777
51778 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
51779 vperm = force_reg (V32QImode, vperm);
51780
51781 h = gen_reg_rtx (V32QImode);
51782 op = gen_lowpart (V32QImode, d->op0);
51783 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
51784
51785 /* Swap the 128-bit lanes of h into hp. */
51786 hp = gen_reg_rtx (V4DImode);
51787 op = gen_lowpart (V4DImode, h);
51788 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
51789 const1_rtx));
51790
51791 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
51792 vperm = force_reg (V32QImode, vperm);
51793
51794 l = gen_reg_rtx (V32QImode);
51795 op = gen_lowpart (V32QImode, d->op0);
51796 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
51797
51798 op = d->target;
51799 if (d->vmode != V32QImode)
51800 op = gen_reg_rtx (V32QImode);
51801 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
51802 if (op != d->target)
51803 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51804
51805 return true;
51806 }
51807
51808 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
51809 and extract-odd permutations of two V32QImode or V16HImode operands
51810 with two vpshufb insns, vpor and vpermq. We should have already
51811 failed all two or three instruction sequences. */
51812
51813 static bool
51814 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
51815 {
51816 rtx rperm[2][32], vperm, l, h, ior, op, m128;
51817 unsigned int i, nelt, eltsz;
51818
51819 if (!TARGET_AVX2
51820 || d->one_operand_p
51821 || (d->vmode != V32QImode && d->vmode != V16HImode))
51822 return false;
51823
51824 for (i = 0; i < d->nelt; ++i)
51825 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
51826 return false;
51827
51828 if (d->testing_p)
51829 return true;
51830
51831 nelt = d->nelt;
51832 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51833
51834 /* Generate two permutation masks. In the first permutation mask
51835 the first quarter will contain indexes for the first half
51836 of op0, the second quarter will contain bit 7 set, the third quarter
51837 will contain indexes for the second half of op0 and the
51838 last quarter bit 7 set. In the second permutation mask
51839 the first quarter will contain bit 7 set, the second quarter
51840 indexes for the first half of op1, the third quarter bit 7 set
51841 and the last quarter indexes for the second half of op1.
51842 I.e. the first mask e.g. for V32QImode extract even will be:
51843 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
51844 (all values masked with 0xf except for -128) and second mask
51845 for extract even will be
51846 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
51847 m128 = GEN_INT (-128);
51848 for (i = 0; i < nelt; ++i)
51849 {
51850 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51851 unsigned which = d->perm[i] >= nelt;
51852 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
51853
51854 for (j = 0; j < eltsz; ++j)
51855 {
51856 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
51857 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
51858 }
51859 }
51860
51861 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
51862 vperm = force_reg (V32QImode, vperm);
51863
51864 l = gen_reg_rtx (V32QImode);
51865 op = gen_lowpart (V32QImode, d->op0);
51866 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
51867
51868 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
51869 vperm = force_reg (V32QImode, vperm);
51870
51871 h = gen_reg_rtx (V32QImode);
51872 op = gen_lowpart (V32QImode, d->op1);
51873 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
51874
51875 ior = gen_reg_rtx (V32QImode);
51876 emit_insn (gen_iorv32qi3 (ior, l, h));
51877
51878 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
51879 op = gen_reg_rtx (V4DImode);
51880 ior = gen_lowpart (V4DImode, ior);
51881 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
51882 const1_rtx, GEN_INT (3)));
51883 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51884
51885 return true;
51886 }
51887
51888 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
51889 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
51890 with two "and" and "pack" or two "shift" and "pack" insns. We should
51891 have already failed all two instruction sequences. */
51892
51893 static bool
51894 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
51895 {
51896 rtx op, dop0, dop1, t, rperm[16];
51897 unsigned i, odd, c, s, nelt = d->nelt;
51898 bool end_perm = false;
51899 machine_mode half_mode;
51900 rtx (*gen_and) (rtx, rtx, rtx);
51901 rtx (*gen_pack) (rtx, rtx, rtx);
51902 rtx (*gen_shift) (rtx, rtx, rtx);
51903
51904 if (d->one_operand_p)
51905 return false;
51906
51907 switch (d->vmode)
51908 {
51909 case V8HImode:
51910 /* Required for "pack". */
51911 if (!TARGET_SSE4_1)
51912 return false;
51913 c = 0xffff;
51914 s = 16;
51915 half_mode = V4SImode;
51916 gen_and = gen_andv4si3;
51917 gen_pack = gen_sse4_1_packusdw;
51918 gen_shift = gen_lshrv4si3;
51919 break;
51920 case V16QImode:
51921 /* No check as all instructions are SSE2. */
51922 c = 0xff;
51923 s = 8;
51924 half_mode = V8HImode;
51925 gen_and = gen_andv8hi3;
51926 gen_pack = gen_sse2_packuswb;
51927 gen_shift = gen_lshrv8hi3;
51928 break;
51929 case V16HImode:
51930 if (!TARGET_AVX2)
51931 return false;
51932 c = 0xffff;
51933 s = 16;
51934 half_mode = V8SImode;
51935 gen_and = gen_andv8si3;
51936 gen_pack = gen_avx2_packusdw;
51937 gen_shift = gen_lshrv8si3;
51938 end_perm = true;
51939 break;
51940 case V32QImode:
51941 if (!TARGET_AVX2)
51942 return false;
51943 c = 0xff;
51944 s = 8;
51945 half_mode = V16HImode;
51946 gen_and = gen_andv16hi3;
51947 gen_pack = gen_avx2_packuswb;
51948 gen_shift = gen_lshrv16hi3;
51949 end_perm = true;
51950 break;
51951 default:
51952 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
51953 general shuffles. */
51954 return false;
51955 }
51956
51957 /* Check that permutation is even or odd. */
51958 odd = d->perm[0];
51959 if (odd > 1)
51960 return false;
51961
51962 for (i = 1; i < nelt; ++i)
51963 if (d->perm[i] != 2 * i + odd)
51964 return false;
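/* E.g. for V16QImode the only accepted permutations are the extract-even
   { 0, 2, 4, ..., 30 } and the extract-odd { 1, 3, 5, ..., 31 }.  */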
51965
51966 if (d->testing_p)
51967 return true;
51968
51969 dop0 = gen_reg_rtx (half_mode);
51970 dop1 = gen_reg_rtx (half_mode);
51971 if (odd == 0)
51972 {
51973 for (i = 0; i < nelt / 2; i++)
51974 rperm[i] = GEN_INT (c);
51975 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
51976 t = force_reg (half_mode, t);
51977 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
51978 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
51979 }
51980 else
51981 {
51982 emit_insn (gen_shift (dop0,
51983 gen_lowpart (half_mode, d->op0),
51984 GEN_INT (s)));
51985 emit_insn (gen_shift (dop1,
51986 gen_lowpart (half_mode, d->op1),
51987 GEN_INT (s)));
51988 }
51989 /* In the AVX2 256-bit case we need to permute the pack result. */
51990 if (TARGET_AVX2 && end_perm)
51991 {
51992 op = gen_reg_rtx (d->vmode);
51993 t = gen_reg_rtx (V4DImode);
51994 emit_insn (gen_pack (op, dop0, dop1));
51995 emit_insn (gen_avx2_permv4di_1 (t,
51996 gen_lowpart (V4DImode, op),
51997 const0_rtx,
51998 const2_rtx,
51999 const1_rtx,
52000 GEN_INT (3)));
52001 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
52002 }
52003 else
52004 emit_insn (gen_pack (d->target, dop0, dop1));
52005
52006 return true;
52007 }
52008
52009 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
52010 and extract-odd permutations of two V64QImode operands
52011 with two "shifts", two "truncs" and one "concat" insns for "odd"
52012 and two "truncs" and one "concat" insn for "even".
52013 We should have already failed all two instruction sequences. */
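/* On little-endian x86 the odd byte of each 16-bit word is its high byte,
   so the shift right by 8 moves it into the low byte before the truncation;
   the even bytes are already in the low byte, so the even case truncates
   directly.  */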
52014
52015 static bool
52016 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
52017 {
52018 rtx t1, t2, t3, t4;
52019 unsigned i, odd, nelt = d->nelt;
52020
52021 if (!TARGET_AVX512BW
52022 || d->one_operand_p
52023 || d->vmode != V64QImode)
52024 return false;
52025
52026 /* Check that permutation is even or odd. */
52027 odd = d->perm[0];
52028 if (odd > 1)
52029 return false;
52030
52031 for (i = 1; i < nelt; ++i)
52032 if (d->perm[i] != 2 * i + odd)
52033 return false;
52034
52035 if (d->testing_p)
52036 return true;
52037
52038
52039 if (odd)
52040 {
52041 t1 = gen_reg_rtx (V32HImode);
52042 t2 = gen_reg_rtx (V32HImode);
52043 emit_insn (gen_lshrv32hi3 (t1,
52044 gen_lowpart (V32HImode, d->op0),
52045 GEN_INT (8)));
52046 emit_insn (gen_lshrv32hi3 (t2,
52047 gen_lowpart (V32HImode, d->op1),
52048 GEN_INT (8)));
52049 }
52050 else
52051 {
52052 t1 = gen_lowpart (V32HImode, d->op0);
52053 t2 = gen_lowpart (V32HImode, d->op1);
52054 }
52055
52056 t3 = gen_reg_rtx (V32QImode);
52057 t4 = gen_reg_rtx (V32QImode);
52058 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
52059 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
52060 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
52061
52062 return true;
52063 }
52064
52065 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
52066 and extract-odd permutations. */
52067
52068 static bool
52069 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
52070 {
52071 rtx t1, t2, t3, t4, t5;
52072
52073 switch (d->vmode)
52074 {
52075 case V4DFmode:
52076 if (d->testing_p)
52077 break;
52078 t1 = gen_reg_rtx (V4DFmode);
52079 t2 = gen_reg_rtx (V4DFmode);
52080
52081 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
52082 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
52083 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
52084
52085 /* Now an unpck[lh]pd will produce the result required. */
52086 if (odd)
52087 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
52088 else
52089 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
52090 emit_insn (t3);
52091 break;
52092
52093 case V8SFmode:
52094 {
52095 int mask = odd ? 0xdd : 0x88;
52096
52097 if (d->testing_p)
52098 break;
52099 t1 = gen_reg_rtx (V8SFmode);
52100 t2 = gen_reg_rtx (V8SFmode);
52101 t3 = gen_reg_rtx (V8SFmode);
52102
52103 /* Shuffle within the 128-bit lanes to produce:
52104 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
52105 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
52106 GEN_INT (mask)));
52107
52108 /* Shuffle the lanes around to produce:
52109 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
52110 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
52111 GEN_INT (0x3)));
52112
52113 /* Shuffle within the 128-bit lanes to produce:
52114 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
52115 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
52116
52117 /* Shuffle within the 128-bit lanes to produce:
52118 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
52119 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
52120
52121 /* Shuffle the lanes around to produce:
52122 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
52123 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
52124 GEN_INT (0x20)));
52125 }
52126 break;
52127
52128 case V2DFmode:
52129 case V4SFmode:
52130 case V2DImode:
52131 case V4SImode:
52132 /* These are always directly implementable by expand_vec_perm_1. */
52133 gcc_unreachable ();
52134
52135 case V8HImode:
52136 if (TARGET_SSE4_1)
52137 return expand_vec_perm_even_odd_pack (d);
52138 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
52139 return expand_vec_perm_pshufb2 (d);
52140 else
52141 {
52142 if (d->testing_p)
52143 break;
52144 /* We need 2*log2(N)-1 operations to achieve odd/even
52145 with interleave. */
52146 t1 = gen_reg_rtx (V8HImode);
52147 t2 = gen_reg_rtx (V8HImode);
52148 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
52149 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
52150 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
52151 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
52152 if (odd)
52153 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
52154 else
52155 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
52156 emit_insn (t3);
52157 }
52158 break;
52159
52160 case V16QImode:
52161 return expand_vec_perm_even_odd_pack (d);
52162
52163 case V16HImode:
52164 case V32QImode:
52165 return expand_vec_perm_even_odd_pack (d);
52166
52167 case V64QImode:
52168 return expand_vec_perm_even_odd_trunc (d);
52169
52170 case V4DImode:
52171 if (!TARGET_AVX2)
52172 {
52173 struct expand_vec_perm_d d_copy = *d;
52174 d_copy.vmode = V4DFmode;
52175 if (d->testing_p)
52176 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
52177 else
52178 d_copy.target = gen_reg_rtx (V4DFmode);
52179 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
52180 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
52181 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
52182 {
52183 if (!d->testing_p)
52184 emit_move_insn (d->target,
52185 gen_lowpart (V4DImode, d_copy.target));
52186 return true;
52187 }
52188 return false;
52189 }
52190
52191 if (d->testing_p)
52192 break;
52193
52194 t1 = gen_reg_rtx (V4DImode);
52195 t2 = gen_reg_rtx (V4DImode);
52196
52197 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
52198 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
52199 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
52200
52201 /* Now a vpunpck[lh]qdq will produce the result required. */
52202 if (odd)
52203 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
52204 else
52205 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
52206 emit_insn (t3);
52207 break;
52208
52209 case V8SImode:
52210 if (!TARGET_AVX2)
52211 {
52212 struct expand_vec_perm_d d_copy = *d;
52213 d_copy.vmode = V8SFmode;
52214 if (d->testing_p)
52215 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
52216 else
52217 d_copy.target = gen_reg_rtx (V8SFmode);
52218 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
52219 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
52220 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
52221 {
52222 if (!d->testing_p)
52223 emit_move_insn (d->target,
52224 gen_lowpart (V8SImode, d_copy.target));
52225 return true;
52226 }
52227 return false;
52228 }
52229
52230 if (d->testing_p)
52231 break;
52232
52233 t1 = gen_reg_rtx (V8SImode);
52234 t2 = gen_reg_rtx (V8SImode);
52235 t3 = gen_reg_rtx (V4DImode);
52236 t4 = gen_reg_rtx (V4DImode);
52237 t5 = gen_reg_rtx (V4DImode);
52238
52239 /* Shuffle the lanes around into
52240 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
52241 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
52242 gen_lowpart (V4DImode, d->op1),
52243 GEN_INT (0x20)));
52244 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
52245 gen_lowpart (V4DImode, d->op1),
52246 GEN_INT (0x31)));
52247
52248 /* Swap the 2nd and 3rd position in each lane into
52249 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
52250 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
52251 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
52252 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
52253 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
52254
52255 /* Now a vpunpck[lh]qdq will produce
52256 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
52257 if (odd)
52258 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
52259 gen_lowpart (V4DImode, t2));
52260 else
52261 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
52262 gen_lowpart (V4DImode, t2));
52263 emit_insn (t3);
52264 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
52265 break;
52266
52267 default:
52268 gcc_unreachable ();
52269 }
52270
52271 return true;
52272 }
52273
52274 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
52275 extract-even and extract-odd permutations. */
52276
52277 static bool
52278 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
52279 {
52280 unsigned i, odd, nelt = d->nelt;
52281
52282 odd = d->perm[0];
52283 if (odd != 0 && odd != 1)
52284 return false;
52285
52286 for (i = 1; i < nelt; ++i)
52287 if (d->perm[i] != 2 * i + odd)
52288 return false;
52289
52290 return expand_vec_perm_even_odd_1 (d, odd);
52291 }
52292
52293 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
52294 permutations. We assume that expand_vec_perm_1 has already failed. */
52295
52296 static bool
52297 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
52298 {
52299 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
52300 machine_mode vmode = d->vmode;
52301 unsigned char perm2[4];
52302 rtx op0 = d->op0, dest;
52303 bool ok;
52304
52305 switch (vmode)
52306 {
52307 case V4DFmode:
52308 case V8SFmode:
52309 /* These are special-cased in sse.md so that we can optionally
52310 use the vbroadcast instruction. They expand to two insns
52311 if the input happens to be in a register. */
52312 gcc_unreachable ();
52313
52314 case V2DFmode:
52315 case V2DImode:
52316 case V4SFmode:
52317 case V4SImode:
52318 /* These are always implementable using standard shuffle patterns. */
52319 gcc_unreachable ();
52320
52321 case V8HImode:
52322 case V16QImode:
52323 /* These can be implemented via interleave. We save one insn by
52324 stopping once we have promoted to V4SImode and then use pshufd. */
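/* As an illustration, broadcasting byte 5 of a V16QImode vector: the low
   byte interleave duplicates bytes 0-7, the high word interleave then
   leaves dword 1 holding four copies of byte 5, and the final pshufd
   splats that dword.  */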
52325 if (d->testing_p)
52326 return true;
52327 do
52328 {
52329 rtx dest;
52330 rtx (*gen) (rtx, rtx, rtx)
52331 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
52332 : gen_vec_interleave_lowv8hi;
52333
52334 if (elt >= nelt2)
52335 {
52336 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
52337 : gen_vec_interleave_highv8hi;
52338 elt -= nelt2;
52339 }
52340 nelt2 /= 2;
52341
52342 dest = gen_reg_rtx (vmode);
52343 emit_insn (gen (dest, op0, op0));
52344 vmode = get_mode_wider_vector (vmode);
52345 op0 = gen_lowpart (vmode, dest);
52346 }
52347 while (vmode != V4SImode);
52348
52349 memset (perm2, elt, 4);
52350 dest = gen_reg_rtx (V4SImode);
52351 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
52352 gcc_assert (ok);
52353 if (!d->testing_p)
52354 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
52355 return true;
52356
52357 case V64QImode:
52358 case V32QImode:
52359 case V16HImode:
52360 case V8SImode:
52361 case V4DImode:
52362 /* For AVX2 broadcasts of the first element vpbroadcast* or
52363 vpermq should be used by expand_vec_perm_1. */
52364 gcc_assert (!TARGET_AVX2 || d->perm[0]);
52365 return false;
52366
52367 default:
52368 gcc_unreachable ();
52369 }
52370 }
52371
52372 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
52373 broadcast permutations. */
52374
52375 static bool
52376 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
52377 {
52378 unsigned i, elt, nelt = d->nelt;
52379
52380 if (!d->one_operand_p)
52381 return false;
52382
52383 elt = d->perm[0];
52384 for (i = 1; i < nelt; ++i)
52385 if (d->perm[i] != elt)
52386 return false;
52387
52388 return expand_vec_perm_broadcast_1 (d);
52389 }
52390
52391 /* Implement arbitrary permutations of two V64QImode operands
52392 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
52393 static bool
52394 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
52395 {
52396 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
52397 return false;
52398
52399 if (d->testing_p)
52400 return true;
52401
52402 struct expand_vec_perm_d ds[2];
52403 rtx rperm[128], vperm, target0, target1;
52404 unsigned int i, nelt;
52405 machine_mode vmode;
52406
52407 nelt = d->nelt;
52408 vmode = V64QImode;
52409
52410 for (i = 0; i < 2; i++)
52411 {
52412 ds[i] = *d;
52413 ds[i].vmode = V32HImode;
52414 ds[i].nelt = 32;
52415 ds[i].target = gen_reg_rtx (V32HImode);
52416 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
52417 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
52418 }
52419
52420 /* Prepare permutations such that the first one takes care of
52421 putting the even bytes into the right positions or one position
52422 higher (ds[0]) and the second one takes care of
52423 putting the odd bytes into the right positions or one position
52424 lower (ds[1]). */
52425
52426 for (i = 0; i < nelt; i++)
52427 {
52428 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
52429 if (i & 1)
52430 {
52431 rperm[i] = constm1_rtx;
52432 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
52433 }
52434 else
52435 {
52436 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
52437 rperm[i + 64] = constm1_rtx;
52438 }
52439 }
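/* For example, if d->perm[0] == 13, ds[0] brings word 6 into word 0 and
   the first vpshufb mask selects byte 1 of that word, i.e. original byte
   13, while the corresponding byte of the second mask is -1 so the final
   vpor passes it through.  */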
52440
52441 bool ok = expand_vec_perm_1 (&ds[0]);
52442 gcc_assert (ok);
52443 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
52444
52445 ok = expand_vec_perm_1 (&ds[1]);
52446 gcc_assert (ok);
52447 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
52448
52449 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
52450 vperm = force_reg (vmode, vperm);
52451 target0 = gen_reg_rtx (V64QImode);
52452 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
52453
52454 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
52455 vperm = force_reg (vmode, vperm);
52456 target1 = gen_reg_rtx (V64QImode);
52457 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
52458
52459 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
52460 return true;
52461 }
52462
52463 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
52464 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
52465 all the shorter instruction sequences. */
52466
52467 static bool
52468 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
52469 {
52470 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
52471 unsigned int i, nelt, eltsz;
52472 bool used[4];
52473
52474 if (!TARGET_AVX2
52475 || d->one_operand_p
52476 || (d->vmode != V32QImode && d->vmode != V16HImode))
52477 return false;
52478
52479 if (d->testing_p)
52480 return true;
52481
52482 nelt = d->nelt;
52483 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
52484
52485 /* Generate 4 permutation masks. If the required element is within
52486 the same lane, it is shuffled in. If the required element is from the
52487 other lane, force a zero by setting bit 7 in the permutation mask.
52488 The cross-lane masks have non-negative elements where an element is
52489 requested from the other lane; those elements are also moved to the
52490 other lane, so that the result of vpshufb can have its two
52491 V2TImode halves swapped. */
52492 m128 = GEN_INT (-128);
52493 for (i = 0; i < 32; ++i)
52494 {
52495 rperm[0][i] = m128;
52496 rperm[1][i] = m128;
52497 rperm[2][i] = m128;
52498 rperm[3][i] = m128;
52499 }
52500 used[0] = false;
52501 used[1] = false;
52502 used[2] = false;
52503 used[3] = false;
52504 for (i = 0; i < nelt; ++i)
52505 {
52506 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
52507 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
52508 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
52509
52510 for (j = 0; j < eltsz; ++j)
52511 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
52512 used[which] = true;
52513 }
52514
52515 for (i = 0; i < 2; ++i)
52516 {
52517 if (!used[2 * i + 1])
52518 {
52519 h[i] = NULL_RTX;
52520 continue;
52521 }
52522 vperm = gen_rtx_CONST_VECTOR (V32QImode,
52523 gen_rtvec_v (32, rperm[2 * i + 1]));
52524 vperm = force_reg (V32QImode, vperm);
52525 h[i] = gen_reg_rtx (V32QImode);
52526 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
52527 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
52528 }
52529
52530 /* Swap the 128-bit lanes of h[X]. */
52531 for (i = 0; i < 2; ++i)
52532 {
52533 if (h[i] == NULL_RTX)
52534 continue;
52535 op = gen_reg_rtx (V4DImode);
52536 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
52537 const2_rtx, GEN_INT (3), const0_rtx,
52538 const1_rtx));
52539 h[i] = gen_lowpart (V32QImode, op);
52540 }
52541
52542 for (i = 0; i < 2; ++i)
52543 {
52544 if (!used[2 * i])
52545 {
52546 l[i] = NULL_RTX;
52547 continue;
52548 }
52549 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
52550 vperm = force_reg (V32QImode, vperm);
52551 l[i] = gen_reg_rtx (V32QImode);
52552 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
52553 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
52554 }
52555
52556 for (i = 0; i < 2; ++i)
52557 {
52558 if (h[i] && l[i])
52559 {
52560 op = gen_reg_rtx (V32QImode);
52561 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
52562 l[i] = op;
52563 }
52564 else if (h[i])
52565 l[i] = h[i];
52566 }
52567
52568 gcc_assert (l[0] && l[1]);
52569 op = d->target;
52570 if (d->vmode != V32QImode)
52571 op = gen_reg_rtx (V32QImode);
52572 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
52573 if (op != d->target)
52574 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
52575 return true;
52576 }
52577
52578 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
52579 With all of the interface bits taken care of, perform the expansion
52580 in D and return true on success. */
52581
52582 static bool
52583 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
52584 {
52585 /* Try a single instruction expansion. */
52586 if (expand_vec_perm_1 (d))
52587 return true;
52588
52589 /* Try sequences of two instructions. */
52590
52591 if (expand_vec_perm_pshuflw_pshufhw (d))
52592 return true;
52593
52594 if (expand_vec_perm_palignr (d, false))
52595 return true;
52596
52597 if (expand_vec_perm_interleave2 (d))
52598 return true;
52599
52600 if (expand_vec_perm_broadcast (d))
52601 return true;
52602
52603 if (expand_vec_perm_vpermq_perm_1 (d))
52604 return true;
52605
52606 if (expand_vec_perm_vperm2f128 (d))
52607 return true;
52608
52609 if (expand_vec_perm_pblendv (d))
52610 return true;
52611
52612 /* Try sequences of three instructions. */
52613
52614 if (expand_vec_perm_even_odd_pack (d))
52615 return true;
52616
52617 if (expand_vec_perm_2vperm2f128_vshuf (d))
52618 return true;
52619
52620 if (expand_vec_perm_pshufb2 (d))
52621 return true;
52622
52623 if (expand_vec_perm_interleave3 (d))
52624 return true;
52625
52626 if (expand_vec_perm_vperm2f128_vblend (d))
52627 return true;
52628
52629 /* Try sequences of four instructions. */
52630
52631 if (expand_vec_perm_even_odd_trunc (d))
52632 return true;
52633 if (expand_vec_perm_vpshufb2_vpermq (d))
52634 return true;
52635
52636 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
52637 return true;
52638
52639 if (expand_vec_perm_vpermi2_vpshub2 (d))
52640 return true;
52641
52642 /* ??? Look for narrow permutations whose element orderings would
52643 allow the promotion to a wider mode. */
52644
52645 /* ??? Look for sequences of interleave or a wider permute that place
52646 the data into the correct lanes for a half-vector shuffle like
52647 pshuf[lh]w or vpermilps. */
52648
52649 /* ??? Look for sequences of interleave that produce the desired results.
52650 The combinatorics of punpck[lh] get pretty ugly... */
52651
52652 if (expand_vec_perm_even_odd (d))
52653 return true;
52654
52655 /* Even longer sequences. */
52656 if (expand_vec_perm_vpshufb4_vpermq2 (d))
52657 return true;
52658
52659 /* See if we can get the same permutation in different vector integer
52660 mode. */
52661 struct expand_vec_perm_d nd;
52662 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
52663 {
52664 if (!d->testing_p)
52665 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
52666 return true;
52667 }
52668
52669 return false;
52670 }
52671
52672 /* If a permutation only uses one operand, make it clear. Returns true
52673 if the permutation references both operands. */
52674
52675 static bool
52676 canonicalize_perm (struct expand_vec_perm_d *d)
52677 {
52678 int i, which, nelt = d->nelt;
52679
52680 for (i = which = 0; i < nelt; ++i)
52681 which |= (d->perm[i] < nelt ? 1 : 2);
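/* E.g. for V4SImode, { 0, 1, 2, 3 } yields which == 1 (only op0 used),
   { 4, 5, 6, 7 } yields which == 2 (only op1 used) and { 0, 5, 2, 7 }
   yields which == 3 (both).  */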
52682
52683 d->one_operand_p = true;
52684 switch (which)
52685 {
52686 default:
52687 gcc_unreachable();
52688
52689 case 3:
52690 if (!rtx_equal_p (d->op0, d->op1))
52691 {
52692 d->one_operand_p = false;
52693 break;
52694 }
52695 /* The elements of PERM do not suggest that only the first operand
52696 is used, but both operands are identical. Allow easier matching
52697 of the permutation by folding the permutation into the single
52698 input vector. */
52699 /* FALLTHRU */
52700
52701 case 2:
52702 for (i = 0; i < nelt; ++i)
52703 d->perm[i] &= nelt - 1;
52704 d->op0 = d->op1;
52705 break;
52706
52707 case 1:
52708 d->op1 = d->op0;
52709 break;
52710 }
52711
52712 return (which == 3);
52713 }
52714
52715 bool
52716 ix86_expand_vec_perm_const (rtx operands[4])
52717 {
52718 struct expand_vec_perm_d d;
52719 unsigned char perm[MAX_VECT_LEN];
52720 int i, nelt;
52721 bool two_args;
52722 rtx sel;
52723
52724 d.target = operands[0];
52725 d.op0 = operands[1];
52726 d.op1 = operands[2];
52727 sel = operands[3];
52728
52729 d.vmode = GET_MODE (d.target);
52730 gcc_assert (VECTOR_MODE_P (d.vmode));
52731 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
52732 d.testing_p = false;
52733
52734 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
52735 gcc_assert (XVECLEN (sel, 0) == nelt);
52736 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
52737
52738 for (i = 0; i < nelt; ++i)
52739 {
52740 rtx e = XVECEXP (sel, 0, i);
52741 int ei = INTVAL (e) & (2 * nelt - 1);
52742 d.perm[i] = ei;
52743 perm[i] = ei;
52744 }
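/* Masking with 2 * nelt - 1 wraps out-of-range selector elements;
   e.g. for V4SImode a selector element of 9 becomes 1.  */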
52745
52746 two_args = canonicalize_perm (&d);
52747
52748 if (ix86_expand_vec_perm_const_1 (&d))
52749 return true;
52750
52751 /* If the selector says both arguments are needed, but the operands are the
52752 same, the above tried to expand with one_operand_p and flattened selector.
52753 If that didn't work, retry without one_operand_p; we succeeded with that
52754 during testing. */
52755 if (two_args && d.one_operand_p)
52756 {
52757 d.one_operand_p = false;
52758 memcpy (d.perm, perm, sizeof (perm));
52759 return ix86_expand_vec_perm_const_1 (&d);
52760 }
52761
52762 return false;
52763 }
52764
52765 /* Implement targetm.vectorize.vec_perm_const_ok. */
52766
52767 static bool
52768 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
52769 const unsigned char *sel)
52770 {
52771 struct expand_vec_perm_d d;
52772 unsigned int i, nelt, which;
52773 bool ret;
52774
52775 d.vmode = vmode;
52776 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
52777 d.testing_p = true;
52778
52779 /* Given sufficient ISA support we can just return true here
52780 for selected vector modes. */
52781 switch (d.vmode)
52782 {
52783 case V16SFmode:
52784 case V16SImode:
52785 case V8DImode:
52786 case V8DFmode:
52787 if (TARGET_AVX512F)
52788 /* All implementable with a single vpermi2 insn. */
52789 return true;
52790 break;
52791 case V32HImode:
52792 if (TARGET_AVX512BW)
52793 /* All implementable with a single vpermi2 insn. */
52794 return true;
52795 break;
52796 case V64QImode:
52797 if (TARGET_AVX512BW)
52798 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
52799 return true;
52800 break;
52801 case V8SImode:
52802 case V8SFmode:
52803 case V4DFmode:
52804 case V4DImode:
52805 if (TARGET_AVX512VL)
52806 /* All implementable with a single vpermi2 insn. */
52807 return true;
52808 break;
52809 case V16HImode:
52810 if (TARGET_AVX2)
52811 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
52812 return true;
52813 break;
52814 case V32QImode:
52815 if (TARGET_AVX2)
52816 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
52817 return true;
52818 break;
52819 case V4SImode:
52820 case V4SFmode:
52821 case V8HImode:
52822 case V16QImode:
52823 /* All implementable with a single vpperm insn. */
52824 if (TARGET_XOP)
52825 return true;
52826 /* All implementable with 2 pshufb + 1 ior. */
52827 if (TARGET_SSSE3)
52828 return true;
52829 break;
52830 case V2DImode:
52831 case V2DFmode:
52832 /* All implementable with shufpd or unpck[lh]pd. */
52833 return true;
52834 default:
52835 return false;
52836 }
52837
52838 /* Extract the values from the vector CST into the permutation
52839 array in D. */
52840 memcpy (d.perm, sel, nelt);
52841 for (i = which = 0; i < nelt; ++i)
52842 {
52843 unsigned char e = d.perm[i];
52844 gcc_assert (e < 2 * nelt);
52845 which |= (e < nelt ? 1 : 2);
52846 }
52847
52848 /* For all elements from second vector, fold the elements to first. */
52849 if (which == 2)
52850 for (i = 0; i < nelt; ++i)
52851 d.perm[i] -= nelt;
52852
52853 /* Check whether the mask can be applied to the vector type. */
52854 d.one_operand_p = (which != 3);
52855
52856 /* Implementable with shufps or pshufd. */
52857 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
52858 return true;
52859
52860 /* Otherwise we have to go through the motions and see if we can
52861 figure out how to generate the requested permutation. */
52862 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
52863 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
52864 if (!d.one_operand_p)
52865 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
52866
52867 start_sequence ();
52868 ret = ix86_expand_vec_perm_const_1 (&d);
52869 end_sequence ();
52870
52871 return ret;
52872 }
52873
52874 void
52875 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
52876 {
52877 struct expand_vec_perm_d d;
52878 unsigned i, nelt;
52879
52880 d.target = targ;
52881 d.op0 = op0;
52882 d.op1 = op1;
52883 d.vmode = GET_MODE (targ);
52884 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
52885 d.one_operand_p = false;
52886 d.testing_p = false;
52887
52888 for (i = 0; i < nelt; ++i)
52889 d.perm[i] = i * 2 + odd;
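/* E.g. for a V4SImode target with odd == 1 this requests elements
   { 1, 3, 5, 7 } of the two concatenated operands.  */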
52890
52891 /* We'll either be able to implement the permutation directly... */
52892 if (expand_vec_perm_1 (&d))
52893 return;
52894
52895 /* ... or we use the special-case patterns. */
52896 expand_vec_perm_even_odd_1 (&d, odd);
52897 }
52898
52899 static void
52900 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
52901 {
52902 struct expand_vec_perm_d d;
52903 unsigned i, nelt, base;
52904 bool ok;
52905
52906 d.target = targ;
52907 d.op0 = op0;
52908 d.op1 = op1;
52909 d.vmode = GET_MODE (targ);
52910 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
52911 d.one_operand_p = false;
52912 d.testing_p = false;
52913
52914 base = high_p ? nelt / 2 : 0;
52915 for (i = 0; i < nelt / 2; ++i)
52916 {
52917 d.perm[i * 2] = i + base;
52918 d.perm[i * 2 + 1] = i + base + nelt;
52919 }
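/* E.g. for V4SImode this requests { 0, 4, 1, 5 } when interleaving the
   low halves and { 2, 6, 3, 7 } when interleaving the high halves.  */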
52920
52921 /* Note that for AVX this isn't one instruction. */
52922 ok = ix86_expand_vec_perm_const_1 (&d);
52923 gcc_assert (ok);
52924 }
52925
52926
52927 /* Expand a vector operation CODE for a V*QImode in terms of the
52928 same operation on V*HImode. */
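/* E.g. a V16QImode multiply becomes two V8HImode multiplies on the
   interleaved low and high halves of the inputs, after which the even
   result bytes are shuffled back into a single V16QImode vector.  */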
52929
52930 void
52931 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
52932 {
52933 machine_mode qimode = GET_MODE (dest);
52934 machine_mode himode;
52935 rtx (*gen_il) (rtx, rtx, rtx);
52936 rtx (*gen_ih) (rtx, rtx, rtx);
52937 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
52938 struct expand_vec_perm_d d;
52939 bool ok, full_interleave;
52940 bool uns_p = false;
52941 int i;
52942
52943 switch (qimode)
52944 {
52945 case V16QImode:
52946 himode = V8HImode;
52947 gen_il = gen_vec_interleave_lowv16qi;
52948 gen_ih = gen_vec_interleave_highv16qi;
52949 break;
52950 case V32QImode:
52951 himode = V16HImode;
52952 gen_il = gen_avx2_interleave_lowv32qi;
52953 gen_ih = gen_avx2_interleave_highv32qi;
52954 break;
52955 case V64QImode:
52956 himode = V32HImode;
52957 gen_il = gen_avx512bw_interleave_lowv64qi;
52958 gen_ih = gen_avx512bw_interleave_highv64qi;
52959 break;
52960 default:
52961 gcc_unreachable ();
52962 }
52963
52964 op2_l = op2_h = op2;
52965 switch (code)
52966 {
52967 case MULT:
52968 /* Unpack data such that we've got a source byte in each low byte of
52969 each word. We don't care what goes into the high byte of each word.
52970 Rather than trying to get zero in there, it is most convenient to let
52971 it be a copy of the low byte. */
52972 op2_l = gen_reg_rtx (qimode);
52973 op2_h = gen_reg_rtx (qimode);
52974 emit_insn (gen_il (op2_l, op2, op2));
52975 emit_insn (gen_ih (op2_h, op2, op2));
52976 /* FALLTHRU */
52977
52978 op1_l = gen_reg_rtx (qimode);
52979 op1_h = gen_reg_rtx (qimode);
52980 emit_insn (gen_il (op1_l, op1, op1));
52981 emit_insn (gen_ih (op1_h, op1, op1));
52982 full_interleave = qimode == V16QImode;
52983 break;
52984
52985 case ASHIFT:
52986 case LSHIFTRT:
52987 uns_p = true;
52988 /* FALLTHRU */
52989 case ASHIFTRT:
52990 op1_l = gen_reg_rtx (himode);
52991 op1_h = gen_reg_rtx (himode);
52992 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
52993 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
52994 full_interleave = true;
52995 break;
52996 default:
52997 gcc_unreachable ();
52998 }
52999
53000 /* Perform the operation. */
53001 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
53002 1, OPTAB_DIRECT);
53003 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
53004 1, OPTAB_DIRECT);
53005 gcc_assert (res_l && res_h);
53006
53007 /* Merge the data back into the right place. */
53008 d.target = dest;
53009 d.op0 = gen_lowpart (qimode, res_l);
53010 d.op1 = gen_lowpart (qimode, res_h);
53011 d.vmode = qimode;
53012 d.nelt = GET_MODE_NUNITS (qimode);
53013 d.one_operand_p = false;
53014 d.testing_p = false;
53015
53016 if (full_interleave)
53017 {
53018 /* For SSE2, we used a full interleave, so the desired
53019 results are in the even elements. */
53020 for (i = 0; i < d.nelt; ++i)
53021 d.perm[i] = i * 2;
53022 }
53023 else
53024 {
53025 /* For AVX, the interleave used above was not cross-lane. So the
53026 extraction is evens but with the second and third quarter swapped.
53027 Happily, that is even one insn shorter than even extraction.
53028 For AVX512BW we have 4 lanes. We extract evens from within a lane,
53029 always first from the first and then from the second source operand,
53030 the index bits above the low 4 bits remain the same.
53031 Thus, for d.nelt == 32 we want permutation
53032 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
53033 and for d.nelt == 64 we want permutation
53034 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
53035 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
53036 for (i = 0; i < d.nelt; ++i)
53037 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
53038 }
53039
53040 ok = ix86_expand_vec_perm_const_1 (&d);
53041 gcc_assert (ok);
53042
53043 set_unique_reg_note (get_last_insn (), REG_EQUAL,
53044 gen_rtx_fmt_ee (code, qimode, op1, op2));
53045 }
53046
53047 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
53048 if op is CONST_VECTOR with all odd elements equal to their
53049 preceding element. */
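/* E.g. { 7, 7, 9, 9 } qualifies, while { 7, 8, 9, 9 } does not.  */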
53050
53051 static bool
53052 const_vector_equal_evenodd_p (rtx op)
53053 {
53054 machine_mode mode = GET_MODE (op);
53055 int i, nunits = GET_MODE_NUNITS (mode);
53056 if (GET_CODE (op) != CONST_VECTOR
53057 || nunits != CONST_VECTOR_NUNITS (op))
53058 return false;
53059 for (i = 0; i < nunits; i += 2)
53060 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
53061 return false;
53062 return true;
53063 }
53064
53065 void
53066 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
53067 bool uns_p, bool odd_p)
53068 {
53069 machine_mode mode = GET_MODE (op1);
53070 machine_mode wmode = GET_MODE (dest);
53071 rtx x;
53072 rtx orig_op1 = op1, orig_op2 = op2;
53073
53074 if (!nonimmediate_operand (op1, mode))
53075 op1 = force_reg (mode, op1);
53076 if (!nonimmediate_operand (op2, mode))
53077 op2 = force_reg (mode, op2);
53078
53079 /* We only play even/odd games with vectors of SImode. */
53080 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
53081
53082 /* If we're looking for the odd results, shift those members down to
53083 the even slots. For some CPUs this is faster than a PSHUFD. */
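/* E.g. for V4SImode, a logical right shift of the V2DImode view by 32 bits
   moves elements 1 and 3 down into the even slots 0 and 2.  */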
53084 if (odd_p)
53085 {
53086 /* For XOP use vpmacsdqh, but only for smult, as it is only
53087 signed. */
53088 if (TARGET_XOP && mode == V4SImode && !uns_p)
53089 {
53090 x = force_reg (wmode, CONST0_RTX (wmode));
53091 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
53092 return;
53093 }
53094
53095 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
53096 if (!const_vector_equal_evenodd_p (orig_op1))
53097 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
53098 x, NULL, 1, OPTAB_DIRECT);
53099 if (!const_vector_equal_evenodd_p (orig_op2))
53100 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
53101 x, NULL, 1, OPTAB_DIRECT);
53102 op1 = gen_lowpart (mode, op1);
53103 op2 = gen_lowpart (mode, op2);
53104 }
53105
53106 if (mode == V16SImode)
53107 {
53108 if (uns_p)
53109 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
53110 else
53111 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
53112 }
53113 else if (mode == V8SImode)
53114 {
53115 if (uns_p)
53116 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
53117 else
53118 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
53119 }
53120 else if (uns_p)
53121 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
53122 else if (TARGET_SSE4_1)
53123 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
53124 else
53125 {
53126 rtx s1, s2, t0, t1, t2;
53127
53128 /* The easiest way to implement this without PMULDQ is to go through
53129 the motions as if we are performing a full 64-bit multiply, except
53130 that we need to do less shuffling of the elements. */
53131
53132 /* Compute the sign-extension, aka highparts, of the two operands. */
53133 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
53134 op1, pc_rtx, pc_rtx);
53135 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
53136 op2, pc_rtx, pc_rtx);
53137
53138 /* Multiply LO(A) * HI(B), and vice-versa. */
53139 t1 = gen_reg_rtx (wmode);
53140 t2 = gen_reg_rtx (wmode);
53141 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
53142 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
53143
53144 /* Multiply LO(A) * LO(B). */
53145 t0 = gen_reg_rtx (wmode);
53146 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
53147
53148 /* Combine and shift the highparts into place. */
53149 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
53150 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
53151 1, OPTAB_DIRECT);
53152
53153 /* Combine high and low parts. */
53154 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
53155 return;
53156 }
53157 emit_insn (x);
53158 }
53159
53160 void
53161 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
53162 bool uns_p, bool high_p)
53163 {
53164 machine_mode wmode = GET_MODE (dest);
53165 machine_mode mode = GET_MODE (op1);
53166 rtx t1, t2, t3, t4, mask;
53167
53168 switch (mode)
53169 {
53170 case V4SImode:
53171 t1 = gen_reg_rtx (mode);
53172 t2 = gen_reg_rtx (mode);
53173 if (TARGET_XOP && !uns_p)
53174 {
53175 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
53176 shuffle the elements once so that all elements are in the right
53177 place for immediate use: { A C B D }. */
53178 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
53179 const1_rtx, GEN_INT (3)));
53180 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
53181 const1_rtx, GEN_INT (3)));
53182 }
53183 else
53184 {
53185 /* Put the elements into place for the multiply. */
53186 ix86_expand_vec_interleave (t1, op1, op1, high_p);
53187 ix86_expand_vec_interleave (t2, op2, op2, high_p);
53188 high_p = false;
53189 }
53190 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
53191 break;
53192
53193 case V8SImode:
53194 /* Shuffle the elements between the lanes. After this we
53195 have { A B E F | C D G H } for each operand. */
53196 t1 = gen_reg_rtx (V4DImode);
53197 t2 = gen_reg_rtx (V4DImode);
53198 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
53199 const0_rtx, const2_rtx,
53200 const1_rtx, GEN_INT (3)));
53201 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
53202 const0_rtx, const2_rtx,
53203 const1_rtx, GEN_INT (3)));
53204
53205 /* Shuffle the elements within the lanes. After this we
53206 have { A A B B | C C D D } or { E E F F | G G H H }. */
53207 t3 = gen_reg_rtx (V8SImode);
53208 t4 = gen_reg_rtx (V8SImode);
53209 mask = GEN_INT (high_p
53210 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
53211 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
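      /* These masks encode the pshufd selectors { 2, 2, 3, 3 } (0xfa) for
	 the high halves and { 0, 0, 1, 1 } (0x50) for the low halves.  */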
53212 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
53213 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
53214
53215 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
53216 break;
53217
53218 case V8HImode:
53219 case V16HImode:
53220 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
53221 uns_p, OPTAB_DIRECT);
53222 t2 = expand_binop (mode,
53223 uns_p ? umul_highpart_optab : smul_highpart_optab,
53224 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
53225 gcc_assert (t1 && t2);
53226
53227 t3 = gen_reg_rtx (mode);
53228 ix86_expand_vec_interleave (t3, t1, t2, high_p);
53229 emit_move_insn (dest, gen_lowpart (wmode, t3));
53230 break;
53231
53232 case V16QImode:
53233 case V32QImode:
53234 case V32HImode:
53235 case V16SImode:
53236 case V64QImode:
53237 t1 = gen_reg_rtx (wmode);
53238 t2 = gen_reg_rtx (wmode);
53239 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
53240 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
53241
53242 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
53243 break;
53244
53245 default:
53246 gcc_unreachable ();
53247 }
53248 }
53249
53250 void
53251 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
53252 {
53253 rtx res_1, res_2, res_3, res_4;
53254
53255 res_1 = gen_reg_rtx (V4SImode);
53256 res_2 = gen_reg_rtx (V4SImode);
53257 res_3 = gen_reg_rtx (V2DImode);
53258 res_4 = gen_reg_rtx (V2DImode);
53259 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
53260 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
53261
53262 /* Move the results in element 2 down to element 1; we don't care
53263 what goes in elements 2 and 3. Then we can merge the parts
53264 back together with an interleave.
53265
53266 Note that two other sequences were tried:
53267 (1) Use interleaves at the start instead of psrldq, which allows
53268 us to use a single shufps to merge things back at the end.
53269 (2) Use shufps here to combine the two vectors, then pshufd to
53270 put the elements in the correct order.
53271 In both cases the cost of the reformatting stall was too high
53272 and the overall sequence slower. */
53273
53274 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
53275 const0_rtx, const2_rtx,
53276 const0_rtx, const0_rtx));
53277 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
53278 const0_rtx, const2_rtx,
53279 const0_rtx, const0_rtx));
53280 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
53281
53282 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
53283 }
53284
53285 void
53286 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
53287 {
53288 machine_mode mode = GET_MODE (op0);
53289 rtx t1, t2, t3, t4, t5, t6;
53290
53291 if (TARGET_AVX512DQ && mode == V8DImode)
53292 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
53293 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
53294 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
53295 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
53296 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
53297 else if (TARGET_XOP && mode == V2DImode)
53298 {
53299 /* op1: A,B,C,D, op2: E,F,G,H */
53300 op1 = gen_lowpart (V4SImode, op1);
53301 op2 = gen_lowpart (V4SImode, op2);
53302
53303 t1 = gen_reg_rtx (V4SImode);
53304 t2 = gen_reg_rtx (V4SImode);
53305 t3 = gen_reg_rtx (V2DImode);
53306 t4 = gen_reg_rtx (V2DImode);
53307
53308 /* t1: B,A,D,C */
53309 emit_insn (gen_sse2_pshufd_1 (t1, op1,
53310 GEN_INT (1),
53311 GEN_INT (0),
53312 GEN_INT (3),
53313 GEN_INT (2)));
53314
53315 /* t2: (B*E),(A*F),(D*G),(C*H) */
53316 emit_insn (gen_mulv4si3 (t2, t1, op2));
53317
53318 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
53319 emit_insn (gen_xop_phadddq (t3, t2));
53320
53321 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
53322 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
53323
53324 /* Multiply lower parts and add all */
53325 t5 = gen_reg_rtx (V2DImode);
53326 emit_insn (gen_vec_widen_umult_even_v4si (t5,
53327 gen_lowpart (V4SImode, op1),
53328 gen_lowpart (V4SImode, op2)));
53329 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
53330
53331 }
53332 else
53333 {
53334 machine_mode nmode;
53335 rtx (*umul) (rtx, rtx, rtx);
53336
53337 if (mode == V2DImode)
53338 {
53339 umul = gen_vec_widen_umult_even_v4si;
53340 nmode = V4SImode;
53341 }
53342 else if (mode == V4DImode)
53343 {
53344 umul = gen_vec_widen_umult_even_v8si;
53345 nmode = V8SImode;
53346 }
53347 else if (mode == V8DImode)
53348 {
53349 umul = gen_vec_widen_umult_even_v16si;
53350 nmode = V16SImode;
53351 }
53352 else
53353 gcc_unreachable ();
53354
53355
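      /* Without a full 64-bit multiply insn, compute op1 * op2 (mod 2^64) as
	 lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32), using the widening
	 even 32x32->64 multiply for each partial product.  */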
53356 /* Multiply low parts. */
53357 t1 = gen_reg_rtx (mode);
53358 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
53359
53360 /* Shift input vectors right 32 bits so we can multiply high parts. */
53361 t6 = GEN_INT (32);
53362 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
53363 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
53364
53365 /* Multiply high parts by low parts. */
53366 t4 = gen_reg_rtx (mode);
53367 t5 = gen_reg_rtx (mode);
53368 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
53369 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
53370
53371 /* Combine and shift the highparts back. */
53372 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
53373 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
53374
53375 /* Combine high and low parts. */
53376 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
53377 }
53378
53379 set_unique_reg_note (get_last_insn (), REG_EQUAL,
53380 gen_rtx_MULT (mode, op1, op2));
53381 }
53382
53383 /* Return 1 if control transfer instruction INSN
53384 should be encoded with the bnd prefix.
53385 If INSN is NULL then return 1 when control
53386 transfer instructions should be prefixed with
53387 bnd by default for the current function. */
53388
53389 bool
53390 ix86_bnd_prefixed_insn_p (rtx insn)
53391 {
53392 /* For call insns check special flag. */
53393 if (insn && CALL_P (insn))
53394 {
53395 rtx call = get_call_rtx_from (insn);
53396 if (call)
53397 return CALL_EXPR_WITH_BOUNDS_P (call);
53398 }
53399
53400 /* All other insns are prefixed only if function is instrumented. */
53401 return chkp_function_instrumented_p (current_function_decl);
53402 }
53403
53404 /* Calculate integer abs() using only SSE2 instructions. */
53405
53406 void
53407 ix86_expand_sse2_abs (rtx target, rtx input)
53408 {
53409 machine_mode mode = GET_MODE (target);
53410 rtx tmp0, tmp1, x;
53411
53412 switch (mode)
53413 {
53414 /* For 32-bit signed integer X, the best way to calculate the absolute
53415 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
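    /* E.g. for X = -5: X >> 31 == -1, (-1 ^ -5) == 4 and 4 - (-1) == 5.  */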
53416 case V4SImode:
53417 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
53418 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
53419 NULL, 0, OPTAB_DIRECT);
53420 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
53421 NULL, 0, OPTAB_DIRECT);
53422 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
53423 target, 0, OPTAB_DIRECT);
53424 break;
53425
53426 /* For 16-bit signed integer X, the best way to calculate the absolute
53427 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
53428 case V8HImode:
53429 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
53430
53431 x = expand_simple_binop (mode, SMAX, tmp0, input,
53432 target, 0, OPTAB_DIRECT);
53433 break;
53434
53435 /* For 8-bit signed integer X, the best way to calculate the absolute
53436 value of X is min ((unsigned char) X, (unsigned char) (-X)),
53437 as SSE2 provides the PMINUB insn. */
53438 case V16QImode:
53439 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
53440
53441 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
53442 target, 0, OPTAB_DIRECT);
53443 break;
53444
53445 default:
53446 gcc_unreachable ();
53447 }
53448
53449 if (x != target)
53450 emit_move_insn (target, x);
53451 }
53452
53453 /* Expand an extract from a vector register through pextr insn.
53454 Return true if successful. */
53455
53456 bool
53457 ix86_expand_pextr (rtx *operands)
53458 {
53459 rtx dst = operands[0];
53460 rtx src = operands[1];
53461
53462 unsigned int size = INTVAL (operands[2]);
53463 unsigned int pos = INTVAL (operands[3]);
53464
53465 if (SUBREG_P (dst))
53466 {
53467 /* Reject non-lowpart subregs. */
53468 if (SUBREG_BYTE (dst) > 0)
53469 return false;
53470 dst = SUBREG_REG (dst);
53471 }
53472
53473 if (SUBREG_P (src))
53474 {
53475 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
53476 src = SUBREG_REG (src);
53477 }
53478
53479 switch (GET_MODE (src))
53480 {
53481 case V16QImode:
53482 case V8HImode:
53483 case V4SImode:
53484 case V2DImode:
53485 case V1TImode:
53486 case TImode:
53487 {
53488 machine_mode srcmode, dstmode;
53489 rtx d, pat;
53490
53491 dstmode = mode_for_size (size, MODE_INT, 0);
53492
53493 switch (dstmode)
53494 {
53495 case QImode:
53496 if (!TARGET_SSE4_1)
53497 return false;
53498 srcmode = V16QImode;
53499 break;
53500
53501 case HImode:
53502 if (!TARGET_SSE2)
53503 return false;
53504 srcmode = V8HImode;
53505 break;
53506
53507 case SImode:
53508 if (!TARGET_SSE4_1)
53509 return false;
53510 srcmode = V4SImode;
53511 break;
53512
53513 case DImode:
53514 gcc_assert (TARGET_64BIT);
53515 if (!TARGET_SSE4_1)
53516 return false;
53517 srcmode = V2DImode;
53518 break;
53519
53520 default:
53521 return false;
53522 }
53523
53524 /* Reject extractions from misaligned positions. */
53525 if (pos & (size-1))
53526 return false;
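	  /* The vec_select index used below is POS / SIZE; e.g. a 16-bit
	     extraction at bit position 48 selects element 3 of the
	     V8HImode view of SRC.  */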
53527
53528 if (GET_MODE (dst) == dstmode)
53529 d = dst;
53530 else
53531 d = gen_reg_rtx (dstmode);
53532
53533 /* Construct insn pattern. */
53534 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
53535 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
53536
53537 /* Let the rtl optimizers know about the zero extension performed. */
53538 if (dstmode == QImode || dstmode == HImode)
53539 {
53540 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
53541 d = gen_lowpart (SImode, d);
53542 }
53543
53544 emit_insn (gen_rtx_SET (d, pat));
53545
53546 if (d != dst)
53547 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
53548 return true;
53549 }
53550
53551 default:
53552 return false;
53553 }
53554 }
53555
53556 /* Expand an insert into a vector register through pinsr insn.
53557 Return true if successful. */
53558
53559 bool
53560 ix86_expand_pinsr (rtx *operands)
53561 {
53562 rtx dst = operands[0];
53563 rtx src = operands[3];
53564
53565 unsigned int size = INTVAL (operands[1]);
53566 unsigned int pos = INTVAL (operands[2]);
53567
53568 if (SUBREG_P (dst))
53569 {
53570 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
53571 dst = SUBREG_REG (dst);
53572 }
53573
53574 switch (GET_MODE (dst))
53575 {
53576 case V16QImode:
53577 case V8HImode:
53578 case V4SImode:
53579 case V2DImode:
53580 case V1TImode:
53581 case TImode:
53582 {
53583 machine_mode srcmode, dstmode;
53584 rtx (*pinsr)(rtx, rtx, rtx, rtx);
53585 rtx d;
53586
53587 srcmode = mode_for_size (size, MODE_INT, 0);
53588
53589 switch (srcmode)
53590 {
53591 case QImode:
53592 if (!TARGET_SSE4_1)
53593 return false;
53594 dstmode = V16QImode;
53595 pinsr = gen_sse4_1_pinsrb;
53596 break;
53597
53598 case HImode:
53599 if (!TARGET_SSE2)
53600 return false;
53601 dstmode = V8HImode;
53602 pinsr = gen_sse2_pinsrw;
53603 break;
53604
53605 case SImode:
53606 if (!TARGET_SSE4_1)
53607 return false;
53608 dstmode = V4SImode;
53609 pinsr = gen_sse4_1_pinsrd;
53610 break;
53611
53612 case DImode:
53613 gcc_assert (TARGET_64BIT);
53614 if (!TARGET_SSE4_1)
53615 return false;
53616 dstmode = V2DImode;
53617 pinsr = gen_sse4_1_pinsrq;
53618 break;
53619
53620 default:
53621 return false;
53622 }
53623
53624 /* Reject insertions to misaligned positions. */
53625 if (pos & (size-1))
53626 return false;
53627
53628 if (SUBREG_P (src))
53629 {
53630 unsigned int srcpos = SUBREG_BYTE (src);
53631
53632 if (srcpos > 0)
53633 {
53634 rtx extr_ops[4];
53635
53636 extr_ops[0] = gen_reg_rtx (srcmode);
53637 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
53638 extr_ops[2] = GEN_INT (size);
53639 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
53640
53641 if (!ix86_expand_pextr (extr_ops))
53642 return false;
53643
53644 src = extr_ops[0];
53645 }
53646 else
53647 src = gen_lowpart (srcmode, SUBREG_REG (src));
53648 }
53649
53650 if (GET_MODE (dst) == dstmode)
53651 d = dst;
53652 else
53653 d = gen_reg_rtx (dstmode);
53654
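	  /* The pinsr expanders take the insertion point as a one-hot mask,
	     hence the 1 << (POS / SIZE) immediate.  */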
53655 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
53656 gen_lowpart (srcmode, src),
53657 GEN_INT (1 << (pos / size))));
53658 if (d != dst)
53659 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
53660 return true;
53661 }
53662
53663 default:
53664 return false;
53665 }
53666 }
53667 \f
53668 /* This function returns the calling abi specific va_list type node.
53669 It returns the FNDECL specific va_list type. */
53670
53671 static tree
53672 ix86_fn_abi_va_list (tree fndecl)
53673 {
53674 if (!TARGET_64BIT)
53675 return va_list_type_node;
53676 gcc_assert (fndecl != NULL_TREE);
53677
53678 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
53679 return ms_va_list_type_node;
53680 else
53681 return sysv_va_list_type_node;
53682 }
53683
53684 /* Returns the canonical va_list type specified by TYPE. If there
53685 is no valid TYPE provided, it returns NULL_TREE. */
53686
53687 static tree
53688 ix86_canonical_va_list_type (tree type)
53689 {
53690 tree wtype, htype;
53691
53692 /* Resolve references and pointers to va_list type. */
53693 if (TREE_CODE (type) == MEM_REF)
53694 type = TREE_TYPE (type);
53695 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
53696 type = TREE_TYPE (type);
53697 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
53698 type = TREE_TYPE (type);
53699
53700 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
53701 {
53702 wtype = va_list_type_node;
53703 gcc_assert (wtype != NULL_TREE);
53704 htype = type;
53705 if (TREE_CODE (wtype) == ARRAY_TYPE)
53706 {
53707 /* If va_list is an array type, the argument may have decayed
53708 to a pointer type, e.g. by being passed to another function.
53709 In that case, unwrap both types so that we can compare the
53710 underlying records. */
53711 if (TREE_CODE (htype) == ARRAY_TYPE
53712 || POINTER_TYPE_P (htype))
53713 {
53714 wtype = TREE_TYPE (wtype);
53715 htype = TREE_TYPE (htype);
53716 }
53717 }
53718 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
53719 return va_list_type_node;
53720 wtype = sysv_va_list_type_node;
53721 gcc_assert (wtype != NULL_TREE);
53722 htype = type;
53723 if (TREE_CODE (wtype) == ARRAY_TYPE)
53724 {
53725 /* If va_list is an array type, the argument may have decayed
53726 to a pointer type, e.g. by being passed to another function.
53727 In that case, unwrap both types so that we can compare the
53728 underlying records. */
53729 if (TREE_CODE (htype) == ARRAY_TYPE
53730 || POINTER_TYPE_P (htype))
53731 {
53732 wtype = TREE_TYPE (wtype);
53733 htype = TREE_TYPE (htype);
53734 }
53735 }
53736 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
53737 return sysv_va_list_type_node;
53738 wtype = ms_va_list_type_node;
53739 gcc_assert (wtype != NULL_TREE);
53740 htype = type;
53741 if (TREE_CODE (wtype) == ARRAY_TYPE)
53742 {
53743 /* If va_list is an array type, the argument may have decayed
53744 to a pointer type, e.g. by being passed to another function.
53745 In that case, unwrap both types so that we can compare the
53746 underlying records. */
53747 if (TREE_CODE (htype) == ARRAY_TYPE
53748 || POINTER_TYPE_P (htype))
53749 {
53750 wtype = TREE_TYPE (wtype);
53751 htype = TREE_TYPE (htype);
53752 }
53753 }
53754 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
53755 return ms_va_list_type_node;
53756 return NULL_TREE;
53757 }
53758 return std_canonical_va_list_type (type);
53759 }
53760
53761 /* Iterate through the target-specific builtin types for va_list.
53762 IDX denotes the iterator, *PTREE is set to the result type of
53763 the va_list builtin, and *PNAME to its internal type.
53764 Returns zero if there is no element for this index, otherwise
53765 IDX should be increased upon the next call.
53766 Note, do not iterate a base builtin's name like __builtin_va_list.
53767 Used from c_common_nodes_and_builtins. */
53768
53769 static int
53770 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
53771 {
53772 if (TARGET_64BIT)
53773 {
53774 switch (idx)
53775 {
53776 default:
53777 break;
53778
53779 case 0:
53780 *ptree = ms_va_list_type_node;
53781 *pname = "__builtin_ms_va_list";
53782 return 1;
53783
53784 case 1:
53785 *ptree = sysv_va_list_type_node;
53786 *pname = "__builtin_sysv_va_list";
53787 return 1;
53788 }
53789 }
53790
53791 return 0;
53792 }
53793
53794 #undef TARGET_SCHED_DISPATCH
53795 #define TARGET_SCHED_DISPATCH has_dispatch
53796 #undef TARGET_SCHED_DISPATCH_DO
53797 #define TARGET_SCHED_DISPATCH_DO do_dispatch
53798 #undef TARGET_SCHED_REASSOCIATION_WIDTH
53799 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
53800 #undef TARGET_SCHED_REORDER
53801 #define TARGET_SCHED_REORDER ix86_sched_reorder
53802 #undef TARGET_SCHED_ADJUST_PRIORITY
53803 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
53804 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
53805 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
53806 ix86_dependencies_evaluation_hook
53807
53808 /* The size of the dispatch window is the total number of bytes of
53809 object code allowed in a window. */
53810 #define DISPATCH_WINDOW_SIZE 16
53811
53812 /* Number of dispatch windows considered for scheduling. */
53813 #define MAX_DISPATCH_WINDOWS 3
53814
53815 /* Maximum number of instructions in a window. */
53816 #define MAX_INSN 4
53817
53818 /* Maximum number of immediate operands in a window. */
53819 #define MAX_IMM 4
53820
53821 /* Maximum number of immediate bits allowed in a window. */
53822 #define MAX_IMM_SIZE 128
53823
53824 /* Maximum number of 32 bit immediates allowed in a window. */
53825 #define MAX_IMM_32 4
53826
53827 /* Maximum number of 64 bit immediates allowed in a window. */
53828 #define MAX_IMM_64 2
53829
53830 /* Maximum total of loads or prefetches allowed in a window. */
53831 #define MAX_LOAD 2
53832
53833 /* Maximum total of stores allowed in a window. */
53834 #define MAX_STORE 1
53835
53836 #undef BIG
53837 #define BIG 100
53838
53839
53840 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
53841 enum dispatch_group {
53842 disp_no_group = 0,
53843 disp_load,
53844 disp_store,
53845 disp_load_store,
53846 disp_prefetch,
53847 disp_imm,
53848 disp_imm_32,
53849 disp_imm_64,
53850 disp_branch,
53851 disp_cmp,
53852 disp_jcc,
53853 disp_last
53854 };
53855
53856 /* Number of allowable groups in a dispatch window. It is an array
53857 indexed by dispatch_group enum. 100 is used as a big number,
53858 because the number of these kinds of operations does not have any
53859 effect in the dispatch window, but we need them for other reasons in
53860 the table. */
53861 static unsigned int num_allowable_groups[disp_last] = {
53862 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
53863 };
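/* In enum order this allows, per window, 2 loads, 1 store, 1 load+store,
   2 prefetches, 4 immediates (at most 4 32-bit or 2 64-bit ones) and
   1 branch; compares and jcc insns are effectively unlimited.  */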
53864
53865 char group_name[disp_last + 1][16] = {
53866 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
53867 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
53868 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
53869 };
53870
53871 /* Instruction path. */
53872 enum insn_path {
53873 no_path = 0,
53874 path_single, /* Single micro op. */
53875 path_double, /* Double micro op. */
53876 path_multi, /* Instructions with more than 2 micro ops. */
53877 last_path
53878 };
53879
53880 /* sched_insn_info defines a window to the instructions scheduled in
53881 the basic block. It contains a pointer to the insn_info table and
53882 the instruction scheduled.
53883
53884 Windows are allocated for each basic block and are linked
53885 together. */
53886 typedef struct sched_insn_info_s {
53887 rtx insn;
53888 enum dispatch_group group;
53889 enum insn_path path;
53890 int byte_len;
53891 int imm_bytes;
53892 } sched_insn_info;
53893
53894 /* Linked list of dispatch windows. This is a two-way list of
53895 dispatch windows of a basic block. It contains information about
53896 the number of uops in the window and the total number of
53897 instructions and of bytes in the object code for this dispatch
53898 window. */
53899 typedef struct dispatch_windows_s {
53900 int num_insn; /* Number of insn in the window. */
53901 int num_uops; /* Number of uops in the window. */
53902 int window_size; /* Number of bytes in the window. */
53903 int window_num; /* Window number, either 0 or 1. */
53904 int num_imm; /* Number of immediates in an insn. */
53905 int num_imm_32; /* Number of 32 bit immediates in an insn. */
53906 int num_imm_64; /* Number of 64 bit immediates in an insn. */
53907 int imm_size; /* Total immediates in the window. */
53908 int num_loads; /* Total memory loads in the window. */
53909 int num_stores; /* Total memory stores in the window. */
53910 int violation; /* Violation exists in window. */
53911 sched_insn_info *window; /* Pointer to the window. */
53912 struct dispatch_windows_s *next;
53913 struct dispatch_windows_s *prev;
53914 } dispatch_windows;
53915
53916 /* Immediate values used in an insn. */
53917 typedef struct imm_info_s
53918 {
53919 int imm;
53920 int imm32;
53921 int imm64;
53922 } imm_info;
53923
53924 static dispatch_windows *dispatch_window_list;
53925 static dispatch_windows *dispatch_window_list1;
53926
53927 /* Get dispatch group of insn. */
53928
53929 static enum dispatch_group
53930 get_mem_group (rtx_insn *insn)
53931 {
53932 enum attr_memory memory;
53933
53934 if (INSN_CODE (insn) < 0)
53935 return disp_no_group;
53936 memory = get_attr_memory (insn);
53937 if (memory == MEMORY_STORE)
53938 return disp_store;
53939
53940 if (memory == MEMORY_LOAD)
53941 return disp_load;
53942
53943 if (memory == MEMORY_BOTH)
53944 return disp_load_store;
53945
53946 return disp_no_group;
53947 }
53948
53949 /* Return true if insn is a compare instruction. */
53950
53951 static bool
53952 is_cmp (rtx_insn *insn)
53953 {
53954 enum attr_type type;
53955
53956 type = get_attr_type (insn);
53957 return (type == TYPE_TEST
53958 || type == TYPE_ICMP
53959 || type == TYPE_FCMP
53960 || GET_CODE (PATTERN (insn)) == COMPARE);
53961 }
53962
53963 /* Return true if a dispatch violation was encountered. */
53964
53965 static bool
53966 dispatch_violation (void)
53967 {
53968 if (dispatch_window_list->next)
53969 return dispatch_window_list->next->violation;
53970 return dispatch_window_list->violation;
53971 }
53972
53973 /* Return true if insn is a branch instruction. */
53974
53975 static bool
53976 is_branch (rtx_insn *insn)
53977 {
53978 return (CALL_P (insn) || JUMP_P (insn));
53979 }
53980
53981 /* Return true if insn is a prefetch instruction. */
53982
53983 static bool
53984 is_prefetch (rtx_insn *insn)
53985 {
53986 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
53987 }
53988
53989 /* This function initializes a dispatch window and the list container holding a
53990 pointer to the window. */
53991
53992 static void
53993 init_window (int window_num)
53994 {
53995 int i;
53996 dispatch_windows *new_list;
53997
53998 if (window_num == 0)
53999 new_list = dispatch_window_list;
54000 else
54001 new_list = dispatch_window_list1;
54002
54003 new_list->num_insn = 0;
54004 new_list->num_uops = 0;
54005 new_list->window_size = 0;
54006 new_list->next = NULL;
54007 new_list->prev = NULL;
54008 new_list->window_num = window_num;
54009 new_list->num_imm = 0;
54010 new_list->num_imm_32 = 0;
54011 new_list->num_imm_64 = 0;
54012 new_list->imm_size = 0;
54013 new_list->num_loads = 0;
54014 new_list->num_stores = 0;
54015 new_list->violation = false;
54016
54017 for (i = 0; i < MAX_INSN; i++)
54018 {
54019 new_list->window[i].insn = NULL;
54020 new_list->window[i].group = disp_no_group;
54021 new_list->window[i].path = no_path;
54022 new_list->window[i].byte_len = 0;
54023 new_list->window[i].imm_bytes = 0;
54024 }
54025 return;
54026 }
54027
54028 /* This function allocates and initializes a dispatch window and the
54029 list container holding a pointer to the window. */
54030
54031 static dispatch_windows *
54032 allocate_window (void)
54033 {
54034 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
54035 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
54036
54037 return new_list;
54038 }
54039
54040 /* This routine initializes the dispatch scheduling information. It
54041 initiates building dispatch scheduler tables and constructs the
54042 first dispatch window. */
54043
54044 static void
54045 init_dispatch_sched (void)
54046 {
54047 /* Allocate a dispatch list and a window. */
54048 dispatch_window_list = allocate_window ();
54049 dispatch_window_list1 = allocate_window ();
54050 init_window (0);
54051 init_window (1);
54052 }
54053
54054 /* This function returns true if a branch is detected. End of a basic block
54055 does not have to be a branch, but here we assume only branches end a
54056 window. */
54057
54058 static bool
54059 is_end_basic_block (enum dispatch_group group)
54060 {
54061 return group == disp_branch;
54062 }
54063
54064 /* This function is called when the end of a window processing is reached. */
54065
54066 static void
54067 process_end_window (void)
54068 {
54069 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
54070 if (dispatch_window_list->next)
54071 {
54072 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
54073 gcc_assert (dispatch_window_list->window_size
54074 + dispatch_window_list1->window_size <= 48);
54075 init_window (1);
54076 }
54077 init_window (0);
54078 }
54079
54080 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
54081 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
54082 for 48 bytes of instructions. Note that these windows are not dispatch
54083 windows whose sizes are DISPATCH_WINDOW_SIZE. */
54084
54085 static dispatch_windows *
54086 allocate_next_window (int window_num)
54087 {
54088 if (window_num == 0)
54089 {
54090 if (dispatch_window_list->next)
54091 init_window (1);
54092 init_window (0);
54093 return dispatch_window_list;
54094 }
54095
54096 dispatch_window_list->next = dispatch_window_list1;
54097 dispatch_window_list1->prev = dispatch_window_list;
54098
54099 return dispatch_window_list1;
54100 }
54101
54102 /* Compute number of immediate operands of an instruction. */
54103
54104 static void
54105 find_constant (rtx in_rtx, imm_info *imm_values)
54106 {
54107 if (INSN_P (in_rtx))
54108 in_rtx = PATTERN (in_rtx);
54109 subrtx_iterator::array_type array;
54110 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
54111 if (const_rtx x = *iter)
54112 switch (GET_CODE (x))
54113 {
54114 case CONST:
54115 case SYMBOL_REF:
54116 case CONST_INT:
54117 (imm_values->imm)++;
54118 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
54119 (imm_values->imm32)++;
54120 else
54121 (imm_values->imm64)++;
54122 break;
54123
54124 case CONST_DOUBLE:
54125 case CONST_WIDE_INT:
54126 (imm_values->imm)++;
54127 (imm_values->imm64)++;
54128 break;
54129
54130 case CODE_LABEL:
54131 if (LABEL_KIND (x) == LABEL_NORMAL)
54132 {
54133 (imm_values->imm)++;
54134 (imm_values->imm32)++;
54135 }
54136 break;
54137
54138 default:
54139 break;
54140 }
54141 }
54142
54143 /* Return total size of immediate operands of an instruction along with number
54144 of corresponding immediate-operands. It initializes its parameters to zero
54145 before calling FIND_CONSTANT.
54146 INSN is the input instruction. IMM is the total of immediates.
54147 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
54148 bit immediates. */
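/* E.g. an insn with one 32-bit and one 64-bit immediate has a total
   immediate size of 4 + 8 == 12 bytes.  */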
54149
54150 static int
54151 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
54152 {
54153 imm_info imm_values = {0, 0, 0};
54154
54155 find_constant (insn, &imm_values);
54156 *imm = imm_values.imm;
54157 *imm32 = imm_values.imm32;
54158 *imm64 = imm_values.imm64;
54159 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
54160 }
54161
54162 /* This function indicates whether an instruction has any immediate
54163 operands. */
54164
54165 static bool
54166 has_immediate (rtx_insn *insn)
54167 {
54168 int num_imm_operand;
54169 int num_imm32_operand;
54170 int num_imm64_operand;
54171
54172 if (insn)
54173 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
54174 &num_imm64_operand);
54175 return false;
54176 }
54177
54178 /* Return the insn path (single, double or multi uop) for INSN. */
54179
54180 static enum insn_path
54181 get_insn_path (rtx_insn *insn)
54182 {
54183 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
54184
54185 if ((int)path == 0)
54186 return path_single;
54187
54188 if ((int)path == 1)
54189 return path_double;
54190
54191 return path_multi;
54192 }
54193
54194 /* Return insn dispatch group. */
54195
54196 static enum dispatch_group
54197 get_insn_group (rtx_insn *insn)
54198 {
54199 enum dispatch_group group = get_mem_group (insn);
54200 if (group)
54201 return group;
54202
54203 if (is_branch (insn))
54204 return disp_branch;
54205
54206 if (is_cmp (insn))
54207 return disp_cmp;
54208
54209 if (has_immediate (insn))
54210 return disp_imm;
54211
54212 if (is_prefetch (insn))
54213 return disp_prefetch;
54214
54215 return disp_no_group;
54216 }
54217
54218 /* Count number of GROUP restricted instructions in a dispatch
54219 window WINDOW_LIST. */
54220
54221 static int
54222 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
54223 {
54224 enum dispatch_group group = get_insn_group (insn);
54225 int imm_size;
54226 int num_imm_operand;
54227 int num_imm32_operand;
54228 int num_imm64_operand;
54229
54230 if (group == disp_no_group)
54231 return 0;
54232
54233 if (group == disp_imm)
54234 {
54235 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
54236 &num_imm64_operand);
54237 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
54238 || num_imm_operand + window_list->num_imm > MAX_IMM
54239 || (num_imm32_operand > 0
54240 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
54241 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
54242 || (num_imm64_operand > 0
54243 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
54244 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
54245 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
54246 && num_imm64_operand > 0
54247 && ((window_list->num_imm_64 > 0
54248 && window_list->num_insn >= 2)
54249 || window_list->num_insn >= 3)))
54250 return BIG;
54251
54252 return 1;
54253 }
54254
54255 if ((group == disp_load_store
54256 && (window_list->num_loads >= MAX_LOAD
54257 || window_list->num_stores >= MAX_STORE))
54258 || ((group == disp_load
54259 || group == disp_prefetch)
54260 && window_list->num_loads >= MAX_LOAD)
54261 || (group == disp_store
54262 && window_list->num_stores >= MAX_STORE))
54263 return BIG;
54264
54265 return 1;
54266 }
54267
54268 /* This function returns true if insn satisfies dispatch rules on the
54269 last window scheduled. */
54270
54271 static bool
54272 fits_dispatch_window (rtx_insn *insn)
54273 {
54274 dispatch_windows *window_list = dispatch_window_list;
54275 dispatch_windows *window_list_next = dispatch_window_list->next;
54276 unsigned int num_restrict;
54277 enum dispatch_group group = get_insn_group (insn);
54278 enum insn_path path = get_insn_path (insn);
54279 int sum;
54280
54281 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
54282 instructions should be given the lowest priority in the
54283 scheduling process in Haifa scheduler to make sure they will be
54284 scheduled in the same dispatch window as the reference to them. */
54285 if (group == disp_jcc || group == disp_cmp)
54286 return false;
54287
54288 /* Check nonrestricted. */
54289 if (group == disp_no_group || group == disp_branch)
54290 return true;
54291
54292 /* Get last dispatch window. */
54293 if (window_list_next)
54294 window_list = window_list_next;
54295
54296 if (window_list->window_num == 1)
54297 {
54298 sum = window_list->prev->window_size + window_list->window_size;
54299
54300 if (sum == 32
54301 || (min_insn_size (insn) + sum) >= 48)
54302 /* Window 1 is full. Go for next window. */
54303 return true;
54304 }
54305
54306 num_restrict = count_num_restricted (insn, window_list);
54307
54308 if (num_restrict > num_allowable_groups[group])
54309 return false;
54310
54311 /* See if it fits in the first window. */
54312 if (window_list->window_num == 0)
54313 {
54314 /* The first window should have only single and double path
54315 uops. */
54316 if (path == path_double
54317 && (window_list->num_uops + 2) > MAX_INSN)
54318 return false;
54319 else if (path != path_single)
54320 return false;
54321 }
54322 return true;
54323 }
54324
54325 /* Add an instruction INSN with NUM_UOPS micro-operations to the
54326 dispatch window WINDOW_LIST. */
54327
54328 static void
54329 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
54330 {
54331 int byte_len = min_insn_size (insn);
54332 int num_insn = window_list->num_insn;
54333 int imm_size;
54334 sched_insn_info *window = window_list->window;
54335 enum dispatch_group group = get_insn_group (insn);
54336 enum insn_path path = get_insn_path (insn);
54337 int num_imm_operand;
54338 int num_imm32_operand;
54339 int num_imm64_operand;
54340
54341 if (!window_list->violation && group != disp_cmp
54342 && !fits_dispatch_window (insn))
54343 window_list->violation = true;
54344
54345 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
54346 &num_imm64_operand);
54347
54348 /* Initialize window with new instruction. */
54349 window[num_insn].insn = insn;
54350 window[num_insn].byte_len = byte_len;
54351 window[num_insn].group = group;
54352 window[num_insn].path = path;
54353 window[num_insn].imm_bytes = imm_size;
54354
54355 window_list->window_size += byte_len;
54356 window_list->num_insn = num_insn + 1;
54357 window_list->num_uops = window_list->num_uops + num_uops;
54358 window_list->imm_size += imm_size;
54359 window_list->num_imm += num_imm_operand;
54360 window_list->num_imm_32 += num_imm32_operand;
54361 window_list->num_imm_64 += num_imm64_operand;
54362
54363 if (group == disp_store)
54364 window_list->num_stores += 1;
54365 else if (group == disp_load
54366 || group == disp_prefetch)
54367 window_list->num_loads += 1;
54368 else if (group == disp_load_store)
54369 {
54370 window_list->num_stores += 1;
54371 window_list->num_loads += 1;
54372 }
54373 }
54374
54375 /* Adds a scheduled instruction, INSN, to the current dispatch window.
54376 If the total bytes of instructions or the number of instructions in
54377 the window exceeds the allowable limit, it allocates a new window.
54378
54379 static void
54380 add_to_dispatch_window (rtx_insn *insn)
54381 {
54382 int byte_len;
54383 dispatch_windows *window_list;
54384 dispatch_windows *next_list;
54385 dispatch_windows *window0_list;
54386 enum insn_path path;
54387 enum dispatch_group insn_group;
54388 bool insn_fits;
54389 int num_insn;
54390 int num_uops;
54391 int window_num;
54392 int insn_num_uops;
54393 int sum;
54394
54395 if (INSN_CODE (insn) < 0)
54396 return;
54397
54398 byte_len = min_insn_size (insn);
54399 window_list = dispatch_window_list;
54400 next_list = window_list->next;
54401 path = get_insn_path (insn);
54402 insn_group = get_insn_group (insn);
54403
54404 /* Get the last dispatch window. */
54405 if (next_list)
54406 window_list = dispatch_window_list->next;
54407
54408 if (path == path_single)
54409 insn_num_uops = 1;
54410 else if (path == path_double)
54411 insn_num_uops = 2;
54412 else
54413 insn_num_uops = (int) path;
54414
54415 /* If current window is full, get a new window.
54416 Window number zero is full, if MAX_INSN uops are scheduled in it.
54417 Window number one is full, if window zero's bytes plus window
54418 one's bytes total 32, or if the bytes of the new instruction added
54419 to the total make it greater than 48, or if it already has MAX_INSN
54420 instructions in it. */
54421 num_insn = window_list->num_insn;
54422 num_uops = window_list->num_uops;
54423 window_num = window_list->window_num;
54424 insn_fits = fits_dispatch_window (insn);
54425
54426 if (num_insn >= MAX_INSN
54427 || num_uops + insn_num_uops > MAX_INSN
54428 || !(insn_fits))
54429 {
54430 window_num = ~window_num & 1;
54431 window_list = allocate_next_window (window_num);
54432 }
54433
54434 if (window_num == 0)
54435 {
54436 add_insn_window (insn, window_list, insn_num_uops);
54437 if (window_list->num_insn >= MAX_INSN
54438 && insn_group == disp_branch)
54439 {
54440 process_end_window ();
54441 return;
54442 }
54443 }
54444 else if (window_num == 1)
54445 {
54446 window0_list = window_list->prev;
54447 sum = window0_list->window_size + window_list->window_size;
54448 if (sum == 32
54449 || (byte_len + sum) >= 48)
54450 {
54451 process_end_window ();
54452 window_list = dispatch_window_list;
54453 }
54454
54455 add_insn_window (insn, window_list, insn_num_uops);
54456 }
54457 else
54458 gcc_unreachable ();
54459
54460 if (is_end_basic_block (insn_group))
54461 {
54462 /* End of basic block is reached; do end-basic-block processing. */
54463 process_end_window ();
54464 return;
54465 }
54466 }
54467
54468 /* Print the dispatch window, WINDOW_NUM, to FILE. */
54469
54470 DEBUG_FUNCTION static void
54471 debug_dispatch_window_file (FILE *file, int window_num)
54472 {
54473 dispatch_windows *list;
54474 int i;
54475
54476 if (window_num == 0)
54477 list = dispatch_window_list;
54478 else
54479 list = dispatch_window_list1;
54480
54481 fprintf (file, "Window #%d:\n", list->window_num);
54482 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
54483 list->num_insn, list->num_uops, list->window_size);
54484 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
54485 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
54486
54487 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
54488 list->num_stores);
54489 fprintf (file, " insn info:\n");
54490
54491 for (i = 0; i < MAX_INSN; i++)
54492 {
54493 if (!list->window[i].insn)
54494 break;
54495 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
54496 i, group_name[list->window[i].group],
54497 i, (void *)list->window[i].insn,
54498 i, list->window[i].path,
54499 i, list->window[i].byte_len,
54500 i, list->window[i].imm_bytes);
54501 }
54502 }
54503
54504 /* Print to stdout a dispatch window. */
54505
54506 DEBUG_FUNCTION void
54507 debug_dispatch_window (int window_num)
54508 {
54509 debug_dispatch_window_file (stdout, window_num);
54510 }
54511
54512 /* Print INSN dispatch information to FILE. */
54513
54514 DEBUG_FUNCTION static void
54515 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
54516 {
54517 int byte_len;
54518 enum insn_path path;
54519 enum dispatch_group group;
54520 int imm_size;
54521 int num_imm_operand;
54522 int num_imm32_operand;
54523 int num_imm64_operand;
54524
54525 if (INSN_CODE (insn) < 0)
54526 return;
54527
54528 byte_len = min_insn_size (insn);
54529 path = get_insn_path (insn);
54530 group = get_insn_group (insn);
54531 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
54532 &num_imm64_operand);
54533
54534 fprintf (file, " insn info:\n");
54535 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
54536 group_name[group], path, byte_len);
54537 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
54538 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
54539 }
54540
54541 /* Print to stdout the status of the ready list with respect to
54542 dispatch windows. */
54543
54544 DEBUG_FUNCTION void
54545 debug_ready_dispatch (void)
54546 {
54547 int i;
54548 int no_ready = number_in_ready ();
54549
54550 fprintf (stdout, "Number of ready: %d\n", no_ready);
54551
54552 for (i = 0; i < no_ready; i++)
54553 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
54554 }
54555
54556 /* This routine is the driver of the dispatch scheduler. */
54557
54558 static void
54559 do_dispatch (rtx_insn *insn, int mode)
54560 {
54561 if (mode == DISPATCH_INIT)
54562 init_dispatch_sched ();
54563 else if (mode == ADD_TO_DISPATCH_WINDOW)
54564 add_to_dispatch_window (insn);
54565 }
54566
54567 /* Return TRUE if Dispatch Scheduling is supported. */
54568
54569 static bool
54570 has_dispatch (rtx_insn *insn, int action)
54571 {
54572 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
54573 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
54574 switch (action)
54575 {
54576 default:
54577 return false;
54578
54579 case IS_DISPATCH_ON:
54580 return true;
54581 break;
54582
54583 case IS_CMP:
54584 return is_cmp (insn);
54585
54586 case DISPATCH_VIOLATION:
54587 return dispatch_violation ();
54588
54589 case FITS_DISPATCH_WINDOW:
54590 return fits_dispatch_window (insn);
54591 }
54592
54593 return false;
54594 }
54595
54596 /* Implementation of reassociation_width target hook used by
54597 reassoc phase to identify parallelism level in reassociated
54598 tree. The statement's tree_code is passed in OPC. The argument type
54599 is passed in MODE.
54600
54601 Currently parallel reassociation is enabled for Atom
54602 processors only and we set reassociation width to be 2
54603 because Atom may issue up to 2 instructions per cycle.
54604
54605 Return value should be fixed if parallel reassociation is
54606 enabled for other processors. */
54607
54608 static int
54609 ix86_reassociation_width (unsigned int, machine_mode mode)
54610 {
54611 /* Vector part. */
54612 if (VECTOR_MODE_P (mode))
54613 {
54614 if (TARGET_VECTOR_PARALLEL_EXECUTION)
54615 return 2;
54616 else
54617 return 1;
54618 }
54619
54620 /* Scalar part. */
54621 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
54622 return 2;
54623 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
54624 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
54625 else
54626 return 1;
54627 }
54628
54629 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
54630 place emms and femms instructions. */
54631
54632 static machine_mode
54633 ix86_preferred_simd_mode (machine_mode mode)
54634 {
54635 if (!TARGET_SSE)
54636 return word_mode;
54637
54638 switch (mode)
54639 {
54640 case QImode:
54641 return TARGET_AVX512BW ? V64QImode :
54642 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
54643 case HImode:
54644 return TARGET_AVX512BW ? V32HImode :
54645 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
54646 case SImode:
54647 return TARGET_AVX512F ? V16SImode :
54648 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
54649 case DImode:
54650 return TARGET_AVX512F ? V8DImode :
54651 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
54652
54653 case SFmode:
54654 if (TARGET_AVX512F)
54655 return V16SFmode;
54656 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
54657 return V8SFmode;
54658 else
54659 return V4SFmode;
54660
54661 case DFmode:
54662 if (!TARGET_VECTORIZE_DOUBLE)
54663 return word_mode;
54664 else if (TARGET_AVX512F)
54665 return V8DFmode;
54666 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
54667 return V4DFmode;
54668 else if (TARGET_SSE2)
54669 return V2DFmode;
54670 /* FALLTHRU */
54671
54672 default:
54673 return word_mode;
54674 }
54675 }
54676
54677 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
54678 vectors. If AVX512F is enabled then try vectorizing with 512bit,
54679 256bit and 128bit vectors. */
54680
54681 static unsigned int
54682 ix86_autovectorize_vector_sizes (void)
54683 {
54684 return TARGET_AVX512F ? 64 | 32 | 16 :
54685 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
54686 }
54687
54688 /* Implementation of targetm.vectorize.get_mask_mode. */
54689
54690 static machine_mode
54691 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
54692 {
54693 unsigned elem_size = vector_size / nunits;
54694
54695 /* Scalar mask case. */
54696 if ((TARGET_AVX512F && vector_size == 64)
54697 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
54698 {
54699 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
54700 return smallest_mode_for_size (nunits, MODE_INT);
54701 }
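  /* E.g. a 64-element QImode vector (vector_size == 64) needs AVX512BW
     for a scalar mask and then gets 64-bit DImode; otherwise an integer
     vector mask mode with NUNITS same-sized elements is built below.  */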
54702
54703 machine_mode elem_mode
54704 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
54705
54706 gcc_assert (elem_size * nunits == vector_size);
54707
54708 return mode_for_vector (elem_mode, nunits);
54709 }
54710
54711 \f
54712
54713 /* Return class of registers which could be used for pseudo of MODE
54714 and of class RCLASS for spilling instead of memory. Return NO_REGS
54715 if it is not possible or not profitable. */
54716
54717 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
54718
54719 static reg_class_t
54720 ix86_spill_class (reg_class_t rclass, machine_mode mode)
54721 {
54722 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
54723 && TARGET_SSE2
54724 && TARGET_INTER_UNIT_MOVES_TO_VEC
54725 && TARGET_INTER_UNIT_MOVES_FROM_VEC
54726 && (mode == SImode || (TARGET_64BIT && mode == DImode))
54727 && INTEGER_CLASS_P (rclass))
54728 return ALL_SSE_REGS;
54729 return NO_REGS;
54730 }
54731
54732 /* Implement targetm.vectorize.init_cost. */
54733
54734 static void *
54735 ix86_init_cost (struct loop *)
54736 {
54737 unsigned *cost = XNEWVEC (unsigned, 3);
54738 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
54739 return cost;
54740 }
54741
54742 /* Implement targetm.vectorize.add_stmt_cost. */
54743
54744 static unsigned
54745 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
54746 struct _stmt_vec_info *stmt_info, int misalign,
54747 enum vect_cost_model_location where)
54748 {
54749 unsigned *cost = (unsigned *) data;
54750 unsigned retval = 0;
54751
54752 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
54753 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
54754
54755 /* Statements in an inner loop relative to the loop being
54756 vectorized are weighted more heavily. The value here is
54757 arbitrary and could potentially be improved with analysis. */
54758 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
54759 count *= 50; /* FIXME. */
54760
54761 retval = (unsigned) (count * stmt_cost);
54762
54763 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
54764 for Silvermont, as it has an out-of-order integer pipeline and can execute
54765 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
54766 if (TARGET_SILVERMONT || TARGET_INTEL)
54767 if (stmt_info && stmt_info->stmt)
54768 {
54769 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
54770 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
54771 retval = (retval * 17) / 10;
54772 }
54773
54774 cost[where] += retval;
54775
54776 return retval;
54777 }
54778
54779 /* Implement targetm.vectorize.finish_cost. */
54780
54781 static void
54782 ix86_finish_cost (void *data, unsigned *prologue_cost,
54783 unsigned *body_cost, unsigned *epilogue_cost)
54784 {
54785 unsigned *cost = (unsigned *) data;
54786 *prologue_cost = cost[vect_prologue];
54787 *body_cost = cost[vect_body];
54788 *epilogue_cost = cost[vect_epilogue];
54789 }
54790
54791 /* Implement targetm.vectorize.destroy_cost_data. */
54792
54793 static void
54794 ix86_destroy_cost_data (void *data)
54795 {
54796 free (data);
54797 }
54798
54799 /* Validate target specific memory model bits in VAL. */
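/* E.g. IX86_HLE_ACQUIRE combined with a memory model weaker than acquire
   is diagnosed and the function falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */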
54800
54801 static unsigned HOST_WIDE_INT
54802 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
54803 {
54804 enum memmodel model = memmodel_from_int (val);
54805 bool strong;
54806
54807 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
54808 |MEMMODEL_MASK)
54809 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
54810 {
54811 warning (OPT_Winvalid_memory_model,
54812 "Unknown architecture specific memory model");
54813 return MEMMODEL_SEQ_CST;
54814 }
54815 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
54816 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
54817 {
54818 warning (OPT_Winvalid_memory_model,
54819 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
54820 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
54821 }
54822 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
54823 {
54824 warning (OPT_Winvalid_memory_model,
54825 "HLE_RELEASE not used with RELEASE or stronger memory model");
54826 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
54827 }
54828 return val;
54829 }
54830
54831 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
54832 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
54833 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
54834 or number of vecsize_mangle variants that should be emitted. */
54835
54836 static int
54837 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
54838 struct cgraph_simd_clone *clonei,
54839 tree base_type, int num)
54840 {
54841 int ret = 1;
54842
54843 if (clonei->simdlen
54844 && (clonei->simdlen < 2
54845 || clonei->simdlen > 1024
54846 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
54847 {
54848 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
54849 "unsupported simdlen %d", clonei->simdlen);
54850 return 0;
54851 }
54852
54853 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
54854 if (TREE_CODE (ret_type) != VOID_TYPE)
54855 switch (TYPE_MODE (ret_type))
54856 {
54857 case QImode:
54858 case HImode:
54859 case SImode:
54860 case DImode:
54861 case SFmode:
54862 case DFmode:
54863 /* case SCmode: */
54864 /* case DCmode: */
54865 break;
54866 default:
54867 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
54868 "unsupported return type %qT for simd\n", ret_type);
54869 return 0;
54870 }
54871
54872 tree t;
54873 int i;
54874
54875 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
54876 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
54877 switch (TYPE_MODE (TREE_TYPE (t)))
54878 {
54879 case QImode:
54880 case HImode:
54881 case SImode:
54882 case DImode:
54883 case SFmode:
54884 case DFmode:
54885 /* case SCmode: */
54886 /* case DCmode: */
54887 break;
54888 default:
54889 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
54890 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
54891 return 0;
54892 }
54893
54894 if (clonei->cilk_elemental)
54895 {
54896 /* Parse the processor clause here. If not present, default to 'b'. */
54897 clonei->vecsize_mangle = 'b';
54898 }
54899 else if (!TREE_PUBLIC (node->decl))
54900 {
54901 /* If the function isn't exported, we can pick up just one ISA
54902 for the clones. */
54903 if (TARGET_AVX512F)
54904 clonei->vecsize_mangle = 'e';
54905 else if (TARGET_AVX2)
54906 clonei->vecsize_mangle = 'd';
54907 else if (TARGET_AVX)
54908 clonei->vecsize_mangle = 'c';
54909 else
54910 clonei->vecsize_mangle = 'b';
54911 ret = 1;
54912 }
54913 else
54914 {
54915 clonei->vecsize_mangle = "bcde"[num];
54916 ret = 4;
54917 }
54918 clonei->mask_mode = VOIDmode;
54919 switch (clonei->vecsize_mangle)
54920 {
54921 case 'b':
54922 clonei->vecsize_int = 128;
54923 clonei->vecsize_float = 128;
54924 break;
54925 case 'c':
54926 clonei->vecsize_int = 128;
54927 clonei->vecsize_float = 256;
54928 break;
54929 case 'd':
54930 clonei->vecsize_int = 256;
54931 clonei->vecsize_float = 256;
54932 break;
54933 case 'e':
54934 clonei->vecsize_int = 512;
54935 clonei->vecsize_float = 512;
54936 if (TYPE_MODE (base_type) == QImode)
54937 clonei->mask_mode = DImode;
54938 else
54939 clonei->mask_mode = SImode;
54940 break;
54941 }
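  /* With no explicit simdlen, derive it from the vector size; e.g. a 'd'
     (AVX2) clone with a float base type gets 256 / 32 == 8 lanes.  */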
54942 if (clonei->simdlen == 0)
54943 {
54944 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
54945 clonei->simdlen = clonei->vecsize_int;
54946 else
54947 clonei->simdlen = clonei->vecsize_float;
54948 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
54949 }
54950 else if (clonei->simdlen > 16)
54951 {
54952 /* For compatibility with ICC, use the same upper bounds
54953 for simdlen. In particular, for CTYPE below, use the return type,
54954 unless the function returns void, in which case use the characteristic
54955 type. If it is possible for given SIMDLEN to pass CTYPE value
54956 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
54957 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
54958 emit corresponding clone. */
54959 tree ctype = ret_type;
54960 if (TREE_CODE (ret_type) == VOID_TYPE)
54961 ctype = base_type;
54962 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
54963 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
54964 cnt /= clonei->vecsize_int;
54965 else
54966 cnt /= clonei->vecsize_float;
54967 if (cnt > (TARGET_64BIT ? 16 : 8))
54968 {
54969 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
54970 "unsupported simdlen %d", clonei->simdlen);
54971 return 0;
54972 }
54973 }
54974 return ret;
54975 }
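/* Worked example (illustrative, not from the implementation): for a clone
   returning double with the 'd' mangling (vecsize_float == 256) and an
   explicit simdlen of 32, cnt == 64 * 32 / 256 == 8, which still fits in
   the 8 [XYZ]MM registers available to 32-bit code (and the 16 available
   to 64-bit code), so the clone is emitted.  With simdlen 128, cnt == 32
   exceeds both limits and the clone is rejected with the warning above.  */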
54976
54977 /* Add target attribute to SIMD clone NODE if needed. */
54978
54979 static void
54980 ix86_simd_clone_adjust (struct cgraph_node *node)
54981 {
54982 const char *str = NULL;
54983 gcc_assert (node->decl == cfun->decl);
54984 switch (node->simdclone->vecsize_mangle)
54985 {
54986 case 'b':
54987 if (!TARGET_SSE2)
54988 str = "sse2";
54989 break;
54990 case 'c':
54991 if (!TARGET_AVX)
54992 str = "avx";
54993 break;
54994 case 'd':
54995 if (!TARGET_AVX2)
54996 str = "avx2";
54997 break;
54998 case 'e':
54999 if (!TARGET_AVX512F)
55000 str = "avx512f";
55001 break;
55002 default:
55003 gcc_unreachable ();
55004 }
55005 if (str == NULL)
55006 return;
55007 push_cfun (NULL);
55008 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
55009 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
55010 gcc_assert (ok);
55011 pop_cfun ();
55012 ix86_reset_previous_fndecl ();
55013 ix86_set_current_function (node->decl);
55014 }
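/* Illustrative note (not from the implementation): for example, when the
   containing translation unit is compiled with only -msse2, the 'd' clone
   produced above gets the equivalent of __attribute__((target ("avx2")))
   attached here, so its body is compiled with AVX2 enabled.  */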
55015
55016 /* If SIMD clone NODE can't be used in a vectorized loop
55017 in the current function, return -1; otherwise return the badness of
55018 using it (0 if it is most desirable from the vecsize_mangle point of
55019 view, 1 if slightly less desirable, etc.). */
55020
55021 static int
55022 ix86_simd_clone_usable (struct cgraph_node *node)
55023 {
55024 switch (node->simdclone->vecsize_mangle)
55025 {
55026 case 'b':
55027 if (!TARGET_SSE2)
55028 return -1;
55029 if (!TARGET_AVX)
55030 return 0;
55031 return TARGET_AVX2 ? 2 : 1;
55032 case 'c':
55033 if (!TARGET_AVX)
55034 return -1;
55035 return TARGET_AVX2 ? 1 : 0;
55037 case 'd':
55038 if (!TARGET_AVX2)
55039 return -1;
55040 return 0;
55041 case 'e':
55042 if (!TARGET_AVX512F)
55043 return -1;
55044 return 0;
55045 default:
55046 gcc_unreachable ();
55047 }
55048 }
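/* Illustrative example (not from the implementation): if the caller is
   compiled with -mavx2 (TARGET_AVX2 and TARGET_AVX set, TARGET_AVX512F
   clear), the 'e' clone is unusable (-1), the 'd' clone is the best match
   (badness 0), 'c' is slightly worse (1) and 'b' is worst (2).  */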
55049
55050 /* This function adjusts the unroll factor based on
55051 the hardware capabilities. For example, bdver3 has
55052 a loop buffer which makes unrolling of smaller
55053 loops less important. This function decides the
55054 unroll factor using the number of memory references
55055 in the loop body (with a budget of 32) as a heuristic. */
55056
55057 static unsigned
55058 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
55059 {
55060 basic_block *bbs;
55061 rtx_insn *insn;
55062 unsigned i;
55063 unsigned mem_count = 0;
55064
55065 if (!TARGET_ADJUST_UNROLL)
55066 return nunroll;
55067
55068 /* Count the number of memory references within the loop body.
55069 This value determines the unrolling factor for bdver3 and bdver4
55070 architectures. */
55071 subrtx_iterator::array_type array;
55072 bbs = get_loop_body (loop);
55073 for (i = 0; i < loop->num_nodes; i++)
55074 FOR_BB_INSNS (bbs[i], insn)
55075 if (NONDEBUG_INSN_P (insn))
55076 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
55077 if (const_rtx x = *iter)
55078 if (MEM_P (x))
55079 {
55080 machine_mode mode = GET_MODE (x);
55081 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
55082 if (n_words > 4)
55083 mem_count += 2;
55084 else
55085 mem_count += 1;
55086 }
55087 free (bbs);
55088
55089 if (mem_count && mem_count <= 32)
55090 return 32 / mem_count;
55091
55092 return nunroll;
55093 }
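/* Illustrative example (not from the implementation): on bdver3/bdver4
   (where TARGET_ADJUST_UNROLL is set), a loop body with four word-sized
   memory references gets mem_count == 4 and hence an unroll factor of
   32 / 4 == 8; accesses wider than four words count twice, and a loop
   with more than 32 counted references keeps the caller's NUNROLL.  */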
55094
55095
55096 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
55097
55098 static bool
55099 ix86_float_exceptions_rounding_supported_p (void)
55100 {
55101 /* For x87 floating point with standard excess precision handling,
55102 there is no adddf3 pattern (since x87 floating point only has
55103 XFmode operations) so the default hook implementation gets this
55104 wrong. */
55105 return TARGET_80387 || TARGET_SSE_MATH;
55106 }
55107
55108 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
55109
55110 static void
55111 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
55112 {
55113 if (!TARGET_80387 && !TARGET_SSE_MATH)
55114 return;
55115 tree exceptions_var = create_tmp_var_raw (integer_type_node);
55116 if (TARGET_80387)
55117 {
55118 tree fenv_index_type = build_index_type (size_int (6));
55119 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
55120 tree fenv_var = create_tmp_var_raw (fenv_type);
55121 TREE_ADDRESSABLE (fenv_var) = 1;
55122 tree fenv_ptr = build_pointer_type (fenv_type);
55123 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
55124 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
55125 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
55126 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
55127 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
55128 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
55129 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
55130 tree hold_fnclex = build_call_expr (fnclex, 0);
55131 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
55132 NULL_TREE, NULL_TREE);
55133 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
55134 hold_fnclex);
55135 *clear = build_call_expr (fnclex, 0);
55136 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
55137 tree fnstsw_call = build_call_expr (fnstsw, 0);
55138 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
55139 sw_var, fnstsw_call);
55140 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
55141 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
55142 exceptions_var, exceptions_x87);
55143 *update = build2 (COMPOUND_EXPR, integer_type_node,
55144 sw_mod, update_mod);
55145 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
55146 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
55147 }
55148 if (TARGET_SSE_MATH)
55149 {
55150 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
55151 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
55152 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
55153 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
55154 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
55155 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
55156 mxcsr_orig_var, stmxcsr_hold_call);
55157 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
55158 mxcsr_orig_var,
55159 build_int_cst (unsigned_type_node, 0x1f80));
55160 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
55161 build_int_cst (unsigned_type_node, 0xffffffc0));
55162 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
55163 mxcsr_mod_var, hold_mod_val);
55164 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
55165 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
55166 hold_assign_orig, hold_assign_mod);
55167 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
55168 ldmxcsr_hold_call);
55169 if (*hold)
55170 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
55171 else
55172 *hold = hold_all;
55173 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
55174 if (*clear)
55175 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
55176 ldmxcsr_clear_call);
55177 else
55178 *clear = ldmxcsr_clear_call;
55179 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
55180 tree exceptions_sse = fold_convert (integer_type_node,
55181 stmxcsr_update_call);
55182 if (*update)
55183 {
55184 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
55185 exceptions_var, exceptions_sse);
55186 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
55187 exceptions_var, exceptions_mod);
55188 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
55189 exceptions_assign);
55190 }
55191 else
55192 *update = build2 (MODIFY_EXPR, integer_type_node,
55193 exceptions_var, exceptions_sse);
55194 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
55195 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
55196 ldmxcsr_update_call);
55197 }
55198 tree atomic_feraiseexcept
55199 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
55200 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
55201 1, exceptions_var);
55202 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
55203 atomic_feraiseexcept_call);
55204 }
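#if 0
/* Illustrative sketch only, not part of GCC: a user-level rendering of what
   the SSE half of the HOLD and UPDATE sequences built above boils down to,
   written with the <xmmintrin.h> MXCSR intrinsics.  The hook itself builds
   the equivalent GENERIC trees directly; the names below are made up.  */
#include <xmmintrin.h>

static unsigned int
sketch_fenv_hold (void)
{
  unsigned int mxcsr_orig = _mm_getcsr ();
  /* Mask all exceptions (set bits 7-12, i.e. 0x1f80) and clear the sticky
     exception flags (bits 0-5).  */
  _mm_setcsr ((mxcsr_orig | 0x1f80) & 0xffffffc0);
  return mxcsr_orig;
}

static int
sketch_fenv_update (unsigned int mxcsr_orig)
{
  /* Read the exceptions raised while the modified environment was active,
     then restore the caller's MXCSR.  */
  int exceptions = (int) _mm_getcsr ();
  _mm_setcsr (mxcsr_orig);
  return exceptions;
}
#endif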
55205
55206 /* Return the mode to be used for bounds, or VOIDmode
55207 if bounds are not supported. */
55208
55209 static enum machine_mode
55210 ix86_mpx_bound_mode ()
55211 {
55212 /* Do not support pointer checker if MPX
55213 is not enabled. */
55214 if (!TARGET_MPX)
55215 {
55216 if (flag_check_pointer_bounds)
55217 warning (0, "Pointer Checker requires MPX support on this target."
55218 " Use -mmpx options to enable MPX.");
55219 return VOIDmode;
55220 }
55221
55222 return BNDmode;
55223 }
55224
55225 /* Return constant used to statically initialize constant bounds.
55226
55227 This function is used to create special bound values. For now
55228 only INIT bounds and NONE bounds are expected. More special
55229 values may be added later. */
55230
55231 static tree
55232 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
55233 {
55234 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
55235 : build_zero_cst (pointer_sized_int_node);
55236 tree high = ub ? build_zero_cst (pointer_sized_int_node)
55237 : build_minus_one_cst (pointer_sized_int_node);
55238
55239 /* This function is supposed to be used to create INIT and
55240 NONE bounds only. */
55241 gcc_assert ((lb == 0 && ub == -1)
55242 || (lb == -1 && ub == 0));
55243
55244 return build_complex (NULL, low, high);
55245 }
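/* Illustrative note (not from the implementation): MPX stores the upper
   bound in one's complement form (see the BIT_NOT_EXPR applied to UB in
   ix86_initialize_bounds below), so INIT bounds (lb == 0, ub == -1, i.e.
   the whole address space) come out as the pair {0, ~-1} == {0, 0}, and
   NONE bounds (lb == -1, ub == 0) as {-1, ~0} == {-1, -1}.  */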
55246
55247 /* Generate a list of statements STMTS to initialize pointer bounds
55248 variable VAR with bounds LB and UB. Return the number of generated
55249 statements. */
55250
55251 static int
55252 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
55253 {
55254 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
55255 tree lhs, modify, var_p;
55256
55257 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
55258 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
55259
55260 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
55261 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
55262 append_to_statement_list (modify, stmts);
55263
55264 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
55265 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
55266 TYPE_SIZE_UNIT (pointer_sized_int_node)));
55267 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
55268 append_to_statement_list (modify, stmts);
55269
55270 return 2;
55271 }
55272
55273 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
55274 /* For i386, a common symbol is local only for non-PIE binaries. For
55275 x86-64, a common symbol is local only for non-PIE binaries or when
55276 the linker supports copy relocations in PIE binaries. */
55277
55278 static bool
55279 ix86_binds_local_p (const_tree exp)
55280 {
55281 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
55282 (!flag_pic
55283 || (TARGET_64BIT
55284 && HAVE_LD_PIE_COPYRELOC != 0)));
55285 }
55286 #endif
55287
55288 /* If MEM is in the form of [base+offset], extract the two parts of
55289 the address into BASE and OFFSET and return true; otherwise return false. */
55290
55291 static bool
55292 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
55293 {
55294 rtx addr;
55295
55296 gcc_assert (MEM_P (mem));
55297
55298 addr = XEXP (mem, 0);
55299
55300 if (GET_CODE (addr) == CONST)
55301 addr = XEXP (addr, 0);
55302
55303 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
55304 {
55305 *base = addr;
55306 *offset = const0_rtx;
55307 return true;
55308 }
55309
55310 if (GET_CODE (addr) == PLUS
55311 && (REG_P (XEXP (addr, 0))
55312 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
55313 && CONST_INT_P (XEXP (addr, 1)))
55314 {
55315 *base = XEXP (addr, 0);
55316 *offset = XEXP (addr, 1);
55317 return true;
55318 }
55319
55320 return false;
55321 }
55322
55323 /* Given OPERANDS of a consecutive load/store pair, check if we can
55324 merge them into a move-multiple insn. LOAD is true if they are load
55325 instructions. MODE is the mode of the memory operands. */
55326
55327 bool
55328 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
55329 enum machine_mode mode)
55330 {
55331 HOST_WIDE_INT offval_1, offval_2, msize;
55332 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
55333
55334 if (load)
55335 {
55336 mem_1 = operands[1];
55337 mem_2 = operands[3];
55338 reg_1 = operands[0];
55339 reg_2 = operands[2];
55340 }
55341 else
55342 {
55343 mem_1 = operands[0];
55344 mem_2 = operands[2];
55345 reg_1 = operands[1];
55346 reg_2 = operands[3];
55347 }
55348
55349 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
55350
55351 if (REGNO (reg_1) != REGNO (reg_2))
55352 return false;
55353
55354 /* Check if the addresses are in the form of [base+offset]. */
55355 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
55356 return false;
55357 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
55358 return false;
55359
55360 /* Check if the bases are the same. */
55361 if (!rtx_equal_p (base_1, base_2))
55362 return false;
55363
55364 offval_1 = INTVAL (offset_1);
55365 offval_2 = INTVAL (offset_2);
55366 msize = GET_MODE_SIZE (mode);
55367 /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address. */
55368 if (offval_1 + msize != offval_2)
55369 return false;
55370
55371 return true;
55372 }
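/* Illustrative example (not from the implementation): for a load pair with
   operands[1] == [base + 0] and operands[3] == [base + 8] in DImode
   (msize == 8) whose destination operands name the same register, the
   offsets are adjacent (0 + 8 == 8) and the function returns true; with
   operands[3] == [base + 16] it would return false.  */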
55373
55374 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
55375
55376 static bool
55377 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
55378 optimization_type opt_type)
55379 {
55380 switch (op)
55381 {
55382 case asin_optab:
55383 case acos_optab:
55384 case log1p_optab:
55385 case exp_optab:
55386 case exp10_optab:
55387 case exp2_optab:
55388 case expm1_optab:
55389 case ldexp_optab:
55390 case scalb_optab:
55391 case round_optab:
55392 return opt_type == OPTIMIZE_FOR_SPEED;
55393
55394 case rint_optab:
55395 if (SSE_FLOAT_MODE_P (mode1)
55396 && TARGET_SSE_MATH
55397 && !flag_trapping_math
55398 && !TARGET_ROUND)
55399 return opt_type == OPTIMIZE_FOR_SPEED;
55400 return true;
55401
55402 case floor_optab:
55403 case ceil_optab:
55404 case btrunc_optab:
55405 if (SSE_FLOAT_MODE_P (mode1)
55406 && TARGET_SSE_MATH
55407 && !flag_trapping_math
55408 && TARGET_ROUND)
55409 return true;
55410 return opt_type == OPTIMIZE_FOR_SPEED;
55411
55412 case rsqrt_optab:
55413 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
55414
55415 default:
55416 return true;
55417 }
55418 }
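/* Illustrative example (not from the implementation): with -msse4.1,
   -fno-trapping-math and SSE math, floor/ceil/trunc are reported as
   supported even when optimizing for size (they expand inline via the
   SSE4.1 round instructions), whereas without TARGET_ROUND they are only
   expanded inline when optimizing for speed and otherwise fall back to
   the library calls.  */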
55419
55420 /* Address space support.
55421
55422 This is not "far pointers" in the 16-bit sense, but an easy way
55423 to use %fs and %gs segment prefixes. Therefore:
55424
55425 (a) All address spaces have the same modes,
55426 (b) All address spaces have the same address forms,
55427 (c) While %fs and %gs are technically subsets of the generic
55428 address space, they are probably not subsets of each other.
55429 (d) Since we have no access to the segment base register values
55430 without resorting to a system call, we cannot convert a
55431 non-default address space to a default address space.
55432 Therefore we do not claim %fs or %gs are subsets of generic.
55433
55434 Therefore we can (mostly) use the default hooks. */
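#if 0
/* Illustrative sketch only, not part of GCC: how these segment address
   spaces surface at the C level.  The __seg_fs/__seg_gs qualifiers are
   assumed here to be the user-level spelling of the %fs/%gs address
   spaces; a dereference through a __seg_gs pointer is emitted with a
   %gs: segment-override prefix.  */
static unsigned long
read_gs_word (const unsigned long __seg_gs *p)
{
  return *p;
}
#endif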
55435
55436 /* All use of segmentation is assumed to make address 0 valid. */
55437
55438 static bool
55439 ix86_addr_space_zero_address_valid (addr_space_t as)
55440 {
55441 return as != ADDR_SPACE_GENERIC;
55442 }
55443 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
55444 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
55445
55446 /* Initialize the GCC target structure. */
55447 #undef TARGET_RETURN_IN_MEMORY
55448 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
55449
55450 #undef TARGET_LEGITIMIZE_ADDRESS
55451 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
55452
55453 #undef TARGET_ATTRIBUTE_TABLE
55454 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
55455 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
55456 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
55457 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
55458 # undef TARGET_MERGE_DECL_ATTRIBUTES
55459 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
55460 #endif
55461
55462 #undef TARGET_COMP_TYPE_ATTRIBUTES
55463 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
55464
55465 #undef TARGET_INIT_BUILTINS
55466 #define TARGET_INIT_BUILTINS ix86_init_builtins
55467 #undef TARGET_BUILTIN_DECL
55468 #define TARGET_BUILTIN_DECL ix86_builtin_decl
55469 #undef TARGET_EXPAND_BUILTIN
55470 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
55471
55472 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
55473 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
55474 ix86_builtin_vectorized_function
55475
55476 #undef TARGET_VECTORIZE_BUILTIN_GATHER
55477 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
55478
55479 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
55480 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
55481
55482 #undef TARGET_BUILTIN_RECIPROCAL
55483 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
55484
55485 #undef TARGET_ASM_FUNCTION_EPILOGUE
55486 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
55487
55488 #undef TARGET_ENCODE_SECTION_INFO
55489 #ifndef SUBTARGET_ENCODE_SECTION_INFO
55490 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
55491 #else
55492 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
55493 #endif
55494
55495 #undef TARGET_ASM_OPEN_PAREN
55496 #define TARGET_ASM_OPEN_PAREN ""
55497 #undef TARGET_ASM_CLOSE_PAREN
55498 #define TARGET_ASM_CLOSE_PAREN ""
55499
55500 #undef TARGET_ASM_BYTE_OP
55501 #define TARGET_ASM_BYTE_OP ASM_BYTE
55502
55503 #undef TARGET_ASM_ALIGNED_HI_OP
55504 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
55505 #undef TARGET_ASM_ALIGNED_SI_OP
55506 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
55507 #ifdef ASM_QUAD
55508 #undef TARGET_ASM_ALIGNED_DI_OP
55509 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
55510 #endif
55511
55512 #undef TARGET_PROFILE_BEFORE_PROLOGUE
55513 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
55514
55515 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
55516 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
55517
55518 #undef TARGET_ASM_UNALIGNED_HI_OP
55519 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
55520 #undef TARGET_ASM_UNALIGNED_SI_OP
55521 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
55522 #undef TARGET_ASM_UNALIGNED_DI_OP
55523 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
55524
55525 #undef TARGET_PRINT_OPERAND
55526 #define TARGET_PRINT_OPERAND ix86_print_operand
55527 #undef TARGET_PRINT_OPERAND_ADDRESS
55528 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
55529 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
55530 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
55531 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
55532 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
55533
55534 #undef TARGET_SCHED_INIT_GLOBAL
55535 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
55536 #undef TARGET_SCHED_ADJUST_COST
55537 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
55538 #undef TARGET_SCHED_ISSUE_RATE
55539 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
55540 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
55541 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
55542 ia32_multipass_dfa_lookahead
55543 #undef TARGET_SCHED_MACRO_FUSION_P
55544 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
55545 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
55546 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
55547
55548 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
55549 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
55550
55551 #undef TARGET_MEMMODEL_CHECK
55552 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
55553
55554 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
55555 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
55556
55557 #ifdef HAVE_AS_TLS
55558 #undef TARGET_HAVE_TLS
55559 #define TARGET_HAVE_TLS true
55560 #endif
55561 #undef TARGET_CANNOT_FORCE_CONST_MEM
55562 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
55563 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
55564 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
55565
55566 #undef TARGET_DELEGITIMIZE_ADDRESS
55567 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
55568
55569 #undef TARGET_MS_BITFIELD_LAYOUT_P
55570 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
55571
55572 #if TARGET_MACHO
55573 #undef TARGET_BINDS_LOCAL_P
55574 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
55575 #else
55576 #undef TARGET_BINDS_LOCAL_P
55577 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
55578 #endif
55579 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
55580 #undef TARGET_BINDS_LOCAL_P
55581 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
55582 #endif
55583
55584 #undef TARGET_ASM_OUTPUT_MI_THUNK
55585 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
55586 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
55587 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
55588
55589 #undef TARGET_ASM_FILE_START
55590 #define TARGET_ASM_FILE_START x86_file_start
55591
55592 #undef TARGET_OPTION_OVERRIDE
55593 #define TARGET_OPTION_OVERRIDE ix86_option_override
55594
55595 #undef TARGET_REGISTER_MOVE_COST
55596 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
55597 #undef TARGET_MEMORY_MOVE_COST
55598 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
55599 #undef TARGET_RTX_COSTS
55600 #define TARGET_RTX_COSTS ix86_rtx_costs
55601 #undef TARGET_ADDRESS_COST
55602 #define TARGET_ADDRESS_COST ix86_address_cost
55603
55604 #undef TARGET_FIXED_CONDITION_CODE_REGS
55605 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
55606 #undef TARGET_CC_MODES_COMPATIBLE
55607 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
55608
55609 #undef TARGET_MACHINE_DEPENDENT_REORG
55610 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
55611
55612 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
55613 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
55614
55615 #undef TARGET_BUILD_BUILTIN_VA_LIST
55616 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
55617
55618 #undef TARGET_FOLD_BUILTIN
55619 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
55620
55621 #undef TARGET_COMPARE_VERSION_PRIORITY
55622 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
55623
55624 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
55625 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
55626 ix86_generate_version_dispatcher_body
55627
55628 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
55629 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
55630 ix86_get_function_versions_dispatcher
55631
55632 #undef TARGET_ENUM_VA_LIST_P
55633 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
55634
55635 #undef TARGET_FN_ABI_VA_LIST
55636 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
55637
55638 #undef TARGET_CANONICAL_VA_LIST_TYPE
55639 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
55640
55641 #undef TARGET_EXPAND_BUILTIN_VA_START
55642 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
55643
55644 #undef TARGET_MD_ASM_ADJUST
55645 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
55646
55647 #undef TARGET_PROMOTE_PROTOTYPES
55648 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
55649 #undef TARGET_SETUP_INCOMING_VARARGS
55650 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
55651 #undef TARGET_MUST_PASS_IN_STACK
55652 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
55653 #undef TARGET_FUNCTION_ARG_ADVANCE
55654 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
55655 #undef TARGET_FUNCTION_ARG
55656 #define TARGET_FUNCTION_ARG ix86_function_arg
55657 #undef TARGET_INIT_PIC_REG
55658 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
55659 #undef TARGET_USE_PSEUDO_PIC_REG
55660 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
55661 #undef TARGET_FUNCTION_ARG_BOUNDARY
55662 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
55663 #undef TARGET_PASS_BY_REFERENCE
55664 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
55665 #undef TARGET_INTERNAL_ARG_POINTER
55666 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
55667 #undef TARGET_UPDATE_STACK_BOUNDARY
55668 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
55669 #undef TARGET_GET_DRAP_RTX
55670 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
55671 #undef TARGET_STRICT_ARGUMENT_NAMING
55672 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
55673 #undef TARGET_STATIC_CHAIN
55674 #define TARGET_STATIC_CHAIN ix86_static_chain
55675 #undef TARGET_TRAMPOLINE_INIT
55676 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
55677 #undef TARGET_RETURN_POPS_ARGS
55678 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
55679
55680 #undef TARGET_LEGITIMATE_COMBINED_INSN
55681 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
55682
55683 #undef TARGET_ASAN_SHADOW_OFFSET
55684 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
55685
55686 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
55687 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
55688
55689 #undef TARGET_SCALAR_MODE_SUPPORTED_P
55690 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
55691
55692 #undef TARGET_VECTOR_MODE_SUPPORTED_P
55693 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
55694
55695 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
55696 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
55697 ix86_libgcc_floating_mode_supported_p
55698
55699 #undef TARGET_C_MODE_FOR_SUFFIX
55700 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
55701
55702 #ifdef HAVE_AS_TLS
55703 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
55704 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
55705 #endif
55706
55707 #ifdef SUBTARGET_INSERT_ATTRIBUTES
55708 #undef TARGET_INSERT_ATTRIBUTES
55709 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
55710 #endif
55711
55712 #undef TARGET_MANGLE_TYPE
55713 #define TARGET_MANGLE_TYPE ix86_mangle_type
55714
55715 #if !TARGET_MACHO
55716 #undef TARGET_STACK_PROTECT_FAIL
55717 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
55718 #endif
55719
55720 #undef TARGET_FUNCTION_VALUE
55721 #define TARGET_FUNCTION_VALUE ix86_function_value
55722
55723 #undef TARGET_FUNCTION_VALUE_REGNO_P
55724 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
55725
55726 #undef TARGET_PROMOTE_FUNCTION_MODE
55727 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
55728
55729 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
55730 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
55731
55732 #undef TARGET_MEMBER_TYPE_FORCES_BLK
55733 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
55734
55735 #undef TARGET_INSTANTIATE_DECLS
55736 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
55737
55738 #undef TARGET_SECONDARY_RELOAD
55739 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
55740
55741 #undef TARGET_CLASS_MAX_NREGS
55742 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
55743
55744 #undef TARGET_PREFERRED_RELOAD_CLASS
55745 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
55746 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
55747 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
55748 #undef TARGET_CLASS_LIKELY_SPILLED_P
55749 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
55750
55751 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
55752 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
55753 ix86_builtin_vectorization_cost
55754 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
55755 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
55756 ix86_vectorize_vec_perm_const_ok
55757 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
55758 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
55759 ix86_preferred_simd_mode
55760 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
55761 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
55762 ix86_autovectorize_vector_sizes
55763 #undef TARGET_VECTORIZE_GET_MASK_MODE
55764 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
55765 #undef TARGET_VECTORIZE_INIT_COST
55766 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
55767 #undef TARGET_VECTORIZE_ADD_STMT_COST
55768 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
55769 #undef TARGET_VECTORIZE_FINISH_COST
55770 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
55771 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
55772 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
55773
55774 #undef TARGET_SET_CURRENT_FUNCTION
55775 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
55776
55777 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
55778 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
55779
55780 #undef TARGET_OPTION_SAVE
55781 #define TARGET_OPTION_SAVE ix86_function_specific_save
55782
55783 #undef TARGET_OPTION_RESTORE
55784 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
55785
55786 #undef TARGET_OPTION_POST_STREAM_IN
55787 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
55788
55789 #undef TARGET_OPTION_PRINT
55790 #define TARGET_OPTION_PRINT ix86_function_specific_print
55791
55792 #undef TARGET_OPTION_FUNCTION_VERSIONS
55793 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
55794
55795 #undef TARGET_CAN_INLINE_P
55796 #define TARGET_CAN_INLINE_P ix86_can_inline_p
55797
55798 #undef TARGET_LEGITIMATE_ADDRESS_P
55799 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
55800
55801 #undef TARGET_LRA_P
55802 #define TARGET_LRA_P hook_bool_void_true
55803
55804 #undef TARGET_REGISTER_PRIORITY
55805 #define TARGET_REGISTER_PRIORITY ix86_register_priority
55806
55807 #undef TARGET_REGISTER_USAGE_LEVELING_P
55808 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
55809
55810 #undef TARGET_LEGITIMATE_CONSTANT_P
55811 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
55812
55813 #undef TARGET_FRAME_POINTER_REQUIRED
55814 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
55815
55816 #undef TARGET_CAN_ELIMINATE
55817 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
55818
55819 #undef TARGET_EXTRA_LIVE_ON_ENTRY
55820 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
55821
55822 #undef TARGET_ASM_CODE_END
55823 #define TARGET_ASM_CODE_END ix86_code_end
55824
55825 #undef TARGET_CONDITIONAL_REGISTER_USAGE
55826 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
55827
55828 #if TARGET_MACHO
55829 #undef TARGET_INIT_LIBFUNCS
55830 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
55831 #endif
55832
55833 #undef TARGET_LOOP_UNROLL_ADJUST
55834 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
55835
55836 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
55837 #undef TARGET_SPILL_CLASS
55838 #define TARGET_SPILL_CLASS ix86_spill_class
55839
55840 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
55841 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
55842 ix86_simd_clone_compute_vecsize_and_simdlen
55843
55844 #undef TARGET_SIMD_CLONE_ADJUST
55845 #define TARGET_SIMD_CLONE_ADJUST \
55846 ix86_simd_clone_adjust
55847
55848 #undef TARGET_SIMD_CLONE_USABLE
55849 #define TARGET_SIMD_CLONE_USABLE \
55850 ix86_simd_clone_usable
55851
55852 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
55853 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
55854 ix86_float_exceptions_rounding_supported_p
55855
55856 #undef TARGET_MODE_EMIT
55857 #define TARGET_MODE_EMIT ix86_emit_mode_set
55858
55859 #undef TARGET_MODE_NEEDED
55860 #define TARGET_MODE_NEEDED ix86_mode_needed
55861
55862 #undef TARGET_MODE_AFTER
55863 #define TARGET_MODE_AFTER ix86_mode_after
55864
55865 #undef TARGET_MODE_ENTRY
55866 #define TARGET_MODE_ENTRY ix86_mode_entry
55867
55868 #undef TARGET_MODE_EXIT
55869 #define TARGET_MODE_EXIT ix86_mode_exit
55870
55871 #undef TARGET_MODE_PRIORITY
55872 #define TARGET_MODE_PRIORITY ix86_mode_priority
55873
55874 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
55875 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
55876
55877 #undef TARGET_LOAD_BOUNDS_FOR_ARG
55878 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
55879
55880 #undef TARGET_STORE_BOUNDS_FOR_ARG
55881 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
55882
55883 #undef TARGET_LOAD_RETURNED_BOUNDS
55884 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
55885
55886 #undef TARGET_STORE_RETURNED_BOUNDS
55887 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
55888
55889 #undef TARGET_CHKP_BOUND_MODE
55890 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
55891
55892 #undef TARGET_BUILTIN_CHKP_FUNCTION
55893 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
55894
55895 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
55896 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
55897
55898 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
55899 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
55900
55901 #undef TARGET_CHKP_INITIALIZE_BOUNDS
55902 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
55903
55904 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
55905 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
55906
55907 #undef TARGET_OFFLOAD_OPTIONS
55908 #define TARGET_OFFLOAD_OPTIONS \
55909 ix86_offload_options
55910
55911 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
55912 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
55913
55914 #undef TARGET_OPTAB_SUPPORTED_P
55915 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
55916
55917 #undef TARGET_HARD_REGNO_SCRATCH_OK
55918 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
55919
55920 struct gcc_target targetm = TARGET_INITIALIZER;
55921 \f
55922 #include "gt-i386.h"