1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87 #include "builtins.h"
88
89 static rtx legitimize_dllimport_symbol (rtx, bool);
90 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
91 static rtx legitimize_pe_coff_symbol (rtx, bool);
92
93 #ifndef CHECK_STACK_LIMIT
94 #define CHECK_STACK_LIMIT (-1)
95 #endif
96
97 /* Return index of given mode in mult and division cost tables. */
98 #define MODE_INDEX(mode) \
99 ((mode) == QImode ? 0 \
100 : (mode) == HImode ? 1 \
101 : (mode) == SImode ? 2 \
102 : (mode) == DImode ? 3 \
103 : 4)
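/* Modes wider than DImode fall through to index 4, the trailing "other"
   entry of the per-mode multiply and divide cost arrays below.  */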
104
105 /* Processor costs (relative to an add) */
106 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
107 #define COSTS_N_BYTES(N) ((N) * 2)
108
109 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
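/* Each stringop table below has two entries: [0] is used when generating
   32-bit code and [1] when generating 64-bit code.  DUMMY_STRINGOP_ALGS
   fills the 64-bit slot for tunings that are only ever used for 32-bit
   code.  */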
110
111 static stringop_algs ix86_size_memcpy[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 static stringop_algs ix86_size_memset[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
117
118 const
119 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
120 COSTS_N_BYTES (2), /* cost of an add instruction */
121 COSTS_N_BYTES (3), /* cost of a lea instruction */
122 COSTS_N_BYTES (2), /* variable shift costs */
123 COSTS_N_BYTES (3), /* constant shift costs */
124 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 0, /* cost of multiply per each bit set */
130 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 COSTS_N_BYTES (3), /* cost of movsx */
136 COSTS_N_BYTES (3), /* cost of movzx */
137 0, /* "large" insn */
138 2, /* MOVE_RATIO */
139 2, /* cost for loading QImode using movzbl */
140 {2, 2, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 2, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {2, 2, 2}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {2, 2, 2}, /* cost of storing fp registers
148 in SFmode, DFmode and XFmode */
149 3, /* cost of moving MMX register */
150 {3, 3}, /* cost of loading MMX registers
151 in SImode and DImode */
152 {3, 3}, /* cost of storing MMX registers
153 in SImode and DImode */
154 3, /* cost of moving SSE register */
155 {3, 3, 3}, /* cost of loading SSE registers
156 in SImode, DImode and TImode */
157 {3, 3, 3}, /* cost of storing SSE registers
158 in SImode, DImode and TImode */
159 3, /* MMX or SSE register to integer */
160 0, /* size of l1 cache */
161 0, /* size of l2 cache */
162 0, /* size of prefetch block */
163 0, /* number of parallel prefetches */
164 2, /* Branch cost */
165 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
166 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
167 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
168 COSTS_N_BYTES (2), /* cost of FABS instruction. */
169 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
170 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
171 ix86_size_memcpy,
172 ix86_size_memset,
173 1, /* scalar_stmt_cost. */
174 1, /* scalar load_cost. */
175 1, /* scalar_store_cost. */
176 1, /* vec_stmt_cost. */
177 1, /* vec_to_scalar_cost. */
178 1, /* scalar_to_vec_cost. */
179 1, /* vec_align_load_cost. */
180 1, /* vec_unalign_load_cost. */
181 1, /* vec_store_cost. */
182 1, /* cond_taken_branch_cost. */
183 1, /* cond_not_taken_branch_cost. */
184 };
185
186 /* Processor costs (relative to an add) */
187 static stringop_algs i386_memcpy[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190 static stringop_algs i386_memset[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
193
194 static const
195 struct processor_costs i386_cost = { /* 386 specific costs */
196 COSTS_N_INSNS (1), /* cost of an add instruction */
197 COSTS_N_INSNS (1), /* cost of a lea instruction */
198 COSTS_N_INSNS (3), /* variable shift costs */
199 COSTS_N_INSNS (2), /* constant shift costs */
200 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
201 COSTS_N_INSNS (6), /* HI */
202 COSTS_N_INSNS (6), /* SI */
203 COSTS_N_INSNS (6), /* DI */
204 COSTS_N_INSNS (6)}, /* other */
205 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
206 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
207 COSTS_N_INSNS (23), /* HI */
208 COSTS_N_INSNS (23), /* SI */
209 COSTS_N_INSNS (23), /* DI */
210 COSTS_N_INSNS (23)}, /* other */
211 COSTS_N_INSNS (3), /* cost of movsx */
212 COSTS_N_INSNS (2), /* cost of movzx */
213 15, /* "large" insn */
214 3, /* MOVE_RATIO */
215 4, /* cost for loading QImode using movzbl */
216 {2, 4, 2}, /* cost of loading integer registers
217 in QImode, HImode and SImode.
218 Relative to reg-reg move (2). */
219 {2, 4, 2}, /* cost of storing integer registers */
220 2, /* cost of reg,reg fld/fst */
221 {8, 8, 8}, /* cost of loading fp registers
222 in SFmode, DFmode and XFmode */
223 {8, 8, 8}, /* cost of storing fp registers
224 in SFmode, DFmode and XFmode */
225 2, /* cost of moving MMX register */
226 {4, 8}, /* cost of loading MMX registers
227 in SImode and DImode */
228 {4, 8}, /* cost of storing MMX registers
229 in SImode and DImode */
230 2, /* cost of moving SSE register */
231 {4, 8, 16}, /* cost of loading SSE registers
232 in SImode, DImode and TImode */
233 {4, 8, 16}, /* cost of storing SSE registers
234 in SImode, DImode and TImode */
235 3, /* MMX or SSE register to integer */
236 0, /* size of l1 cache */
237 0, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (22), /* cost of FABS instruction. */
245 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
247 i386_memcpy,
248 i386_memset,
249 1, /* scalar_stmt_cost. */
250 1, /* scalar load_cost. */
251 1, /* scalar_store_cost. */
252 1, /* vec_stmt_cost. */
253 1, /* vec_to_scalar_cost. */
254 1, /* scalar_to_vec_cost. */
255 1, /* vec_align_load_cost. */
256 2, /* vec_unalign_load_cost. */
257 1, /* vec_store_cost. */
258 3, /* cond_taken_branch_cost. */
259 1, /* cond_not_taken_branch_cost. */
260 };
261
262 static stringop_algs i486_memcpy[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265 static stringop_algs i486_memset[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
268
269 static const
270 struct processor_costs i486_cost = { /* 486 specific costs */
271 COSTS_N_INSNS (1), /* cost of an add instruction */
272 COSTS_N_INSNS (1), /* cost of a lea instruction */
273 COSTS_N_INSNS (3), /* variable shift costs */
274 COSTS_N_INSNS (2), /* constant shift costs */
275 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
276 COSTS_N_INSNS (12), /* HI */
277 COSTS_N_INSNS (12), /* SI */
278 COSTS_N_INSNS (12), /* DI */
279 COSTS_N_INSNS (12)}, /* other */
280 1, /* cost of multiply per each bit set */
281 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
282 COSTS_N_INSNS (40), /* HI */
283 COSTS_N_INSNS (40), /* SI */
284 COSTS_N_INSNS (40), /* DI */
285 COSTS_N_INSNS (40)}, /* other */
286 COSTS_N_INSNS (3), /* cost of movsx */
287 COSTS_N_INSNS (2), /* cost of movzx */
288 15, /* "large" insn */
289 3, /* MOVE_RATIO */
290 4, /* cost for loading QImode using movzbl */
291 {2, 4, 2}, /* cost of loading integer registers
292 in QImode, HImode and SImode.
293 Relative to reg-reg move (2). */
294 {2, 4, 2}, /* cost of storing integer registers */
295 2, /* cost of reg,reg fld/fst */
296 {8, 8, 8}, /* cost of loading fp registers
297 in SFmode, DFmode and XFmode */
298 {8, 8, 8}, /* cost of storing fp registers
299 in SFmode, DFmode and XFmode */
300 2, /* cost of moving MMX register */
301 {4, 8}, /* cost of loading MMX registers
302 in SImode and DImode */
303 {4, 8}, /* cost of storing MMX registers
304 in SImode and DImode */
305 2, /* cost of moving SSE register */
306 {4, 8, 16}, /* cost of loading SSE registers
307 in SImode, DImode and TImode */
308 {4, 8, 16}, /* cost of storing SSE registers
309 in SImode, DImode and TImode */
310 3, /* MMX or SSE register to integer */
311 4, /* size of l1 cache. 486 has 8kB cache
312 shared for code and data, so 4kB is
313 not really precise. */
314 4, /* size of l2 cache */
315 0, /* size of prefetch block */
316 0, /* number of parallel prefetches */
317 1, /* Branch cost */
318 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
319 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
320 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
321 COSTS_N_INSNS (3), /* cost of FABS instruction. */
322 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
323 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
324 i486_memcpy,
325 i486_memset,
326 1, /* scalar_stmt_cost. */
327 1, /* scalar load_cost. */
328 1, /* scalar_store_cost. */
329 1, /* vec_stmt_cost. */
330 1, /* vec_to_scalar_cost. */
331 1, /* scalar_to_vec_cost. */
332 1, /* vec_align_load_cost. */
333 2, /* vec_unalign_load_cost. */
334 1, /* vec_store_cost. */
335 3, /* cond_taken_branch_cost. */
336 1, /* cond_not_taken_branch_cost. */
337 };
338
339 static stringop_algs pentium_memcpy[2] = {
340 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
341 DUMMY_STRINGOP_ALGS};
342 static stringop_algs pentium_memset[2] = {
343 {libcall, {{-1, rep_prefix_4_byte, false}}},
344 DUMMY_STRINGOP_ALGS};
345
346 static const
347 struct processor_costs pentium_cost = {
348 COSTS_N_INSNS (1), /* cost of an add instruction */
349 COSTS_N_INSNS (1), /* cost of a lea instruction */
350 COSTS_N_INSNS (4), /* variable shift costs */
351 COSTS_N_INSNS (1), /* constant shift costs */
352 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
353 COSTS_N_INSNS (11), /* HI */
354 COSTS_N_INSNS (11), /* SI */
355 COSTS_N_INSNS (11), /* DI */
356 COSTS_N_INSNS (11)}, /* other */
357 0, /* cost of multiply per each bit set */
358 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
359 COSTS_N_INSNS (25), /* HI */
360 COSTS_N_INSNS (25), /* SI */
361 COSTS_N_INSNS (25), /* DI */
362 COSTS_N_INSNS (25)}, /* other */
363 COSTS_N_INSNS (3), /* cost of movsx */
364 COSTS_N_INSNS (2), /* cost of movzx */
365 8, /* "large" insn */
366 6, /* MOVE_RATIO */
367 6, /* cost for loading QImode using movzbl */
368 {2, 4, 2}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 4, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
376 in SFmode, DFmode and XFmode */
377 8, /* cost of moving MMX register */
378 {8, 8}, /* cost of loading MMX registers
379 in SImode and DImode */
380 {8, 8}, /* cost of storing MMX registers
381 in SImode and DImode */
382 2, /* cost of moving SSE register */
383 {4, 8, 16}, /* cost of loading SSE registers
384 in SImode, DImode and TImode */
385 {4, 8, 16}, /* cost of storing SSE registers
386 in SImode, DImode and TImode */
387 3, /* MMX or SSE register to integer */
388 8, /* size of l1 cache. */
389 8, /* size of l2 cache */
390 0, /* size of prefetch block */
391 0, /* number of parallel prefetches */
392 2, /* Branch cost */
393 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
394 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
395 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
396 COSTS_N_INSNS (1), /* cost of FABS instruction. */
397 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
398 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
399 pentium_memcpy,
400 pentium_memset,
401 1, /* scalar_stmt_cost. */
402 1, /* scalar load_cost. */
403 1, /* scalar_store_cost. */
404 1, /* vec_stmt_cost. */
405 1, /* vec_to_scalar_cost. */
406 1, /* scalar_to_vec_cost. */
407 1, /* vec_align_load_cost. */
408 2, /* vec_unalign_load_cost. */
409 1, /* vec_store_cost. */
410 3, /* cond_taken_branch_cost. */
411 1, /* cond_not_taken_branch_cost. */
412 };
413
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
415 (we ensure the alignment). For small blocks an inline loop is still a
416 noticeable win; for bigger blocks either rep movsl or rep movsb is the
417 way to go. Rep movsb apparently has a more expensive startup time in the
418 CPU, but after 4K the difference is down in the noise. */
419 static stringop_algs pentiumpro_memcpy[2] = {
420 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
421 {8192, rep_prefix_4_byte, false},
422 {-1, rep_prefix_1_byte, false}}},
423 DUMMY_STRINGOP_ALGS};
424 static stringop_algs pentiumpro_memset[2] = {
425 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
426 {8192, rep_prefix_4_byte, false},
427 {-1, libcall, false}}},
428 DUMMY_STRINGOP_ALGS};
429 static const
430 struct processor_costs pentiumpro_cost = {
431 COSTS_N_INSNS (1), /* cost of an add instruction */
432 COSTS_N_INSNS (1), /* cost of a lea instruction */
433 COSTS_N_INSNS (1), /* variable shift costs */
434 COSTS_N_INSNS (1), /* constant shift costs */
435 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
436 COSTS_N_INSNS (4), /* HI */
437 COSTS_N_INSNS (4), /* SI */
438 COSTS_N_INSNS (4), /* DI */
439 COSTS_N_INSNS (4)}, /* other */
440 0, /* cost of multiply per each bit set */
441 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
442 COSTS_N_INSNS (17), /* HI */
443 COSTS_N_INSNS (17), /* SI */
444 COSTS_N_INSNS (17), /* DI */
445 COSTS_N_INSNS (17)}, /* other */
446 COSTS_N_INSNS (1), /* cost of movsx */
447 COSTS_N_INSNS (1), /* cost of movzx */
448 8, /* "large" insn */
449 6, /* MOVE_RATIO */
450 2, /* cost for loading QImode using movzbl */
451 {4, 4, 4}, /* cost of loading integer registers
452 in QImode, HImode and SImode.
453 Relative to reg-reg move (2). */
454 {2, 2, 2}, /* cost of storing integer registers */
455 2, /* cost of reg,reg fld/fst */
456 {2, 2, 6}, /* cost of loading fp registers
457 in SFmode, DFmode and XFmode */
458 {4, 4, 6}, /* cost of storing fp registers
459 in SFmode, DFmode and XFmode */
460 2, /* cost of moving MMX register */
461 {2, 2}, /* cost of loading MMX registers
462 in SImode and DImode */
463 {2, 2}, /* cost of storing MMX registers
464 in SImode and DImode */
465 2, /* cost of moving SSE register */
466 {2, 2, 8}, /* cost of loading SSE registers
467 in SImode, DImode and TImode */
468 {2, 2, 8}, /* cost of storing SSE registers
469 in SImode, DImode and TImode */
470 3, /* MMX or SSE register to integer */
471 8, /* size of l1 cache. */
472 256, /* size of l2 cache */
473 32, /* size of prefetch block */
474 6, /* number of parallel prefetches */
475 2, /* Branch cost */
476 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
477 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
478 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
479 COSTS_N_INSNS (2), /* cost of FABS instruction. */
480 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
481 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
482 pentiumpro_memcpy,
483 pentiumpro_memset,
484 1, /* scalar_stmt_cost. */
485 1, /* scalar load_cost. */
486 1, /* scalar_store_cost. */
487 1, /* vec_stmt_cost. */
488 1, /* vec_to_scalar_cost. */
489 1, /* scalar_to_vec_cost. */
490 1, /* vec_align_load_cost. */
491 2, /* vec_unalign_load_cost. */
492 1, /* vec_store_cost. */
493 3, /* cond_taken_branch_cost. */
494 1, /* cond_not_taken_branch_cost. */
495 };
496
497 static stringop_algs geode_memcpy[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static stringop_algs geode_memset[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs geode_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (2), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (7), /* SI */
512 COSTS_N_INSNS (7), /* DI */
513 COSTS_N_INSNS (7)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (23), /* HI */
517 COSTS_N_INSNS (39), /* SI */
518 COSTS_N_INSNS (39), /* DI */
519 COSTS_N_INSNS (39)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 4, /* MOVE_RATIO */
524 1, /* cost for loading QImode using movzbl */
525 {1, 1, 1}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {1, 1, 1}, /* cost of storing integer registers */
529 1, /* cost of reg,reg fld/fst */
530 {1, 1, 1}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 6, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
534
535 1, /* cost of moving MMX register */
536 {1, 1}, /* cost of loading MMX registers
537 in SImode and DImode */
538 {1, 1}, /* cost of storing MMX registers
539 in SImode and DImode */
540 1, /* cost of moving SSE register */
541 {1, 1, 1}, /* cost of loading SSE registers
542 in SImode, DImode and TImode */
543 {1, 1, 1}, /* cost of storing SSE registers
544 in SImode, DImode and TImode */
545 1, /* MMX or SSE register to integer */
546 64, /* size of l1 cache. */
547 128, /* size of l2 cache. */
548 32, /* size of prefetch block */
549 1, /* number of parallel prefetches */
550 1, /* Branch cost */
551 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
552 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
553 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
554 COSTS_N_INSNS (1), /* cost of FABS instruction. */
555 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
556 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
557 geode_memcpy,
558 geode_memset,
559 1, /* scalar_stmt_cost. */
560 1, /* scalar load_cost. */
561 1, /* scalar_store_cost. */
562 1, /* vec_stmt_cost. */
563 1, /* vec_to_scalar_cost. */
564 1, /* scalar_to_vec_cost. */
565 1, /* vec_align_load_cost. */
566 2, /* vec_unalign_load_cost. */
567 1, /* vec_store_cost. */
568 3, /* cond_taken_branch_cost. */
569 1, /* cond_not_taken_branch_cost. */
570 };
571
572 static stringop_algs k6_memcpy[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static stringop_algs k6_memset[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static const
579 struct processor_costs k6_cost = {
580 COSTS_N_INSNS (1), /* cost of an add instruction */
581 COSTS_N_INSNS (2), /* cost of a lea instruction */
582 COSTS_N_INSNS (1), /* variable shift costs */
583 COSTS_N_INSNS (1), /* constant shift costs */
584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
585 COSTS_N_INSNS (3), /* HI */
586 COSTS_N_INSNS (3), /* SI */
587 COSTS_N_INSNS (3), /* DI */
588 COSTS_N_INSNS (3)}, /* other */
589 0, /* cost of multiply per each bit set */
590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
591 COSTS_N_INSNS (18), /* HI */
592 COSTS_N_INSNS (18), /* SI */
593 COSTS_N_INSNS (18), /* DI */
594 COSTS_N_INSNS (18)}, /* other */
595 COSTS_N_INSNS (2), /* cost of movsx */
596 COSTS_N_INSNS (2), /* cost of movzx */
597 8, /* "large" insn */
598 4, /* MOVE_RATIO */
599 3, /* cost for loading QImode using movzbl */
600 {4, 5, 4}, /* cost of loading integer registers
601 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */
603 {2, 3, 2}, /* cost of storing integer registers */
604 4, /* cost of reg,reg fld/fst */
605 {6, 6, 6}, /* cost of loading fp registers
606 in SFmode, DFmode and XFmode */
607 {4, 4, 4}, /* cost of storing fp registers
608 in SFmode, DFmode and XFmode */
609 2, /* cost of moving MMX register */
610 {2, 2}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {2, 2}, /* cost of storing MMX registers
613 in SImode and DImode */
614 2, /* cost of moving SSE register */
615 {2, 2, 8}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {2, 2, 8}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 6, /* MMX or SSE register to integer */
620 32, /* size of l1 cache. */
621 32, /* size of l2 cache. Some models
622 have integrated l2 cache, but
623 optimizing for k6 is not important
624 enough to worry about that. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
634 k6_memcpy,
635 k6_memset,
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
647 };
648
649 /* For some reason, Athlon handles the REP prefix better (relative to loops)
650 than K8 does. Alignment becomes important after 8 bytes for memcpy and
651 128 bytes for memset. */
652 static stringop_algs athlon_memcpy[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static stringop_algs athlon_memset[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static const
659 struct processor_costs athlon_cost = {
660 COSTS_N_INSNS (1), /* cost of an add instruction */
661 COSTS_N_INSNS (2), /* cost of a lea instruction */
662 COSTS_N_INSNS (1), /* variable shift costs */
663 COSTS_N_INSNS (1), /* constant shift costs */
664 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
665 COSTS_N_INSNS (5), /* HI */
666 COSTS_N_INSNS (5), /* SI */
667 COSTS_N_INSNS (5), /* DI */
668 COSTS_N_INSNS (5)}, /* other */
669 0, /* cost of multiply per each bit set */
670 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
671 COSTS_N_INSNS (26), /* HI */
672 COSTS_N_INSNS (42), /* SI */
673 COSTS_N_INSNS (74), /* DI */
674 COSTS_N_INSNS (74)}, /* other */
675 COSTS_N_INSNS (1), /* cost of movsx */
676 COSTS_N_INSNS (1), /* cost of movzx */
677 8, /* "large" insn */
678 9, /* MOVE_RATIO */
679 4, /* cost for loading QImode using movzbl */
680 {3, 4, 3}, /* cost of loading integer registers
681 in QImode, HImode and SImode.
682 Relative to reg-reg move (2). */
683 {3, 4, 3}, /* cost of storing integer registers */
684 4, /* cost of reg,reg fld/fst */
685 {4, 4, 12}, /* cost of loading fp registers
686 in SFmode, DFmode and XFmode */
687 {6, 6, 8}, /* cost of storing fp registers
688 in SFmode, DFmode and XFmode */
689 2, /* cost of moving MMX register */
690 {4, 4}, /* cost of loading MMX registers
691 in SImode and DImode */
692 {4, 4}, /* cost of storing MMX registers
693 in SImode and DImode */
694 2, /* cost of moving SSE register */
695 {4, 4, 6}, /* cost of loading SSE registers
696 in SImode, DImode and TImode */
697 {4, 4, 5}, /* cost of storing SSE registers
698 in SImode, DImode and TImode */
699 5, /* MMX or SSE register to integer */
700 64, /* size of l1 cache. */
701 256, /* size of l2 cache. */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 5, /* Branch cost */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 athlon_memcpy,
712 athlon_memset,
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
724 };
725
726 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
727 small blocks it is better to use a loop. For large blocks, a libcall can
728 do non-temporal accesses and beat inline code considerably. */
729 static stringop_algs k8_memcpy[2] = {
730 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
731 {-1, rep_prefix_4_byte, false}}},
732 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
733 {-1, libcall, false}}}};
734 static stringop_algs k8_memset[2] = {
735 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
736 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
737 {libcall, {{48, unrolled_loop, false},
738 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
739 static const
740 struct processor_costs k8_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (2), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (4), /* HI */
747 COSTS_N_INSNS (3), /* SI */
748 COSTS_N_INSNS (4), /* DI */
749 COSTS_N_INSNS (5)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (26), /* HI */
753 COSTS_N_INSNS (42), /* SI */
754 COSTS_N_INSNS (74), /* DI */
755 COSTS_N_INSNS (74)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 8, /* "large" insn */
759 9, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {3, 4, 3}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {3, 4, 3}, /* cost of storing integer registers */
765 4, /* cost of reg,reg fld/fst */
766 {4, 4, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {6, 6, 8}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 2, /* cost of moving MMX register */
771 {3, 3}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {4, 4}, /* cost of storing MMX registers
774 in SImode and DImode */
775 2, /* cost of moving SSE register */
776 {4, 3, 6}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {4, 4, 5}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 5, /* MMX or SSE register to integer */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set the number of simultaneous prefetches
786 to a large constant to reflect this (it is probably not a good idea to
787 leave the number of prefetches completely unlimited, as their execution
788 also takes some time). */
789 100, /* number of parallel prefetches */
790 3, /* Branch cost */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797
798 k8_memcpy,
799 k8_memset,
800 4, /* scalar_stmt_cost. */
801 2, /* scalar load_cost. */
802 2, /* scalar_store_cost. */
803 5, /* vec_stmt_cost. */
804 0, /* vec_to_scalar_cost. */
805 2, /* scalar_to_vec_cost. */
806 2, /* vec_align_load_cost. */
807 3, /* vec_unalign_load_cost. */
808 3, /* vec_store_cost. */
809 3, /* cond_taken_branch_cost. */
810 2, /* cond_not_taken_branch_cost. */
811 };
812
813 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
814 for very small blocks it is better to use a loop. For large blocks, a
815 libcall can do non-temporal accesses and beat inline code considerably. */
816 static stringop_algs amdfam10_memcpy[2] = {
817 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
818 {-1, rep_prefix_4_byte, false}}},
819 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
820 {-1, libcall, false}}}};
821 static stringop_algs amdfam10_memset[2] = {
822 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}};
826 struct processor_costs amdfam10_cost = {
827 COSTS_N_INSNS (1), /* cost of an add instruction */
828 COSTS_N_INSNS (2), /* cost of a lea instruction */
829 COSTS_N_INSNS (1), /* variable shift costs */
830 COSTS_N_INSNS (1), /* constant shift costs */
831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
832 COSTS_N_INSNS (4), /* HI */
833 COSTS_N_INSNS (3), /* SI */
834 COSTS_N_INSNS (4), /* DI */
835 COSTS_N_INSNS (5)}, /* other */
836 0, /* cost of multiply per each bit set */
837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
838 COSTS_N_INSNS (35), /* HI */
839 COSTS_N_INSNS (51), /* SI */
840 COSTS_N_INSNS (83), /* DI */
841 COSTS_N_INSNS (83)}, /* other */
842 COSTS_N_INSNS (1), /* cost of movsx */
843 COSTS_N_INSNS (1), /* cost of movzx */
844 8, /* "large" insn */
845 9, /* MOVE_RATIO */
846 4, /* cost for loading QImode using movzbl */
847 {3, 4, 3}, /* cost of loading integer registers
848 in QImode, HImode and SImode.
849 Relative to reg-reg move (2). */
850 {3, 4, 3}, /* cost of storing integer registers */
851 4, /* cost of reg,reg fld/fst */
852 {4, 4, 12}, /* cost of loading fp registers
853 in SFmode, DFmode and XFmode */
854 {6, 6, 8}, /* cost of storing fp registers
855 in SFmode, DFmode and XFmode */
856 2, /* cost of moving MMX register */
857 {3, 3}, /* cost of loading MMX registers
858 in SImode and DImode */
859 {4, 4}, /* cost of storing MMX registers
860 in SImode and DImode */
861 2, /* cost of moving SSE register */
862 {4, 4, 3}, /* cost of loading SSE registers
863 in SImode, DImode and TImode */
864 {4, 4, 5}, /* cost of storing SSE registers
865 in SImode, DImode and TImode */
866 3, /* MMX or SSE register to integer */
867 /* On K8:
868 MOVD reg64, xmmreg Double FSTORE 4
869 MOVD reg32, xmmreg Double FSTORE 4
870 On AMDFAM10:
871 MOVD reg64, xmmreg Double FADD 3
872 1/1 1/1
873 MOVD reg32, xmmreg Double FADD 3
874 1/1 1/1 */
875 64, /* size of l1 cache. */
876 512, /* size of l2 cache. */
877 64, /* size of prefetch block */
878 /* New AMD processors never drop prefetches; if they cannot be performed
879 immediately, they are queued. We set the number of simultaneous prefetches
880 to a large constant to reflect this (it is probably not a good idea to
881 leave the number of prefetches completely unlimited, as their execution
882 also takes some time). */
883 100, /* number of parallel prefetches */
884 2, /* Branch cost */
885 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
886 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
887 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
888 COSTS_N_INSNS (2), /* cost of FABS instruction. */
889 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
890 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
891
892 amdfam10_memcpy,
893 amdfam10_memset,
894 4, /* scalar_stmt_cost. */
895 2, /* scalar load_cost. */
896 2, /* scalar_store_cost. */
897 6, /* vec_stmt_cost. */
898 0, /* vec_to_scalar_cost. */
899 2, /* scalar_to_vec_cost. */
900 2, /* vec_align_load_cost. */
901 2, /* vec_unalign_load_cost. */
902 2, /* vec_store_cost. */
903 2, /* cond_taken_branch_cost. */
904 1, /* cond_not_taken_branch_cost. */
905 };
906
907 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
908 very small blocks it is better to use a loop. For large blocks, a libcall
909 can do non-temporal accesses and beat inline code considerably. */
910 static stringop_algs bdver1_memcpy[2] = {
911 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
912 {-1, rep_prefix_4_byte, false}}},
913 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
914 {-1, libcall, false}}}};
915 static stringop_algs bdver1_memset[2] = {
916 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
917 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
918 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
919 {-1, libcall, false}}}};
920
921 const struct processor_costs bdver1_cost = {
922 COSTS_N_INSNS (1), /* cost of an add instruction */
923 COSTS_N_INSNS (1), /* cost of a lea instruction */
924 COSTS_N_INSNS (1), /* variable shift costs */
925 COSTS_N_INSNS (1), /* constant shift costs */
926 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
927 COSTS_N_INSNS (4), /* HI */
928 COSTS_N_INSNS (4), /* SI */
929 COSTS_N_INSNS (6), /* DI */
930 COSTS_N_INSNS (6)}, /* other */
931 0, /* cost of multiply per each bit set */
932 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
933 COSTS_N_INSNS (35), /* HI */
934 COSTS_N_INSNS (51), /* SI */
935 COSTS_N_INSNS (83), /* DI */
936 COSTS_N_INSNS (83)}, /* other */
937 COSTS_N_INSNS (1), /* cost of movsx */
938 COSTS_N_INSNS (1), /* cost of movzx */
939 8, /* "large" insn */
940 9, /* MOVE_RATIO */
941 4, /* cost for loading QImode using movzbl */
942 {5, 5, 4}, /* cost of loading integer registers
943 in QImode, HImode and SImode.
944 Relative to reg-reg move (2). */
945 {4, 4, 4}, /* cost of storing integer registers */
946 2, /* cost of reg,reg fld/fst */
947 {5, 5, 12}, /* cost of loading fp registers
948 in SFmode, DFmode and XFmode */
949 {4, 4, 8}, /* cost of storing fp registers
950 in SFmode, DFmode and XFmode */
951 2, /* cost of moving MMX register */
952 {4, 4}, /* cost of loading MMX registers
953 in SImode and DImode */
954 {4, 4}, /* cost of storing MMX registers
955 in SImode and DImode */
956 2, /* cost of moving SSE register */
957 {4, 4, 4}, /* cost of loading SSE registers
958 in SImode, DImode and TImode */
959 {4, 4, 4}, /* cost of storing SSE registers
960 in SImode, DImode and TImode */
961 2, /* MMX or SSE register to integer */
962 /* On K8:
963 MOVD reg64, xmmreg Double FSTORE 4
964 MOVD reg32, xmmreg Double FSTORE 4
965 On AMDFAM10:
966 MOVD reg64, xmmreg Double FADD 3
967 1/1 1/1
968 MOVD reg32, xmmreg Double FADD 3
969 1/1 1/1 */
970 16, /* size of l1 cache. */
971 2048, /* size of l2 cache. */
972 64, /* size of prefetch block */
973 /* New AMD processors never drop prefetches; if they cannot be performed
974 immediately, they are queued. We set the number of simultaneous prefetches
975 to a large constant to reflect this (it is probably not a good idea to
976 leave the number of prefetches completely unlimited, as their execution
977 also takes some time). */
978 100, /* number of parallel prefetches */
979 2, /* Branch cost */
980 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
981 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
982 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
983 COSTS_N_INSNS (2), /* cost of FABS instruction. */
984 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
985 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
986
987 bdver1_memcpy,
988 bdver1_memset,
989 6, /* scalar_stmt_cost. */
990 4, /* scalar load_cost. */
991 4, /* scalar_store_cost. */
992 6, /* vec_stmt_cost. */
993 0, /* vec_to_scalar_cost. */
994 2, /* scalar_to_vec_cost. */
995 4, /* vec_align_load_cost. */
996 4, /* vec_unalign_load_cost. */
997 4, /* vec_store_cost. */
998 2, /* cond_taken_branch_cost. */
999 1, /* cond_not_taken_branch_cost. */
1000 };
1001
1002 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1003 very small blocks it is better to use a loop. For large blocks, a libcall
1004 can do non-temporal accesses and beat inline code considerably. */
1005
1006 static stringop_algs bdver2_memcpy[2] = {
1007 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1008 {-1, rep_prefix_4_byte, false}}},
1009 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1010 {-1, libcall, false}}}};
1011 static stringop_algs bdver2_memset[2] = {
1012 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1013 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1014 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1015 {-1, libcall, false}}}};
1016
1017 const struct processor_costs bdver2_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 /* On K8:
1059 MOVD reg64, xmmreg Double FSTORE 4
1060 MOVD reg32, xmmreg Double FSTORE 4
1061 On AMDFAM10:
1062 MOVD reg64, xmmreg Double FADD 3
1063 1/1 1/1
1064 MOVD reg32, xmmreg Double FADD 3
1065 1/1 1/1 */
1066 16, /* size of l1 cache. */
1067 2048, /* size of l2 cache. */
1068 64, /* size of prefetch block */
1069 /* New AMD processors never drop prefetches; if they cannot be performed
1070 immediately, they are queued. We set the number of simultaneous prefetches
1071 to a large constant to reflect this (it is probably not a good idea to
1072 leave the number of prefetches completely unlimited, as their execution
1073 also takes some time). */
1074 100, /* number of parallel prefetches */
1075 2, /* Branch cost */
1076 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1077 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1078 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1079 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1080 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1081 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1082
1083 bdver2_memcpy,
1084 bdver2_memset,
1085 6, /* scalar_stmt_cost. */
1086 4, /* scalar load_cost. */
1087 4, /* scalar_store_cost. */
1088 6, /* vec_stmt_cost. */
1089 0, /* vec_to_scalar_cost. */
1090 2, /* scalar_to_vec_cost. */
1091 4, /* vec_align_load_cost. */
1092 4, /* vec_unalign_load_cost. */
1093 4, /* vec_store_cost. */
1094 2, /* cond_taken_branch_cost. */
1095 1, /* cond_not_taken_branch_cost. */
1096 };
1097
1098
1099 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1100 very small blocks it is better to use a loop. For large blocks, a libcall
1101 can do non-temporal accesses and beat inline code considerably. */
1102 static stringop_algs bdver3_memcpy[2] = {
1103 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1104 {-1, rep_prefix_4_byte, false}}},
1105 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1106 {-1, libcall, false}}}};
1107 static stringop_algs bdver3_memset[2] = {
1108 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1109 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1110 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1111 {-1, libcall, false}}}};
1112 struct processor_costs bdver3_cost = {
1113 COSTS_N_INSNS (1), /* cost of an add instruction */
1114 COSTS_N_INSNS (1), /* cost of a lea instruction */
1115 COSTS_N_INSNS (1), /* variable shift costs */
1116 COSTS_N_INSNS (1), /* constant shift costs */
1117 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1118 COSTS_N_INSNS (4), /* HI */
1119 COSTS_N_INSNS (4), /* SI */
1120 COSTS_N_INSNS (6), /* DI */
1121 COSTS_N_INSNS (6)}, /* other */
1122 0, /* cost of multiply per each bit set */
1123 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1124 COSTS_N_INSNS (35), /* HI */
1125 COSTS_N_INSNS (51), /* SI */
1126 COSTS_N_INSNS (83), /* DI */
1127 COSTS_N_INSNS (83)}, /* other */
1128 COSTS_N_INSNS (1), /* cost of movsx */
1129 COSTS_N_INSNS (1), /* cost of movzx */
1130 8, /* "large" insn */
1131 9, /* MOVE_RATIO */
1132 4, /* cost for loading QImode using movzbl */
1133 {5, 5, 4}, /* cost of loading integer registers
1134 in QImode, HImode and SImode.
1135 Relative to reg-reg move (2). */
1136 {4, 4, 4}, /* cost of storing integer registers */
1137 2, /* cost of reg,reg fld/fst */
1138 {5, 5, 12}, /* cost of loading fp registers
1139 in SFmode, DFmode and XFmode */
1140 {4, 4, 8}, /* cost of storing fp registers
1141 in SFmode, DFmode and XFmode */
1142 2, /* cost of moving MMX register */
1143 {4, 4}, /* cost of loading MMX registers
1144 in SImode and DImode */
1145 {4, 4}, /* cost of storing MMX registers
1146 in SImode and DImode */
1147 2, /* cost of moving SSE register */
1148 {4, 4, 4}, /* cost of loading SSE registers
1149 in SImode, DImode and TImode */
1150 {4, 4, 4}, /* cost of storing SSE registers
1151 in SImode, DImode and TImode */
1152 2, /* MMX or SSE register to integer */
1153 16, /* size of l1 cache. */
1154 2048, /* size of l2 cache. */
1155 64, /* size of prefetch block */
1156 /* New AMD processors never drop prefetches; if they cannot be performed
1157 immediately, they are queued. We set the number of simultaneous prefetches
1158 to a large constant to reflect this (it is probably not a good idea to
1159 leave the number of prefetches completely unlimited, as their execution
1160 also takes some time). */
1161 100, /* number of parallel prefetches */
1162 2, /* Branch cost */
1163 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1164 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1165 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1166 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1167 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1168 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1169
1170 bdver3_memcpy,
1171 bdver3_memset,
1172 6, /* scalar_stmt_cost. */
1173 4, /* scalar load_cost. */
1174 4, /* scalar_store_cost. */
1175 6, /* vec_stmt_cost. */
1176 0, /* vec_to_scalar_cost. */
1177 2, /* scalar_to_vec_cost. */
1178 4, /* vec_align_load_cost. */
1179 4, /* vec_unalign_load_cost. */
1180 4, /* vec_store_cost. */
1181 2, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1184
1185 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1186 very small blocks it is better to use a loop. For large blocks, a libcall
1187 can do non-temporal accesses and beat inline code considerably. */
1188 static stringop_algs bdver4_memcpy[2] = {
1189 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1190 {-1, rep_prefix_4_byte, false}}},
1191 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1192 {-1, libcall, false}}}};
1193 static stringop_algs bdver4_memset[2] = {
1194 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1195 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1196 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1197 {-1, libcall, false}}}};
1198 struct processor_costs bdver4_cost = {
1199 COSTS_N_INSNS (1), /* cost of an add instruction */
1200 COSTS_N_INSNS (1), /* cost of a lea instruction */
1201 COSTS_N_INSNS (1), /* variable shift costs */
1202 COSTS_N_INSNS (1), /* constant shift costs */
1203 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1204 COSTS_N_INSNS (4), /* HI */
1205 COSTS_N_INSNS (4), /* SI */
1206 COSTS_N_INSNS (6), /* DI */
1207 COSTS_N_INSNS (6)}, /* other */
1208 0, /* cost of multiply per each bit set */
1209 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1210 COSTS_N_INSNS (35), /* HI */
1211 COSTS_N_INSNS (51), /* SI */
1212 COSTS_N_INSNS (83), /* DI */
1213 COSTS_N_INSNS (83)}, /* other */
1214 COSTS_N_INSNS (1), /* cost of movsx */
1215 COSTS_N_INSNS (1), /* cost of movzx */
1216 8, /* "large" insn */
1217 9, /* MOVE_RATIO */
1218 4, /* cost for loading QImode using movzbl */
1219 {5, 5, 4}, /* cost of loading integer registers
1220 in QImode, HImode and SImode.
1221 Relative to reg-reg move (2). */
1222 {4, 4, 4}, /* cost of storing integer registers */
1223 2, /* cost of reg,reg fld/fst */
1224 {5, 5, 12}, /* cost of loading fp registers
1225 in SFmode, DFmode and XFmode */
1226 {4, 4, 8}, /* cost of storing fp registers
1227 in SFmode, DFmode and XFmode */
1228 2, /* cost of moving MMX register */
1229 {4, 4}, /* cost of loading MMX registers
1230 in SImode and DImode */
1231 {4, 4}, /* cost of storing MMX registers
1232 in SImode and DImode */
1233 2, /* cost of moving SSE register */
1234 {4, 4, 4}, /* cost of loading SSE registers
1235 in SImode, DImode and TImode */
1236 {4, 4, 4}, /* cost of storing SSE registers
1237 in SImode, DImode and TImode */
1238 2, /* MMX or SSE register to integer */
1239 16, /* size of l1 cache. */
1240 2048, /* size of l2 cache. */
1241 64, /* size of prefetch block */
1242 /* New AMD processors never drop prefetches; if they cannot be performed
1243 immediately, they are queued. We set the number of simultaneous prefetches
1244 to a large constant to reflect this (it is probably not a good idea to
1245 leave the number of prefetches completely unlimited, as their execution
1246 also takes some time). */
1247 100, /* number of parallel prefetches */
1248 2, /* Branch cost */
1249 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1250 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1251 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1252 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1253 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1254 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1255
1256 bdver4_memcpy,
1257 bdver4_memset,
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1269 };
1270
1271 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1272 very small blocks it is better to use a loop. For large blocks, a libcall
1273 can do non-temporal accesses and beat inline code considerably. */
1274 static stringop_algs btver1_memcpy[2] = {
1275 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1276 {-1, rep_prefix_4_byte, false}}},
1277 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1278 {-1, libcall, false}}}};
1279 static stringop_algs btver1_memset[2] = {
1280 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1281 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1282 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1283 {-1, libcall, false}}}};
1284 const struct processor_costs btver1_cost = {
1285 COSTS_N_INSNS (1), /* cost of an add instruction */
1286 COSTS_N_INSNS (2), /* cost of a lea instruction */
1287 COSTS_N_INSNS (1), /* variable shift costs */
1288 COSTS_N_INSNS (1), /* constant shift costs */
1289 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1290 COSTS_N_INSNS (4), /* HI */
1291 COSTS_N_INSNS (3), /* SI */
1292 COSTS_N_INSNS (4), /* DI */
1293 COSTS_N_INSNS (5)}, /* other */
1294 0, /* cost of multiply per each bit set */
1295 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1296 COSTS_N_INSNS (35), /* HI */
1297 COSTS_N_INSNS (51), /* SI */
1298 COSTS_N_INSNS (83), /* DI */
1299 COSTS_N_INSNS (83)}, /* other */
1300 COSTS_N_INSNS (1), /* cost of movsx */
1301 COSTS_N_INSNS (1), /* cost of movzx */
1302 8, /* "large" insn */
1303 9, /* MOVE_RATIO */
1304 4, /* cost for loading QImode using movzbl */
1305 {3, 4, 3}, /* cost of loading integer registers
1306 in QImode, HImode and SImode.
1307 Relative to reg-reg move (2). */
1308 {3, 4, 3}, /* cost of storing integer registers */
1309 4, /* cost of reg,reg fld/fst */
1310 {4, 4, 12}, /* cost of loading fp registers
1311 in SFmode, DFmode and XFmode */
1312 {6, 6, 8}, /* cost of storing fp registers
1313 in SFmode, DFmode and XFmode */
1314 2, /* cost of moving MMX register */
1315 {3, 3}, /* cost of loading MMX registers
1316 in SImode and DImode */
1317 {4, 4}, /* cost of storing MMX registers
1318 in SImode and DImode */
1319 2, /* cost of moving SSE register */
1320 {4, 4, 3}, /* cost of loading SSE registers
1321 in SImode, DImode and TImode */
1322 {4, 4, 5}, /* cost of storing SSE registers
1323 in SImode, DImode and TImode */
1324 3, /* MMX or SSE register to integer */
1325 /* On K8:
1326 MOVD reg64, xmmreg Double FSTORE 4
1327 MOVD reg32, xmmreg Double FSTORE 4
1328 On AMDFAM10:
1329 MOVD reg64, xmmreg Double FADD 3
1330 1/1 1/1
1331 MOVD reg32, xmmreg Double FADD 3
1332 1/1 1/1 */
1333 32, /* size of l1 cache. */
1334 512, /* size of l2 cache. */
1335 64, /* size of prefetch block */
1336 100, /* number of parallel prefetches */
1337 2, /* Branch cost */
1338 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1339 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1340 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1341 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1342 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1343 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1344
1345 btver1_memcpy,
1346 btver1_memset,
1347 4, /* scalar_stmt_cost. */
1348 2, /* scalar load_cost. */
1349 2, /* scalar_store_cost. */
1350 6, /* vec_stmt_cost. */
1351 0, /* vec_to_scalar_cost. */
1352 2, /* scalar_to_vec_cost. */
1353 2, /* vec_align_load_cost. */
1354 2, /* vec_unalign_load_cost. */
1355 2, /* vec_store_cost. */
1356 2, /* cond_taken_branch_cost. */
1357 1, /* cond_not_taken_branch_cost. */
1358 };
1359
1360 static stringop_algs btver2_memcpy[2] = {
1361 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1362 {-1, rep_prefix_4_byte, false}}},
1363 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1364 {-1, libcall, false}}}};
1365 static stringop_algs btver2_memset[2] = {
1366 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1367 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1368 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1369 {-1, libcall, false}}}};
1370 const struct processor_costs btver2_cost = {
1371 COSTS_N_INSNS (1), /* cost of an add instruction */
1372 COSTS_N_INSNS (2), /* cost of a lea instruction */
1373 COSTS_N_INSNS (1), /* variable shift costs */
1374 COSTS_N_INSNS (1), /* constant shift costs */
1375 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1376 COSTS_N_INSNS (4), /* HI */
1377 COSTS_N_INSNS (3), /* SI */
1378 COSTS_N_INSNS (4), /* DI */
1379 COSTS_N_INSNS (5)}, /* other */
1380 0, /* cost of multiply per each bit set */
1381 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1382 COSTS_N_INSNS (35), /* HI */
1383 COSTS_N_INSNS (51), /* SI */
1384 COSTS_N_INSNS (83), /* DI */
1385 COSTS_N_INSNS (83)}, /* other */
1386 COSTS_N_INSNS (1), /* cost of movsx */
1387 COSTS_N_INSNS (1), /* cost of movzx */
1388 8, /* "large" insn */
1389 9, /* MOVE_RATIO */
1390 4, /* cost for loading QImode using movzbl */
1391 {3, 4, 3}, /* cost of loading integer registers
1392 in QImode, HImode and SImode.
1393 Relative to reg-reg move (2). */
1394 {3, 4, 3}, /* cost of storing integer registers */
1395 4, /* cost of reg,reg fld/fst */
1396 {4, 4, 12}, /* cost of loading fp registers
1397 in SFmode, DFmode and XFmode */
1398 {6, 6, 8}, /* cost of storing fp registers
1399 in SFmode, DFmode and XFmode */
1400 2, /* cost of moving MMX register */
1401 {3, 3}, /* cost of loading MMX registers
1402 in SImode and DImode */
1403 {4, 4}, /* cost of storing MMX registers
1404 in SImode and DImode */
1405 2, /* cost of moving SSE register */
1406 {4, 4, 3}, /* cost of loading SSE registers
1407 in SImode, DImode and TImode */
1408 {4, 4, 5}, /* cost of storing SSE registers
1409 in SImode, DImode and TImode */
1410 3, /* MMX or SSE register to integer */
1411 /* On K8:
1412 MOVD reg64, xmmreg Double FSTORE 4
1413 MOVD reg32, xmmreg Double FSTORE 4
1414 On AMDFAM10:
1415 MOVD reg64, xmmreg Double FADD 3
1416 1/1 1/1
1417 MOVD reg32, xmmreg Double FADD 3
1418 1/1 1/1 */
1419 32, /* size of l1 cache. */
1420 2048, /* size of l2 cache. */
1421 64, /* size of prefetch block */
1422 100, /* number of parallel prefetches */
1423 2, /* Branch cost */
1424 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1425 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1426 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1427 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1428 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1429 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1430 btver2_memcpy,
1431 btver2_memset,
1432 4, /* scalar_stmt_cost. */
1433 2, /* scalar load_cost. */
1434 2, /* scalar_store_cost. */
1435 6, /* vec_stmt_cost. */
1436 0, /* vec_to_scalar_cost. */
1437 2, /* scalar_to_vec_cost. */
1438 2, /* vec_align_load_cost. */
1439 2, /* vec_unalign_load_cost. */
1440 2, /* vec_store_cost. */
1441 2, /* cond_taken_branch_cost. */
1442 1, /* cond_not_taken_branch_cost. */
1443 };
1444
1445 static stringop_algs pentium4_memcpy[2] = {
1446 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448 static stringop_algs pentium4_memset[2] = {
1449 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1450 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 DUMMY_STRINGOP_ALGS};
1452
1453 static const
1454 struct processor_costs pentium4_cost = {
1455 COSTS_N_INSNS (1), /* cost of an add instruction */
1456 COSTS_N_INSNS (3), /* cost of a lea instruction */
1457 COSTS_N_INSNS (4), /* variable shift costs */
1458 COSTS_N_INSNS (4), /* constant shift costs */
1459 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1460 COSTS_N_INSNS (15), /* HI */
1461 COSTS_N_INSNS (15), /* SI */
1462 COSTS_N_INSNS (15), /* DI */
1463 COSTS_N_INSNS (15)}, /* other */
1464 0, /* cost of multiply per each bit set */
1465 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1466 COSTS_N_INSNS (56), /* HI */
1467 COSTS_N_INSNS (56), /* SI */
1468 COSTS_N_INSNS (56), /* DI */
1469 COSTS_N_INSNS (56)}, /* other */
1470 COSTS_N_INSNS (1), /* cost of movsx */
1471 COSTS_N_INSNS (1), /* cost of movzx */
1472 16, /* "large" insn */
1473 6, /* MOVE_RATIO */
1474 2, /* cost for loading QImode using movzbl */
1475 {4, 5, 4}, /* cost of loading integer registers
1476 in QImode, HImode and SImode.
1477 Relative to reg-reg move (2). */
1478 {2, 3, 2}, /* cost of storing integer registers */
1479 2, /* cost of reg,reg fld/fst */
1480 {2, 2, 6}, /* cost of loading fp registers
1481 in SFmode, DFmode and XFmode */
1482 {4, 4, 6}, /* cost of storing fp registers
1483 in SFmode, DFmode and XFmode */
1484 2, /* cost of moving MMX register */
1485 {2, 2}, /* cost of loading MMX registers
1486 in SImode and DImode */
1487 {2, 2}, /* cost of storing MMX registers
1488 in SImode and DImode */
1489 12, /* cost of moving SSE register */
1490 {12, 12, 12}, /* cost of loading SSE registers
1491 in SImode, DImode and TImode */
1492 {2, 2, 8}, /* cost of storing SSE registers
1493 in SImode, DImode and TImode */
1494 10, /* MMX or SSE register to integer */
1495 8, /* size of l1 cache. */
1496 256, /* size of l2 cache. */
1497 64, /* size of prefetch block */
1498 6, /* number of parallel prefetches */
1499 2, /* Branch cost */
1500 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1501 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1502 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1503 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1504 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1505 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1506 pentium4_memcpy,
1507 pentium4_memset,
1508 1, /* scalar_stmt_cost. */
1509 1, /* scalar load_cost. */
1510 1, /* scalar_store_cost. */
1511 1, /* vec_stmt_cost. */
1512 1, /* vec_to_scalar_cost. */
1513 1, /* scalar_to_vec_cost. */
1514 1, /* vec_align_load_cost. */
1515 2, /* vec_unalign_load_cost. */
1516 1, /* vec_store_cost. */
1517 3, /* cond_taken_branch_cost. */
1518 1, /* cond_not_taken_branch_cost. */
1519 };
1520
1521 static stringop_algs nocona_memcpy[2] = {
1522 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1523 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1524 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525
1526 static stringop_algs nocona_memset[2] = {
1527 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1528 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1529 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1530 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531
1532 static const
1533 struct processor_costs nocona_cost = {
1534 COSTS_N_INSNS (1), /* cost of an add instruction */
1535 COSTS_N_INSNS (1), /* cost of a lea instruction */
1536 COSTS_N_INSNS (1), /* variable shift costs */
1537 COSTS_N_INSNS (1), /* constant shift costs */
1538 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1539 COSTS_N_INSNS (10), /* HI */
1540 COSTS_N_INSNS (10), /* SI */
1541 COSTS_N_INSNS (10), /* DI */
1542 COSTS_N_INSNS (10)}, /* other */
1543 0, /* cost of multiply per each bit set */
1544 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1545 COSTS_N_INSNS (66), /* HI */
1546 COSTS_N_INSNS (66), /* SI */
1547 COSTS_N_INSNS (66), /* DI */
1548 COSTS_N_INSNS (66)}, /* other */
1549 COSTS_N_INSNS (1), /* cost of movsx */
1550 COSTS_N_INSNS (1), /* cost of movzx */
1551 16, /* "large" insn */
1552 17, /* MOVE_RATIO */
1553 4, /* cost for loading QImode using movzbl */
1554 {4, 4, 4}, /* cost of loading integer registers
1555 in QImode, HImode and SImode.
1556 Relative to reg-reg move (2). */
1557 {4, 4, 4}, /* cost of storing integer registers */
1558 3, /* cost of reg,reg fld/fst */
1559 {12, 12, 12}, /* cost of loading fp registers
1560 in SFmode, DFmode and XFmode */
1561 {4, 4, 4}, /* cost of storing fp registers
1562 in SFmode, DFmode and XFmode */
1563 6, /* cost of moving MMX register */
1564 {12, 12}, /* cost of loading MMX registers
1565 in SImode and DImode */
1566 {12, 12}, /* cost of storing MMX registers
1567 in SImode and DImode */
1568 6, /* cost of moving SSE register */
1569 {12, 12, 12}, /* cost of loading SSE registers
1570 in SImode, DImode and TImode */
1571 {12, 12, 12}, /* cost of storing SSE registers
1572 in SImode, DImode and TImode */
1573 8, /* MMX or SSE register to integer */
1574 8, /* size of l1 cache. */
1575 1024, /* size of l2 cache. */
1576 64, /* size of prefetch block */
1577 8, /* number of parallel prefetches */
1578 1, /* Branch cost */
1579 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1580 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1581 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1582 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1583 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1584 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1585 nocona_memcpy,
1586 nocona_memset,
1587 1, /* scalar_stmt_cost. */
1588 1, /* scalar load_cost. */
1589 1, /* scalar_store_cost. */
1590 1, /* vec_stmt_cost. */
1591 1, /* vec_to_scalar_cost. */
1592 1, /* scalar_to_vec_cost. */
1593 1, /* vec_align_load_cost. */
1594 2, /* vec_unalign_load_cost. */
1595 1, /* vec_store_cost. */
1596 3, /* cond_taken_branch_cost. */
1597 1, /* cond_not_taken_branch_cost. */
1598 };
1599
1600 static stringop_algs atom_memcpy[2] = {
1601 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1602 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1604 static stringop_algs atom_memset[2] = {
1605 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1606 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1607 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1608 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1609 static const
1610 struct processor_costs atom_cost = {
1611 COSTS_N_INSNS (1), /* cost of an add instruction */
1612 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1613 COSTS_N_INSNS (1), /* variable shift costs */
1614 COSTS_N_INSNS (1), /* constant shift costs */
1615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1616 COSTS_N_INSNS (4), /* HI */
1617 COSTS_N_INSNS (3), /* SI */
1618 COSTS_N_INSNS (4), /* DI */
1619 COSTS_N_INSNS (2)}, /* other */
1620 0, /* cost of multiply per each bit set */
1621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1622 COSTS_N_INSNS (26), /* HI */
1623 COSTS_N_INSNS (42), /* SI */
1624 COSTS_N_INSNS (74), /* DI */
1625 COSTS_N_INSNS (74)}, /* other */
1626 COSTS_N_INSNS (1), /* cost of movsx */
1627 COSTS_N_INSNS (1), /* cost of movzx */
1628 8, /* "large" insn */
1629 17, /* MOVE_RATIO */
1630 4, /* cost for loading QImode using movzbl */
1631 {4, 4, 4}, /* cost of loading integer registers
1632 in QImode, HImode and SImode.
1633 Relative to reg-reg move (2). */
1634 {4, 4, 4}, /* cost of storing integer registers */
1635 4, /* cost of reg,reg fld/fst */
1636 {12, 12, 12}, /* cost of loading fp registers
1637 in SFmode, DFmode and XFmode */
1638 {6, 6, 8}, /* cost of storing fp registers
1639 in SFmode, DFmode and XFmode */
1640 2, /* cost of moving MMX register */
1641 {8, 8}, /* cost of loading MMX registers
1642 in SImode and DImode */
1643 {8, 8}, /* cost of storing MMX registers
1644 in SImode and DImode */
1645 2, /* cost of moving SSE register */
1646 {8, 8, 8}, /* cost of loading SSE registers
1647 in SImode, DImode and TImode */
1648 {8, 8, 8}, /* cost of storing SSE registers
1649 in SImode, DImode and TImode */
1650 5, /* MMX or SSE register to integer */
1651 32, /* size of l1 cache. */
1652 256, /* size of l2 cache. */
1653 64, /* size of prefetch block */
1654 6, /* number of parallel prefetches */
1655 3, /* Branch cost */
1656 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1657 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1658 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1659 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1660 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1661 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1662 atom_memcpy,
1663 atom_memset,
1664 1, /* scalar_stmt_cost. */
1665 1, /* scalar load_cost. */
1666 1, /* scalar_store_cost. */
1667 1, /* vec_stmt_cost. */
1668 1, /* vec_to_scalar_cost. */
1669 1, /* scalar_to_vec_cost. */
1670 1, /* vec_align_load_cost. */
1671 2, /* vec_unalign_load_cost. */
1672 1, /* vec_store_cost. */
1673 3, /* cond_taken_branch_cost. */
1674 1, /* cond_not_taken_branch_cost. */
1675 };
1676
1677 static stringop_algs slm_memcpy[2] = {
1678 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1679 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1680 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1681 static stringop_algs slm_memset[2] = {
1682 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1683 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1684 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1685 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 static const
1687 struct processor_costs slm_cost = {
1688 COSTS_N_INSNS (1), /* cost of an add instruction */
1689 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1690 COSTS_N_INSNS (1), /* variable shift costs */
1691 COSTS_N_INSNS (1), /* constant shift costs */
1692 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1693 COSTS_N_INSNS (3), /* HI */
1694 COSTS_N_INSNS (3), /* SI */
1695 COSTS_N_INSNS (4), /* DI */
1696 COSTS_N_INSNS (2)}, /* other */
1697 0, /* cost of multiply per each bit set */
1698 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1699 COSTS_N_INSNS (26), /* HI */
1700 COSTS_N_INSNS (42), /* SI */
1701 COSTS_N_INSNS (74), /* DI */
1702 COSTS_N_INSNS (74)}, /* other */
1703 COSTS_N_INSNS (1), /* cost of movsx */
1704 COSTS_N_INSNS (1), /* cost of movzx */
1705 8, /* "large" insn */
1706 17, /* MOVE_RATIO */
1707 4, /* cost for loading QImode using movzbl */
1708 {4, 4, 4}, /* cost of loading integer registers
1709 in QImode, HImode and SImode.
1710 Relative to reg-reg move (2). */
1711 {4, 4, 4}, /* cost of storing integer registers */
1712 4, /* cost of reg,reg fld/fst */
1713 {12, 12, 12}, /* cost of loading fp registers
1714 in SFmode, DFmode and XFmode */
1715 {6, 6, 8}, /* cost of storing fp registers
1716 in SFmode, DFmode and XFmode */
1717 2, /* cost of moving MMX register */
1718 {8, 8}, /* cost of loading MMX registers
1719 in SImode and DImode */
1720 {8, 8}, /* cost of storing MMX registers
1721 in SImode and DImode */
1722 2, /* cost of moving SSE register */
1723 {8, 8, 8}, /* cost of loading SSE registers
1724 in SImode, DImode and TImode */
1725 {8, 8, 8}, /* cost of storing SSE registers
1726 in SImode, DImode and TImode */
1727 5, /* MMX or SSE register to integer */
1728 32, /* size of l1 cache. */
1729 256, /* size of l2 cache. */
1730 64, /* size of prefetch block */
1731 6, /* number of parallel prefetches */
1732 3, /* Branch cost */
1733 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1734 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1735 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1736 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1737 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1738 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1739 slm_memcpy,
1740 slm_memset,
1741 1, /* scalar_stmt_cost. */
1742 1, /* scalar load_cost. */
1743 1, /* scalar_store_cost. */
1744 1, /* vec_stmt_cost. */
1745 4, /* vec_to_scalar_cost. */
1746 1, /* scalar_to_vec_cost. */
1747 1, /* vec_align_load_cost. */
1748 2, /* vec_unalign_load_cost. */
1749 1, /* vec_store_cost. */
1750 3, /* cond_taken_branch_cost. */
1751 1, /* cond_not_taken_branch_cost. */
1752 };
1753
1754 static stringop_algs intel_memcpy[2] = {
1755 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1756 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1757 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1758 static stringop_algs intel_memset[2] = {
1759 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1760 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1761 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1762 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs intel_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1767 COSTS_N_INSNS (1), /* variable shift costs */
1768 COSTS_N_INSNS (1), /* constant shift costs */
1769 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1770 COSTS_N_INSNS (3), /* HI */
1771 COSTS_N_INSNS (3), /* SI */
1772 COSTS_N_INSNS (4), /* DI */
1773 COSTS_N_INSNS (2)}, /* other */
1774 0, /* cost of multiply per each bit set */
1775 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1776 COSTS_N_INSNS (26), /* HI */
1777 COSTS_N_INSNS (42), /* SI */
1778 COSTS_N_INSNS (74), /* DI */
1779 COSTS_N_INSNS (74)}, /* other */
1780 COSTS_N_INSNS (1), /* cost of movsx */
1781 COSTS_N_INSNS (1), /* cost of movzx */
1782 8, /* "large" insn */
1783 17, /* MOVE_RATIO */
1784 4, /* cost for loading QImode using movzbl */
1785 {4, 4, 4}, /* cost of loading integer registers
1786 in QImode, HImode and SImode.
1787 Relative to reg-reg move (2). */
1788 {4, 4, 4}, /* cost of storing integer registers */
1789 4, /* cost of reg,reg fld/fst */
1790 {12, 12, 12}, /* cost of loading fp registers
1791 in SFmode, DFmode and XFmode */
1792 {6, 6, 8}, /* cost of storing fp registers
1793 in SFmode, DFmode and XFmode */
1794 2, /* cost of moving MMX register */
1795 {8, 8}, /* cost of loading MMX registers
1796 in SImode and DImode */
1797 {8, 8}, /* cost of storing MMX registers
1798 in SImode and DImode */
1799 2, /* cost of moving SSE register */
1800 {8, 8, 8}, /* cost of loading SSE registers
1801 in SImode, DImode and TImode */
1802 {8, 8, 8}, /* cost of storing SSE registers
1803 in SImode, DImode and TImode */
1804 5, /* MMX or SSE register to integer */
1805 32, /* size of l1 cache. */
1806 256, /* size of l2 cache. */
1807 64, /* size of prefetch block */
1808 6, /* number of parallel prefetches */
1809 3, /* Branch cost */
1810 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1811 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1812 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1813 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1814 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1815 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 intel_memcpy,
1817 intel_memset,
1818 1, /* scalar_stmt_cost. */
1819 1, /* scalar load_cost. */
1820 1, /* scalar_store_cost. */
1821 1, /* vec_stmt_cost. */
1822 4, /* vec_to_scalar_cost. */
1823 1, /* scalar_to_vec_cost. */
1824 1, /* vec_align_load_cost. */
1825 2, /* vec_unalign_load_cost. */
1826 1, /* vec_store_cost. */
1827 3, /* cond_taken_branch_cost. */
1828 1, /* cond_not_taken_branch_cost. */
1829 };
1830
1831 /* Generic should produce code tuned for Core-i7 (and newer chips)
1832 and btver1 (and newer chips). */
1833
1834 static stringop_algs generic_memcpy[2] = {
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1836 {-1, libcall, false}}},
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1838 {-1, libcall, false}}}};
1839 static stringop_algs generic_memset[2] = {
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1841 {-1, libcall, false}}},
1842 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1843 {-1, libcall, false}}}};
1844 static const
1845 struct processor_costs generic_cost = {
1846 COSTS_N_INSNS (1), /* cost of an add instruction */
1847 /* On all chips taken into consideration, lea is 2 cycles or more. With
1848 this cost, however, our current implementation of synth_mult results in
1849 the use of unnecessary temporary registers, causing regressions on several
1850 SPECfp benchmarks. */
1851 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1852 COSTS_N_INSNS (1), /* variable shift costs */
1853 COSTS_N_INSNS (1), /* constant shift costs */
1854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (4), /* HI */
1856 COSTS_N_INSNS (3), /* SI */
1857 COSTS_N_INSNS (4), /* DI */
1858 COSTS_N_INSNS (2)}, /* other */
1859 0, /* cost of multiply per each bit set */
1860 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1861 COSTS_N_INSNS (26), /* HI */
1862 COSTS_N_INSNS (42), /* SI */
1863 COSTS_N_INSNS (74), /* DI */
1864 COSTS_N_INSNS (74)}, /* other */
1865 COSTS_N_INSNS (1), /* cost of movsx */
1866 COSTS_N_INSNS (1), /* cost of movzx */
1867 8, /* "large" insn */
1868 17, /* MOVE_RATIO */
1869 4, /* cost for loading QImode using movzbl */
1870 {4, 4, 4}, /* cost of loading integer registers
1871 in QImode, HImode and SImode.
1872 Relative to reg-reg move (2). */
1873 {4, 4, 4}, /* cost of storing integer registers */
1874 4, /* cost of reg,reg fld/fst */
1875 {12, 12, 12}, /* cost of loading fp registers
1876 in SFmode, DFmode and XFmode */
1877 {6, 6, 8}, /* cost of storing fp registers
1878 in SFmode, DFmode and XFmode */
1879 2, /* cost of moving MMX register */
1880 {8, 8}, /* cost of loading MMX registers
1881 in SImode and DImode */
1882 {8, 8}, /* cost of storing MMX registers
1883 in SImode and DImode */
1884 2, /* cost of moving SSE register */
1885 {8, 8, 8}, /* cost of loading SSE registers
1886 in SImode, DImode and TImode */
1887 {8, 8, 8}, /* cost of storing SSE registers
1888 in SImode, DImode and TImode */
1889 5, /* MMX or SSE register to integer */
1890 32, /* size of l1 cache. */
1891 512, /* size of l2 cache. */
1892 64, /* size of prefetch block */
1893 6, /* number of parallel prefetches */
1894 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1895 value is increased to the perhaps more appropriate value of 5. */
1896 3, /* Branch cost */
1897 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1898 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1899 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1900 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1901 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1902 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1903 generic_memcpy,
1904 generic_memset,
1905 1, /* scalar_stmt_cost. */
1906 1, /* scalar load_cost. */
1907 1, /* scalar_store_cost. */
1908 1, /* vec_stmt_cost. */
1909 1, /* vec_to_scalar_cost. */
1910 1, /* scalar_to_vec_cost. */
1911 1, /* vec_align_load_cost. */
1912 2, /* vec_unalign_load_cost. */
1913 1, /* vec_store_cost. */
1914 3, /* cond_taken_branch_cost. */
1915 1, /* cond_not_taken_branch_cost. */
1916 };
1917
1918 /* core_cost should produce code tuned for the Core family of CPUs. */
1919 static stringop_algs core_memcpy[2] = {
1920 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1921 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1922 {-1, libcall, false}}}};
1923 static stringop_algs core_memset[2] = {
1924 {libcall, {{6, loop_1_byte, true},
1925 {24, loop, true},
1926 {8192, rep_prefix_4_byte, true},
1927 {-1, libcall, false}}},
1928 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1929 {-1, libcall, false}}}};
1930
1931 static const
1932 struct processor_costs core_cost = {
1933 COSTS_N_INSNS (1), /* cost of an add instruction */
1934 /* On all chips taken into consideration, lea is 2 cycles or more. With
1935 this cost, however, our current implementation of synth_mult results in
1936 the use of unnecessary temporary registers, causing regressions on several
1937 SPECfp benchmarks. */
1938 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1939 COSTS_N_INSNS (1), /* variable shift costs */
1940 COSTS_N_INSNS (1), /* constant shift costs */
1941 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1942 COSTS_N_INSNS (4), /* HI */
1943 COSTS_N_INSNS (3), /* SI */
1944 COSTS_N_INSNS (4), /* DI */
1945 COSTS_N_INSNS (2)}, /* other */
1946 0, /* cost of multiply per each bit set */
1947 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1948 COSTS_N_INSNS (26), /* HI */
1949 COSTS_N_INSNS (42), /* SI */
1950 COSTS_N_INSNS (74), /* DI */
1951 COSTS_N_INSNS (74)}, /* other */
1952 COSTS_N_INSNS (1), /* cost of movsx */
1953 COSTS_N_INSNS (1), /* cost of movzx */
1954 8, /* "large" insn */
1955 17, /* MOVE_RATIO */
1956 4, /* cost for loading QImode using movzbl */
1957 {4, 4, 4}, /* cost of loading integer registers
1958 in QImode, HImode and SImode.
1959 Relative to reg-reg move (2). */
1960 {4, 4, 4}, /* cost of storing integer registers */
1961 4, /* cost of reg,reg fld/fst */
1962 {12, 12, 12}, /* cost of loading fp registers
1963 in SFmode, DFmode and XFmode */
1964 {6, 6, 8}, /* cost of storing fp registers
1965 in SFmode, DFmode and XFmode */
1966 2, /* cost of moving MMX register */
1967 {8, 8}, /* cost of loading MMX registers
1968 in SImode and DImode */
1969 {8, 8}, /* cost of storing MMX registers
1970 in SImode and DImode */
1971 2, /* cost of moving SSE register */
1972 {8, 8, 8}, /* cost of loading SSE registers
1973 in SImode, DImode and TImode */
1974 {8, 8, 8}, /* cost of storing SSE registers
1975 in SImode, DImode and TImode */
1976 5, /* MMX or SSE register to integer */
1977 64, /* size of l1 cache. */
1978 512, /* size of l2 cache. */
1979 64, /* size of prefetch block */
1980 6, /* number of parallel prefetches */
1981 /* FIXME: perhaps a more appropriate value is 5. */
1982 3, /* Branch cost */
1983 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1984 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1985 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1986 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1987 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1988 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1989 core_memcpy,
1990 core_memset,
1991 1, /* scalar_stmt_cost. */
1992 1, /* scalar load_cost. */
1993 1, /* scalar_store_cost. */
1994 1, /* vec_stmt_cost. */
1995 1, /* vec_to_scalar_cost. */
1996 1, /* scalar_to_vec_cost. */
1997 1, /* vec_align_load_cost. */
1998 2, /* vec_unalign_load_cost. */
1999 1, /* vec_store_cost. */
2000 3, /* cond_taken_branch_cost. */
2001 1, /* cond_not_taken_branch_cost. */
2002 };
2003
2004
2005 /* Set by -mtune. */
2006 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007
2008 /* Set by -mtune or -Os. */
2009 const struct processor_costs *ix86_cost = &pentium_cost;
2010
2011 /* Processor feature/optimization bitmasks. */
2012 #define m_386 (1<<PROCESSOR_I386)
2013 #define m_486 (1<<PROCESSOR_I486)
2014 #define m_PENT (1<<PROCESSOR_PENTIUM)
2015 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2016 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2017 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2018 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2019 #define m_CORE2 (1<<PROCESSOR_CORE2)
2020 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2021 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2022 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2023 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2024 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2025 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2026 #define m_INTEL (1<<PROCESSOR_INTEL)
2027
2028 #define m_GEODE (1<<PROCESSOR_GEODE)
2029 #define m_K6 (1<<PROCESSOR_K6)
2030 #define m_K6_GEODE (m_K6 | m_GEODE)
2031 #define m_K8 (1<<PROCESSOR_K8)
2032 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2033 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2034 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2035 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2036 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2037 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2038 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2039 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2040 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2041 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2042 #define m_BTVER (m_BTVER1 | m_BTVER2)
2043 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044
2045 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2046
2047 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2048 #undef DEF_TUNE
2049 #define DEF_TUNE(tune, name, selector) name,
2050 #include "x86-tune.def"
2051 #undef DEF_TUNE
2052 };
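/* For illustration only (shape of an entry, not an actual line from
   x86-tune.def): each entry there has the form
       DEF_TUNE (X86_TUNE_<FEATURE>, "<feature-name>", <selector-mask>)
   The #define above keeps only the quoted name, so this array holds one
   printable name per X86_TUNE_* value, in definition order; the companion
   array below collects the selector masks instead.  */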
2053
2054 /* Feature tests against the various tunings. */
2055 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056
2057 /* Feature tests against the various tunings used to create ix86_tune_features
2058 based on the processor mask. */
2059 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2060 #undef DEF_TUNE
2061 #define DEF_TUNE(tune, name, selector) selector,
2062 #include "x86-tune.def"
2063 #undef DEF_TUNE
2064 };
2065
2066 /* Feature tests against the various architecture variations. */
2067 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068
2069 /* Feature tests against the various architecture variations, used to create
2070 ix86_arch_features based on the processor mask. */
2071 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2072 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2073 ~(m_386 | m_486 | m_PENT | m_K6),
2074
2075 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2076 ~m_386,
2077
2078 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2079 ~(m_386 | m_486),
2080
2081 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2082 ~m_386,
2083
2084 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2085 ~m_386,
2086 };
2087
2088 /* If the average insn count for a single function invocation is
2089 lower than this constant, emit fast (but longer) prologue and
2090 epilogue code. */
2091 #define FAST_PROLOGUE_INSN_COUNT 20
2092
2093 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2094 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2095 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2096 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097
2098 /* Array of the smallest class containing reg number REGNO, indexed by
2099 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100
2101 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 {
2103 /* ax, dx, cx, bx */
2104 AREG, DREG, CREG, BREG,
2105 /* si, di, bp, sp */
2106 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2107 /* FP registers */
2108 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2109 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2110 /* arg pointer */
2111 NON_Q_REGS,
2112 /* flags, fpsr, fpcr, frame */
2113 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2114 /* SSE registers */
2115 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2116 SSE_REGS, SSE_REGS,
2117 /* MMX registers */
2118 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2119 MMX_REGS, MMX_REGS,
2120 /* REX registers */
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2123 /* SSE REX registers */
2124 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2125 SSE_REGS, SSE_REGS,
2126 /* AVX-512 SSE registers */
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 /* Mask registers. */
2132 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2134 };
2135
2136 /* The "default" register map used in 32bit mode. */
2137
2138 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2139 {
2140 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2141 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2142 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2150 };
2151
2152 /* The "default" register map used in 64bit mode. */
2153
2154 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2155 {
2156 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2157 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2158 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2159 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2160 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2161 8,9,10,11,12,13,14,15, /* extended integer registers */
2162 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2163 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2164 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2165 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2166 };
2167
2168 /* Define the register numbers to be used in Dwarf debugging information.
2169 The SVR4 reference port C compiler uses the following register numbers
2170 in its Dwarf output code:
2171 0 for %eax (gcc regno = 0)
2172 1 for %ecx (gcc regno = 2)
2173 2 for %edx (gcc regno = 1)
2174 3 for %ebx (gcc regno = 3)
2175 4 for %esp (gcc regno = 7)
2176 5 for %ebp (gcc regno = 6)
2177 6 for %esi (gcc regno = 4)
2178 7 for %edi (gcc regno = 5)
2179 The following three DWARF register numbers are never generated by
2180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2181 believes these numbers have these meanings.
2182 8 for %eip (no gcc equivalent)
2183 9 for %eflags (gcc regno = 17)
2184 10 for %trapno (no gcc equivalent)
2185 It is not at all clear how we should number the FP stack registers
2186 for the x86 architecture. If the version of SDB on x86/svr4 were
2187 a bit less brain dead with respect to floating-point then we would
2188 have a precedent to follow with respect to DWARF register numbers
2189 for x86 FP registers, but the SDB on x86/svr4 is so completely
2190 broken with respect to FP registers that it is hardly worth thinking
2191 of it as something to strive for compatibility with.
2192 The version of x86/svr4 SDB I have at the moment does (partially)
2193 seem to believe that DWARF register number 11 is associated with
2194 the x86 register %st(0), but that's about all. Higher DWARF
2195 register numbers don't seem to be associated with anything in
2196 particular, and even for DWARF regno 11, SDB only seems to under-
2197 stand that it should say that a variable lives in %st(0) (when
2198 asked via an `=' command) if we said it was in DWARF regno 11,
2199 but SDB still prints garbage when asked for the value of the
2200 variable in question (via a `/' command).
2201 (Also note that the labels SDB prints for various FP stack regs
2202 when doing an `x' command are all wrong.)
2203 Note that these problems generally don't affect the native SVR4
2204 C compiler because it doesn't allow the use of -O with -g and
2205 because when it is *not* optimizing, it allocates a memory
2206 location for each floating-point variable, and the memory
2207 location is what gets described in the DWARF AT_location
2208 attribute for the variable in question.
2209 Regardless of the severe mental illness of the x86/svr4 SDB, we
2210 do something sensible here and we use the following DWARF
2211 register numbers. Note that these are all stack-top-relative
2212 numbers.
2213 11 for %st(0) (gcc regno = 8)
2214 12 for %st(1) (gcc regno = 9)
2215 13 for %st(2) (gcc regno = 10)
2216 14 for %st(3) (gcc regno = 11)
2217 15 for %st(4) (gcc regno = 12)
2218 16 for %st(5) (gcc regno = 13)
2219 17 for %st(6) (gcc regno = 14)
2220 18 for %st(7) (gcc regno = 15)
2221 */
2222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2223 {
2224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2233 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2234 };
2235
2236 /* Define parameter passing and return registers. */
2237
2238 static int const x86_64_int_parameter_registers[6] =
2239 {
2240 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2241 };
2242
2243 static int const x86_64_ms_abi_int_parameter_registers[4] =
2244 {
2245 CX_REG, DX_REG, R8_REG, R9_REG
2246 };
2247
2248 static int const x86_64_int_return_registers[4] =
2249 {
2250 AX_REG, DX_REG, DI_REG, SI_REG
2251 };
2252
2253 /* Additional registers that are clobbered by SYSV calls. */
2254
2255 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2256 {
2257 SI_REG, DI_REG,
2258 XMM6_REG, XMM7_REG,
2259 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2260 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2261 };
2262
2263 /* Define the structure for the machine field in struct function. */
2264
2265 struct GTY(()) stack_local_entry {
2266 unsigned short mode;
2267 unsigned short n;
2268 rtx rtl;
2269 struct stack_local_entry *next;
2270 };
2271
2272 /* Structure describing stack frame layout.
2273 Stack grows downward:
2274
2275 [arguments]
2276 <- ARG_POINTER
2277 saved pc
2278
2279 saved static chain if ix86_static_chain_on_stack
2280
2281 saved frame pointer if frame_pointer_needed
2282 <- HARD_FRAME_POINTER
2283 [saved regs]
2284 <- regs_save_offset
2285 [padding0]
2286
2287 [saved SSE regs]
2288 <- sse_regs_save_offset
2289 [padding1] |
2290 | <- FRAME_POINTER
2291 [va_arg registers] |
2292 |
2293 [frame] |
2294 |
2295 [padding2] | = to_allocate
2296 <- STACK_POINTER
2297 */
2298 struct ix86_frame
2299 {
2300 int nsseregs;
2301 int nregs;
2302 int va_arg_size;
2303 int red_zone_size;
2304 int outgoing_arguments_size;
2305
2306 /* The offsets relative to ARG_POINTER. */
2307 HOST_WIDE_INT frame_pointer_offset;
2308 HOST_WIDE_INT hard_frame_pointer_offset;
2309 HOST_WIDE_INT stack_pointer_offset;
2310 HOST_WIDE_INT hfp_save_offset;
2311 HOST_WIDE_INT reg_save_offset;
2312 HOST_WIDE_INT sse_reg_save_offset;
2313
2314 /* When save_regs_using_mov is set, emit prologue using
2315 move instead of push instructions. */
2316 bool save_regs_using_mov;
2317 };
2318
2319 /* Which cpu are we scheduling for. */
2320 enum attr_cpu ix86_schedule;
2321
2322 /* Which cpu are we optimizing for. */
2323 enum processor_type ix86_tune;
2324
2325 /* Which instruction set architecture to use. */
2326 enum processor_type ix86_arch;
2327
2328 /* True if processor has SSE prefetch instruction. */
2329 unsigned char x86_prefetch_sse;
2330
2331 /* -mstackrealign option */
2332 static const char ix86_force_align_arg_pointer_string[]
2333 = "force_align_arg_pointer";
2334
2335 static rtx (*ix86_gen_leave) (void);
2336 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2339 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2340 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2343 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2347
2348 /* Preferred alignment for stack boundary in bits. */
2349 unsigned int ix86_preferred_stack_boundary;
2350
2351 /* Alignment for incoming stack boundary in bits specified at
2352 command line. */
2353 static unsigned int ix86_user_incoming_stack_boundary;
2354
2355 /* Default alignment for incoming stack boundary in bits. */
2356 static unsigned int ix86_default_incoming_stack_boundary;
2357
2358 /* Alignment for incoming stack boundary in bits. */
2359 unsigned int ix86_incoming_stack_boundary;
2360
2361 /* Calling-ABI-specific va_list type nodes. */
2362 static GTY(()) tree sysv_va_list_type_node;
2363 static GTY(()) tree ms_va_list_type_node;
2364
2365 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2366 char internal_label_prefix[16];
2367 int internal_label_prefix_len;
2368
2369 /* Fence to use after loop using movnt. */
2370 tree x86_mfence;
2371
2372 /* Register classes used for passing a given 64-bit part of an argument.
2373 These represent the classes as documented by the psABI, with the exception
2374 of the SSESF and SSEDF classes, which are basically the SSE class: gcc just
2375 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2376 
2377 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2378 whenever possible (the upper half then contains only padding). */
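/* A minimal illustration, assuming the usual psABI classification rules: in
   64-bit mode a struct { double d; int i; } spans two eightbytes; the first
   (the double) would typically be classified X86_64_SSEDF_CLASS and the second
   (the int plus padding) X86_64_INTEGERSI_CLASS, so the struct is passed in
   one SSE register and one general-purpose register.  */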
2379 enum x86_64_reg_class
2380 {
2381 X86_64_NO_CLASS,
2382 X86_64_INTEGER_CLASS,
2383 X86_64_INTEGERSI_CLASS,
2384 X86_64_SSE_CLASS,
2385 X86_64_SSESF_CLASS,
2386 X86_64_SSEDF_CLASS,
2387 X86_64_SSEUP_CLASS,
2388 X86_64_X87_CLASS,
2389 X86_64_X87UP_CLASS,
2390 X86_64_COMPLEX_X87_CLASS,
2391 X86_64_MEMORY_CLASS
2392 };
2393
2394 #define MAX_CLASSES 8
2395
2396 /* Table of constants used by fldpi, fldln2, etc. */
2397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2398 static bool ext_80387_constants_init = 0;
2399
2400 \f
2401 static struct machine_function * ix86_init_machine_status (void);
2402 static rtx ix86_function_value (const_tree, const_tree, bool);
2403 static bool ix86_function_value_regno_p (const unsigned int);
2404 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2405 const_tree);
2406 static rtx ix86_static_chain (const_tree, bool);
2407 static int ix86_function_regparm (const_tree, const_tree);
2408 static void ix86_compute_frame_layout (struct ix86_frame *);
2409 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2410 rtx, rtx, int);
2411 static void ix86_add_new_builtins (HOST_WIDE_INT);
2412 static tree ix86_canonical_va_list_type (tree);
2413 static void predict_jump (int);
2414 static unsigned int split_stack_prologue_scratch_regno (void);
2415 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2416
2417 enum ix86_function_specific_strings
2418 {
2419 IX86_FUNCTION_SPECIFIC_ARCH,
2420 IX86_FUNCTION_SPECIFIC_TUNE,
2421 IX86_FUNCTION_SPECIFIC_MAX
2422 };
2423
2424 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2425 const char *, enum fpmath_unit, bool);
2426 static void ix86_function_specific_save (struct cl_target_option *,
2427 struct gcc_options *opts);
2428 static void ix86_function_specific_restore (struct gcc_options *opts,
2429 struct cl_target_option *);
2430 static void ix86_function_specific_print (FILE *, int,
2431 struct cl_target_option *);
2432 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2433 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2434 struct gcc_options *,
2435 struct gcc_options *,
2436 struct gcc_options *);
2437 static bool ix86_can_inline_p (tree, tree);
2438 static void ix86_set_current_function (tree);
2439 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2440
2441 static enum calling_abi ix86_function_abi (const_tree);
2442
2443 \f
2444 #ifndef SUBTARGET32_DEFAULT_CPU
2445 #define SUBTARGET32_DEFAULT_CPU "i386"
2446 #endif
2447
2448 /* Whether -mtune= or -march= were specified */
2449 static int ix86_tune_defaulted;
2450 static int ix86_arch_specified;
2451
2452 /* Vectorization library interface and handlers. */
2453 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2454
2455 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2456 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2457
2458 /* Processor target table, indexed by processor number */
2459 struct ptt
2460 {
2461 const char *const name; /* processor name */
2462 const struct processor_costs *cost; /* Processor costs */
2463 const int align_loop; /* Default alignments. */
2464 const int align_loop_max_skip;
2465 const int align_jump;
2466 const int align_jump_max_skip;
2467 const int align_func;
2468 };
2469
2470 /* This table must be in sync with enum processor_type in i386.h. */
2471 static const struct ptt processor_target_table[PROCESSOR_max] =
2472 {
2473 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2474 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2475 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2476 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2477 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2478 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2479 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2480 {"core2", &core_cost, 16, 10, 16, 10, 16},
2481 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2482 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2483 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2484 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2485 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2486 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2487 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2488 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2489 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2490 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2491 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2492 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2493 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2494 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2495 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2496 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2497 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2498 };
2499 \f
2500 static unsigned int
2501 rest_of_handle_insert_vzeroupper (void)
2502 {
2503 int i;
2504
2505 /* vzeroupper instructions are inserted immediately after reload to
2506 account for possible spills from 256-bit registers. The pass
2507 reuses the mode switching infrastructure by re-running the mode
2508 insertion pass, so disable entities that have already been processed. */
2509 for (i = 0; i < MAX_386_ENTITIES; i++)
2510 ix86_optimize_mode_switching[i] = 0;
2511
2512 ix86_optimize_mode_switching[AVX_U128] = 1;
2513
2514 /* Call optimize_mode_switching. */
2515 g->get_passes ()->execute_pass_mode_switching ();
2516 return 0;
2517 }
2518
2519 namespace {
2520
2521 const pass_data pass_data_insert_vzeroupper =
2522 {
2523 RTL_PASS, /* type */
2524 "vzeroupper", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 true, /* has_execute */
2527 TV_NONE, /* tv_id */
2528 0, /* properties_required */
2529 0, /* properties_provided */
2530 0, /* properties_destroyed */
2531 0, /* todo_flags_start */
2532 TODO_df_finish, /* todo_flags_finish */
2533 };
2534
2535 class pass_insert_vzeroupper : public rtl_opt_pass
2536 {
2537 public:
2538 pass_insert_vzeroupper(gcc::context *ctxt)
2539 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2540 {}
2541
2542 /* opt_pass methods: */
2543 virtual bool gate (function *)
2544 {
2545 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2546 }
2547
2548 virtual unsigned int execute (function *)
2549 {
2550 return rest_of_handle_insert_vzeroupper ();
2551 }
2552
2553 }; // class pass_insert_vzeroupper
2554
2555 } // anon namespace
2556
2557 rtl_opt_pass *
2558 make_pass_insert_vzeroupper (gcc::context *ctxt)
2559 {
2560 return new pass_insert_vzeroupper (ctxt);
2561 }
2562
2563 /* Return true if a red-zone is in use. */
2564
2565 static inline bool
2566 ix86_using_red_zone (void)
2567 {
2568 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2569 }
2570 \f
2571 /* Return a string that documents the current -m options. The caller is
2572 responsible for freeing the string. */
2573
2574 static char *
2575 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2576 const char *tune, enum fpmath_unit fpmath,
2577 bool add_nl_p)
2578 {
2579 struct ix86_target_opts
2580 {
2581 const char *option; /* option string */
2582 HOST_WIDE_INT mask; /* isa mask options */
2583 };
2584
2585 /* This table is ordered so that options like -msse4.2 that imply
2586 preceding options are matched (and printed) first. */
2587 static struct ix86_target_opts isa_opts[] =
2588 {
2589 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2590 { "-mfma", OPTION_MASK_ISA_FMA },
2591 { "-mxop", OPTION_MASK_ISA_XOP },
2592 { "-mlwp", OPTION_MASK_ISA_LWP },
2593 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2594 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2595 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2596 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2597 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2598 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2599 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2600 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2601 { "-msse3", OPTION_MASK_ISA_SSE3 },
2602 { "-msse2", OPTION_MASK_ISA_SSE2 },
2603 { "-msse", OPTION_MASK_ISA_SSE },
2604 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2605 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2606 { "-mmmx", OPTION_MASK_ISA_MMX },
2607 { "-mabm", OPTION_MASK_ISA_ABM },
2608 { "-mbmi", OPTION_MASK_ISA_BMI },
2609 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2610 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2611 { "-mhle", OPTION_MASK_ISA_HLE },
2612 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2613 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2614 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2615 { "-madx", OPTION_MASK_ISA_ADX },
2616 { "-mtbm", OPTION_MASK_ISA_TBM },
2617 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2618 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2619 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2620 { "-maes", OPTION_MASK_ISA_AES },
2621 { "-msha", OPTION_MASK_ISA_SHA },
2622 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2623 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2624 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2625 { "-mf16c", OPTION_MASK_ISA_F16C },
2626 { "-mrtm", OPTION_MASK_ISA_RTM },
2627 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2628 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2629 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2630 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2631 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2632 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2633 };
2634
2635 /* Flag options. */
2636 static struct ix86_target_opts flag_opts[] =
2637 {
2638 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2639 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2640 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2641 { "-m80387", MASK_80387 },
2642 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2643 { "-malign-double", MASK_ALIGN_DOUBLE },
2644 { "-mcld", MASK_CLD },
2645 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2646 { "-mieee-fp", MASK_IEEE_FP },
2647 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2648 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2649 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2650 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2651 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2652 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2653 { "-mno-red-zone", MASK_NO_RED_ZONE },
2654 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2655 { "-mrecip", MASK_RECIP },
2656 { "-mrtd", MASK_RTD },
2657 { "-msseregparm", MASK_SSEREGPARM },
2658 { "-mstack-arg-probe", MASK_STACK_PROBE },
2659 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2660 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2661 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2662 { "-mvzeroupper", MASK_VZEROUPPER },
2663 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2664 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2665 { "-mprefer-avx128", MASK_PREFER_AVX128},
2666 };
2667
2668 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2669
2670 char isa_other[40];
2671 char target_other[40];
2672 unsigned num = 0;
2673 unsigned i, j;
2674 char *ret;
2675 char *ptr;
2676 size_t len;
2677 size_t line_len;
2678 size_t sep_len;
2679 const char *abi;
2680
2681 memset (opts, '\0', sizeof (opts));
2682
2683 /* Add -march= option. */
2684 if (arch)
2685 {
2686 opts[num][0] = "-march=";
2687 opts[num++][1] = arch;
2688 }
2689
2690 /* Add -mtune= option. */
2691 if (tune)
2692 {
2693 opts[num][0] = "-mtune=";
2694 opts[num++][1] = tune;
2695 }
2696
2697 /* Add -m32/-m64/-mx32. */
2698 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2699 {
2700 if ((isa & OPTION_MASK_ABI_64) != 0)
2701 abi = "-m64";
2702 else
2703 abi = "-mx32";
2704 isa &= ~ (OPTION_MASK_ISA_64BIT
2705 | OPTION_MASK_ABI_64
2706 | OPTION_MASK_ABI_X32);
2707 }
2708 else
2709 abi = "-m32";
2710 opts[num++][0] = abi;
2711
2712 /* Pick out the options in isa options. */
2713 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2714 {
2715 if ((isa & isa_opts[i].mask) != 0)
2716 {
2717 opts[num++][0] = isa_opts[i].option;
2718 isa &= ~ isa_opts[i].mask;
2719 }
2720 }
2721
2722 if (isa && add_nl_p)
2723 {
2724 opts[num++][0] = isa_other;
2725 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2726 isa);
2727 }
2728
2729 /* Add flag options. */
2730 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2731 {
2732 if ((flags & flag_opts[i].mask) != 0)
2733 {
2734 opts[num++][0] = flag_opts[i].option;
2735 flags &= ~ flag_opts[i].mask;
2736 }
2737 }
2738
2739 if (flags && add_nl_p)
2740 {
2741 opts[num++][0] = target_other;
2742 sprintf (target_other, "(other flags: %#x)", flags);
2743 }
2744
2745 /* Add -fpmath= option. */
2746 if (fpmath)
2747 {
2748 opts[num][0] = "-mfpmath=";
2749 switch ((int) fpmath)
2750 {
2751 case FPMATH_387:
2752 opts[num++][1] = "387";
2753 break;
2754
2755 case FPMATH_SSE:
2756 opts[num++][1] = "sse";
2757 break;
2758
2759 case FPMATH_387 | FPMATH_SSE:
2760 opts[num++][1] = "sse+387";
2761 break;
2762
2763 default:
2764 gcc_unreachable ();
2765 }
2766 }
2767
2768 /* Any options? */
2769 if (num == 0)
2770 return NULL;
2771
2772 gcc_assert (num < ARRAY_SIZE (opts));
2773
2774 /* Size the string. */
2775 len = 0;
2776 sep_len = (add_nl_p) ? 3 : 1;
2777 for (i = 0; i < num; i++)
2778 {
2779 len += sep_len;
2780 for (j = 0; j < 2; j++)
2781 if (opts[i][j])
2782 len += strlen (opts[i][j]);
2783 }
2784
2785 /* Build the string. */
2786 ret = ptr = (char *) xmalloc (len);
2787 line_len = 0;
2788
2789 for (i = 0; i < num; i++)
2790 {
2791 size_t len2[2];
2792
2793 for (j = 0; j < 2; j++)
2794 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2795
2796 if (i != 0)
2797 {
2798 *ptr++ = ' ';
2799 line_len++;
2800
2801 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2802 {
2803 *ptr++ = '\\';
2804 *ptr++ = '\n';
2805 line_len = 0;
2806 }
2807 }
2808
2809 for (j = 0; j < 2; j++)
2810 if (opts[i][j])
2811 {
2812 memcpy (ptr, opts[i][j], len2[j]);
2813 ptr += len2[j];
2814 line_len += len2[j];
2815 }
2816 }
2817
2818 *ptr = '\0';
2819 gcc_assert (ret + len >= ptr);
2820
2821 return ret;
2822 }
2823
2824 /* Return true if profiling code should be emitted before the
2825 prologue, and false otherwise. On x86 this is the case only when
2826 -mfentry is in effect (used e.g. for "hotfix"-style live patching). */
2827 static bool
2828 ix86_profile_before_prologue (void)
2829 {
2830 return flag_fentry != 0;
2831 }
2832
2833 /* Function that is callable from the debugger to print the current
2834 options. */
2835 void ATTRIBUTE_UNUSED
2836 ix86_debug_options (void)
2837 {
2838 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2839 ix86_arch_string, ix86_tune_string,
2840 ix86_fpmath, true);
2841
2842 if (opts)
2843 {
2844 fprintf (stderr, "%s\n\n", opts);
2845 free (opts);
2846 }
2847 else
2848 fputs ("<no options>\n\n", stderr);
2849
2850 return;
2851 }
2852
2853 static const char *stringop_alg_names[] = {
2854 #define DEF_ENUM
2855 #define DEF_ALG(alg, name) #name,
2856 #include "stringop.def"
2857 #undef DEF_ENUM
2858 #undef DEF_ALG
2859 };
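/* For illustration only: each DEF_ALG (alg, name) entry in stringop.def
   contributes the string "name" here, in the same order as the stringop_alg
   enumerators, which is what lets the strategy parser below translate a
   user-supplied algorithm name back into its enum value by array index.  */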
2860
2861 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2862 The string is of the following form (or comma separated list of it):
2863
2864 strategy_alg:max_size:[align|noalign]
2865
2866 where the full size range for the strategy is either [0, max_size] or
2867 [min_size, max_size], in which min_size is the max_size + 1 of the
2868 preceding range. The last size range must have max_size == -1.
2869
2870 Examples:
2871
2872 1.
2873 -mmemcpy-strategy=libcall:-1:noalign
2874
2875 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2876
2877
2878 2.
2879 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2880
2881 This is to tell the compiler to use the following strategy for memset
2882 1) when the expected size is between [1, 16], use rep_8byte strategy;
2883 2) when the size is between [17, 2048], use vector_loop;
2884 3) when the size is > 2048, use libcall. */
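/* A sketch of how example 2 is consumed, assuming the names above map to the
   corresponding stringop_alg enumerators: the parser below records three
   ranges, roughly {16, rep_8byte, noalign}, {2048, vector_loop, align} and
   {-1, libcall, noalign}, and then installs them over the default memset
   algs for the current target.  */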
2885
2886 struct stringop_size_range
2887 {
2888 int max;
2889 stringop_alg alg;
2890 bool noalign;
2891 };
2892
2893 static void
2894 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2895 {
2896 const struct stringop_algs *default_algs;
2897 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2898 char *curr_range_str, *next_range_str;
2899 int i = 0, n = 0;
2900
2901 if (is_memset)
2902 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2903 else
2904 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2905
2906 curr_range_str = strategy_str;
2907
2908 do
2909 {
2910 int maxs;
2911 char alg_name[128];
2912 char align[16];
2913 next_range_str = strchr (curr_range_str, ',');
2914 if (next_range_str)
2915 *next_range_str++ = '\0';
2916
2917 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2918 alg_name, &maxs, align))
2919 {
2920 error ("wrong arg %s to option %s", curr_range_str,
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2923 }
2924
2925 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2926 {
2927 error ("size ranges of option %s should be increasing",
2928 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2929 return;
2930 }
2931
2932 for (i = 0; i < last_alg; i++)
2933 if (!strcmp (alg_name, stringop_alg_names[i]))
2934 break;
2935
2936 if (i == last_alg)
2937 {
2938 error ("wrong stringop strategy name %s specified for option %s",
2939 alg_name,
2940 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2941 return;
2942 }
2943
2944 input_ranges[n].max = maxs;
2945 input_ranges[n].alg = (stringop_alg) i;
2946 if (!strcmp (align, "align"))
2947 input_ranges[n].noalign = false;
2948 else if (!strcmp (align, "noalign"))
2949 input_ranges[n].noalign = true;
2950 else
2951 {
2952 error ("unknown alignment %s specified for option %s",
2953 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2954 return;
2955 }
2956 n++;
2957 curr_range_str = next_range_str;
2958 }
2959 while (curr_range_str);
2960
2961 if (input_ranges[n - 1].max != -1)
2962 {
2963 error ("the max value for the last size range should be -1"
2964 " for option %s",
2965 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2966 return;
2967 }
2968
2969 if (n > MAX_STRINGOP_ALGS)
2970 {
2971 error ("too many size ranges specified in option %s",
2972 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2973 return;
2974 }
2975
2976 /* Now override the default algs array. */
2977 for (i = 0; i < n; i++)
2978 {
2979 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2980 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2981 = input_ranges[i].alg;
2982 *const_cast<int *>(&default_algs->size[i].noalign)
2983 = input_ranges[i].noalign;
2984 }
2985 }
2986
2987 \f
2988 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2989 print the features that are explicitly set.  */
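/* For example, a string along the lines of "use_leave,^partial_reg_stall"
   would set the first feature and clear the second; the feature names are
   the ones spelled out in x86-tune.def, and this particular pair is only an
   illustration.  */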
2990
2991 static void
2992 parse_mtune_ctrl_str (bool dump)
2993 {
2994 if (!ix86_tune_ctrl_string)
2995 return;
2996
2997 char *next_feature_string = NULL;
2998 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2999 char *orig = curr_feature_string;
3000 int i;
3001 do
3002 {
3003 bool clear = false;
3004
3005 next_feature_string = strchr (curr_feature_string, ',');
3006 if (next_feature_string)
3007 *next_feature_string++ = '\0';
3008 if (*curr_feature_string == '^')
3009 {
3010 curr_feature_string++;
3011 clear = true;
3012 }
3013 for (i = 0; i < X86_TUNE_LAST; i++)
3014 {
3015 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3016 {
3017 ix86_tune_features[i] = !clear;
3018 if (dump)
3019 fprintf (stderr, "Explicitly %s feature %s\n",
3020 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3021 break;
3022 }
3023 }
3024 if (i == X86_TUNE_LAST)
3025 error ("unknown parameter to option -mtune-ctrl: %s",
3026 clear ? curr_feature_string - 1 : curr_feature_string);
3027 curr_feature_string = next_feature_string;
3028 }
3029 while (curr_feature_string);
3030 free (orig);
3031 }
3032
3033 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3034 processor type. */
3035
3036 static void
3037 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3038 {
3039 unsigned int ix86_tune_mask = 1u << ix86_tune;
3040 int i;
3041
3042 for (i = 0; i < X86_TUNE_LAST; ++i)
3043 {
3044 if (ix86_tune_no_default)
3045 ix86_tune_features[i] = 0;
3046 else
3047 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3048 }
3049
3050 if (dump)
3051 {
3052 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3053 for (i = 0; i < X86_TUNE_LAST; i++)
3054 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3055 ix86_tune_features[i] ? "on" : "off");
3056 }
3057
3058 parse_mtune_ctrl_str (dump);
3059 }
3060
3061
3062 /* Override various settings based on options. If MAIN_ARGS_P, the
3063 options are from the command line, otherwise they are from
3064 attributes. */
3065
3066 static void
3067 ix86_option_override_internal (bool main_args_p,
3068 struct gcc_options *opts,
3069 struct gcc_options *opts_set)
3070 {
3071 int i;
3072 unsigned int ix86_arch_mask;
3073 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3074 const char *prefix;
3075 const char *suffix;
3076 const char *sw;
3077
3078 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3079 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3080 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3081 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3082 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3083 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3084 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3085 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3086 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3087 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3088 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3089 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3090 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3091 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3092 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3093 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3094 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3095 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3096 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3097 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3098 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3099 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3100 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3101 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3102 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3103 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3104 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3105 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3106 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3107 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3108 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3109 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3110 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3111 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3112 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3113 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3114 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3115 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3116 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3117 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3118 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3119 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3120 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3121 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3122 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3123 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3124 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3125 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3126 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3127
3128 #define PTA_CORE2 \
3129 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3130 | PTA_CX16 | PTA_FXSR)
3131 #define PTA_NEHALEM \
3132 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3133 #define PTA_WESTMERE \
3134 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3135 #define PTA_SANDYBRIDGE \
3136 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3137 #define PTA_IVYBRIDGE \
3138 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3139 #define PTA_HASWELL \
3140 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3141 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3142 #define PTA_BROADWELL \
3143 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3144 #define PTA_BONNELL \
3145 (PTA_CORE2 | PTA_MOVBE)
3146 #define PTA_SILVERMONT \
3147 (PTA_WESTMERE | PTA_MOVBE)
3148
3149 /* If this reaches 64, the flags field of struct pta below needs to be widened.  */
3150
3151 static struct pta
3152 {
3153 const char *const name; /* processor name or nickname. */
3154 const enum processor_type processor;
3155 const enum attr_cpu schedule;
3156 const unsigned HOST_WIDE_INT flags;
3157 }
3158 const processor_alias_table[] =
3159 {
3160 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3161 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3162 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3164 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3165 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3166 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3168 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 PTA_MMX | PTA_SSE | PTA_FXSR},
3170 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3172 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3173 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3174 PTA_MMX | PTA_SSE | PTA_FXSR},
3175 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3176 PTA_MMX | PTA_SSE | PTA_FXSR},
3177 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3179 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3180 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3181 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3185 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3188 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3189 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3191 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3192 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3193 PTA_SANDYBRIDGE},
3194 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3195 PTA_SANDYBRIDGE},
3196 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_IVYBRIDGE},
3198 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_IVYBRIDGE},
3200 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3202 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3203 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3205 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3207 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3208 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3210 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3211 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3213 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3215 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3216 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3217 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3219 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3221 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"x86-64", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3225 {"k8", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"opteron", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3236 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3237 {"athlon64", PROCESSOR_K8, CPU_K8,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3239 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3240 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3243 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3246 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3247 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3248 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3249 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3250 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3251 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3252 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3254 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3255 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3256 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3257 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3260 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3261 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3262 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3263 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3264 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3265 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3266 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3267 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3268 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3269 | PTA_XSAVEOPT | PTA_FSGSBASE},
3270 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3273 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3274 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3275 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3276 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3277 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3278 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3279 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3280 | PTA_FXSR | PTA_XSAVE},
3281 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3285 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3286 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3287
3288 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3289 PTA_64BIT
3290 | PTA_HLE /* flags are only used for -march switch. */ },
3291 };
3292
3293 /* -mrecip options. */
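/* The option takes a comma-separated list of the names below; a leading '!'
   clears the named mask instead of setting it, so e.g. -mrecip=all,!sqrt
   (an illustrative combination) enables every reciprocal approximation
   except the scalar sqrt one.  See the parsing loop further down.  */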
3294 static struct
3295 {
3296 const char *string; /* option name */
3297 unsigned int mask; /* mask bits to set */
3298 }
3299 const recip_options[] =
3300 {
3301 { "all", RECIP_MASK_ALL },
3302 { "none", RECIP_MASK_NONE },
3303 { "div", RECIP_MASK_DIV },
3304 { "sqrt", RECIP_MASK_SQRT },
3305 { "vec-div", RECIP_MASK_VEC_DIV },
3306 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3307 };
3308
3309 int const pta_size = ARRAY_SIZE (processor_alias_table);
3310
3311 /* Set up prefix/suffix so the error messages refer to either the command
3312 line argument, or the attribute(target). */
3313 if (main_args_p)
3314 {
3315 prefix = "-m";
3316 suffix = "";
3317 sw = "switch";
3318 }
3319 else
3320 {
3321 prefix = "option(\"";
3322 suffix = "\")";
3323 sw = "attribute";
3324 }
3325
3326 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3327 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3328 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3330 #ifdef TARGET_BI_ARCH
3331 else
3332 {
3333 #if TARGET_BI_ARCH == 1
3334 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3335 is on and OPTION_MASK_ABI_X32 is off. We turn off
3336 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3337 -mx32. */
3338 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3339 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3340 #else
3341 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3342 on and OPTION_MASK_ABI_64 is off. We turn off
3343 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3344 -m64. */
3345 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3347 #endif
3348 }
3349 #endif
3350
3351 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3352 {
3353 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3354 OPTION_MASK_ABI_64 for TARGET_X32. */
3355 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3357 }
3358 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3360 | OPTION_MASK_ABI_X32
3361 | OPTION_MASK_ABI_64);
3362 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3363 {
3364 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3365 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3366 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3367 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3368 }
3369
3370 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3371 SUBTARGET_OVERRIDE_OPTIONS;
3372 #endif
3373
3374 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3375 SUBSUBTARGET_OVERRIDE_OPTIONS;
3376 #endif
3377
3378 /* -fPIC is the default for 64-bit Darwin (Mach-O) targets. */
3379 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3380 opts->x_flag_pic = 2;
3381
3382 /* Need to check -mtune=generic first. */
3383 if (opts->x_ix86_tune_string)
3384 {
3385 /* As special support for cross compilers we read -mtune=native
3386 as -mtune=generic. With native compilers we won't see the
3387 -mtune=native, as it was changed by the driver. */
3388 if (!strcmp (opts->x_ix86_tune_string, "native"))
3389 {
3390 opts->x_ix86_tune_string = "generic";
3391 }
3392 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3393 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3394 "%stune=k8%s or %stune=generic%s instead as appropriate",
3395 prefix, suffix, prefix, suffix, prefix, suffix);
3396 }
3397 else
3398 {
3399 if (opts->x_ix86_arch_string)
3400 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3401 if (!opts->x_ix86_tune_string)
3402 {
3403 opts->x_ix86_tune_string
3404 = processor_target_table[TARGET_CPU_DEFAULT].name;
3405 ix86_tune_defaulted = 1;
3406 }
3407
3408 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3409 or defaulted. We need to use a sensible tune option. */
3410 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3411 {
3412 opts->x_ix86_tune_string = "generic";
3413 }
3414 }
3415
3416 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3417 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3418 {
3419 /* rep; movq isn't available in 32-bit code. */
3420 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3421 opts->x_ix86_stringop_alg = no_stringop;
3422 }
3423
3424 if (!opts->x_ix86_arch_string)
3425 opts->x_ix86_arch_string
3426 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3427 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3428 else
3429 ix86_arch_specified = 1;
3430
3431 if (opts_set->x_ix86_pmode)
3432 {
3433 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 && opts->x_ix86_pmode == PMODE_SI)
3435 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3436 && opts->x_ix86_pmode == PMODE_DI))
3437 error ("address mode %qs not supported in the %s bit mode",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3439 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3440 }
3441 else
3442 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3443 ? PMODE_DI : PMODE_SI;
3444
3445 if (!opts_set->x_ix86_abi)
3446 opts->x_ix86_abi = DEFAULT_ABI;
3447
3448 /* For targets using the MS ABI, enable ms-extensions unless it was
3449 explicitly turned off.  For non-MS-ABI targets we turn this
3450 option off. */
3451 if (!opts_set->x_flag_ms_extensions)
3452 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3453
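/* Validate an explicitly given -mcmodel= against the word size, x32 and PIC
   settings; when no code model was given, pick a default further below.  */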
3454 if (opts_set->x_ix86_cmodel)
3455 {
3456 switch (opts->x_ix86_cmodel)
3457 {
3458 case CM_SMALL:
3459 case CM_SMALL_PIC:
3460 if (opts->x_flag_pic)
3461 opts->x_ix86_cmodel = CM_SMALL_PIC;
3462 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in the %s bit mode",
3464 "small", "32");
3465 break;
3466
3467 case CM_MEDIUM:
3468 case CM_MEDIUM_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "medium", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "medium");
3477 break;
3478
3479 case CM_LARGE:
3480 case CM_LARGE_PIC:
3481 if (opts->x_flag_pic)
3482 opts->x_ix86_cmodel = CM_LARGE_PIC;
3483 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3484 error ("code model %qs not supported in the %s bit mode",
3485 "large", "32");
3486 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3487 error ("code model %qs not supported in x32 mode",
3488 "large");
3489 break;
3490
3491 case CM_32:
3492 if (opts->x_flag_pic)
3493 error ("code model %s does not support PIC mode", "32");
3494 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3495 error ("code model %qs not supported in the %s bit mode",
3496 "32", "64");
3497 break;
3498
3499 case CM_KERNEL:
3500 if (opts->x_flag_pic)
3501 {
3502 error ("code model %s does not support PIC mode", "kernel");
3503 opts->x_ix86_cmodel = CM_32;
3504 }
3505 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in the %s bit mode",
3507 "kernel", "32");
3508 break;
3509
3510 default:
3511 gcc_unreachable ();
3512 }
3513 }
3514 else
3515 {
3516 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3517 use of rip-relative addressing. This eliminates fixups that
3518 would otherwise be needed if this object is to be placed in a
3519 DLL, and is essentially just as efficient as direct addressing. */
3520 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3521 && (TARGET_RDOS || TARGET_PECOFF))
3522 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3523 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3524 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3525 else
3526 opts->x_ix86_cmodel = CM_32;
3527 }
3528 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3529 {
3530 error ("-masm=intel not supported in this configuration");
3531 opts->x_ix86_asm_dialect = ASM_ATT;
3532 }
3533 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3534 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3535 sorry ("%i-bit mode not compiled in",
3536 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3537
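/* Look up -march= in the alias table: record the architecture and its
   scheduling model, and turn on every ISA flag the entry implies unless that
   flag was set or cleared explicitly on the command line.  */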
3538 for (i = 0; i < pta_size; i++)
3539 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3540 {
3541 ix86_schedule = processor_alias_table[i].schedule;
3542 ix86_arch = processor_alias_table[i].processor;
3543 /* Default cpu tuning to the architecture. */
3544 ix86_tune = ix86_arch;
3545
3546 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3547 && !(processor_alias_table[i].flags & PTA_64BIT))
3548 error ("CPU you selected does not support x86-64 "
3549 "instruction set");
3550
3551 if (processor_alias_table[i].flags & PTA_MMX
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3554 if (processor_alias_table[i].flags & PTA_3DNOW
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3557 if (processor_alias_table[i].flags & PTA_3DNOW_A
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3560 if (processor_alias_table[i].flags & PTA_SSE
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3563 if (processor_alias_table[i].flags & PTA_SSE2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3566 if (processor_alias_table[i].flags & PTA_SSE3
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3569 if (processor_alias_table[i].flags & PTA_SSSE3
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3572 if (processor_alias_table[i].flags & PTA_SSE4_1
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3575 if (processor_alias_table[i].flags & PTA_SSE4_2
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3578 if (processor_alias_table[i].flags & PTA_AVX
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3581 if (processor_alias_table[i].flags & PTA_AVX2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3584 if (processor_alias_table[i].flags & PTA_FMA
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3587 if (processor_alias_table[i].flags & PTA_SSE4A
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3590 if (processor_alias_table[i].flags & PTA_FMA4
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3593 if (processor_alias_table[i].flags & PTA_XOP
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3596 if (processor_alias_table[i].flags & PTA_LWP
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3599 if (processor_alias_table[i].flags & PTA_ABM
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3602 if (processor_alias_table[i].flags & PTA_BMI
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3605 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3608 if (processor_alias_table[i].flags & PTA_TBM
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3611 if (processor_alias_table[i].flags & PTA_BMI2
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3614 if (processor_alias_table[i].flags & PTA_CX16
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3617 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3620 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3624 if (processor_alias_table[i].flags & PTA_MOVBE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3627 if (processor_alias_table[i].flags & PTA_AES
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3630 if (processor_alias_table[i].flags & PTA_SHA
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3633 if (processor_alias_table[i].flags & PTA_PCLMUL
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3636 if (processor_alias_table[i].flags & PTA_FSGSBASE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3639 if (processor_alias_table[i].flags & PTA_RDRND
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3642 if (processor_alias_table[i].flags & PTA_F16C
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3645 if (processor_alias_table[i].flags & PTA_RTM
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3648 if (processor_alias_table[i].flags & PTA_HLE
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3651 if (processor_alias_table[i].flags & PTA_PRFCHW
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3654 if (processor_alias_table[i].flags & PTA_RDSEED
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3657 if (processor_alias_table[i].flags & PTA_ADX
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3660 if (processor_alias_table[i].flags & PTA_FXSR
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3663 if (processor_alias_table[i].flags & PTA_XSAVE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3666 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3669 if (processor_alias_table[i].flags & PTA_AVX512F
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3672 if (processor_alias_table[i].flags & PTA_AVX512ER
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3675 if (processor_alias_table[i].flags & PTA_AVX512PF
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3678 if (processor_alias_table[i].flags & PTA_AVX512CD
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3681 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3684 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3687 if (processor_alias_table[i].flags & PTA_XSAVEC
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3690 if (processor_alias_table[i].flags & PTA_XSAVES
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3693 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3694 x86_prefetch_sse = true;
3695
3696 break;
3697 }
3698
3699 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3700 error ("generic CPU can be used only for %stune=%s %s",
3701 prefix, suffix, sw);
3702 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3703 error ("intel CPU can be used only for %stune=%s %s",
3704 prefix, suffix, sw);
3705 else if (i == pta_size)
3706 error ("bad value (%s) for %sarch=%s %s",
3707 opts->x_ix86_arch_string, prefix, suffix, sw);
3708
3709 ix86_arch_mask = 1u << ix86_arch;
3710 for (i = 0; i < X86_ARCH_LAST; ++i)
3711 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3712
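/* Now look up -mtune= in the same table to pick the scheduling model; unlike
   -march=, this does not enable ISA extensions, although it may allow SSE
   prefetch instructions (see the comment below).  */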
3713 for (i = 0; i < pta_size; i++)
3714 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3715 {
3716 ix86_schedule = processor_alias_table[i].schedule;
3717 ix86_tune = processor_alias_table[i].processor;
3718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3719 {
3720 if (!(processor_alias_table[i].flags & PTA_64BIT))
3721 {
3722 if (ix86_tune_defaulted)
3723 {
3724 opts->x_ix86_tune_string = "x86-64";
3725 for (i = 0; i < pta_size; i++)
3726 if (! strcmp (opts->x_ix86_tune_string,
3727 processor_alias_table[i].name))
3728 break;
3729 ix86_schedule = processor_alias_table[i].schedule;
3730 ix86_tune = processor_alias_table[i].processor;
3731 }
3732 else
3733 error ("CPU you selected does not support x86-64 "
3734 "instruction set");
3735 }
3736 }
3737 /* Intel CPUs have always interpreted SSE prefetch instructions as
3738 NOPs; so, we can enable SSE prefetch instructions even when
3739 -mtune (rather than -march) points us to a processor that has them.
3740 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3741 higher processors. */
3742 if (TARGET_CMOV
3743 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3744 x86_prefetch_sse = true;
3745 break;
3746 }
3747
3748 if (ix86_tune_specified && i == pta_size)
3749 error ("bad value (%s) for %stune=%s %s",
3750 opts->x_ix86_tune_string, prefix, suffix, sw);
3751
3752 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3753
3754 #ifndef USE_IX86_FRAME_POINTER
3755 #define USE_IX86_FRAME_POINTER 0
3756 #endif
3757
3758 #ifndef USE_X86_64_FRAME_POINTER
3759 #define USE_X86_64_FRAME_POINTER 0
3760 #endif
3761
3762 /* Set the default values for switches whose default depends on TARGET_64BIT
3763 in case they weren't overwritten by command line options. */
3764 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3765 {
3766 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3767 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3768 if (opts->x_flag_asynchronous_unwind_tables
3769 && !opts_set->x_flag_unwind_tables
3770 && TARGET_64BIT_MS_ABI)
3771 opts->x_flag_unwind_tables = 1;
3772 if (opts->x_flag_asynchronous_unwind_tables == 2)
3773 opts->x_flag_unwind_tables
3774 = opts->x_flag_asynchronous_unwind_tables = 1;
3775 if (opts->x_flag_pcc_struct_return == 2)
3776 opts->x_flag_pcc_struct_return = 0;
3777 }
3778 else
3779 {
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer
3782 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3783 if (opts->x_flag_asynchronous_unwind_tables == 2)
3784 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3785 if (opts->x_flag_pcc_struct_return == 2)
3786 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3787 }
3788
3789 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3790 if (opts->x_optimize_size)
3791 ix86_cost = &ix86_size_cost;
3792 else
3793 ix86_cost = ix86_tune_cost;
3794
3795 /* Arrange to set up i386_stack_locals for all functions. */
3796 init_machine_status = ix86_init_machine_status;
3797
3798 /* Validate -mregparm= value. */
3799 if (opts_set->x_ix86_regparm)
3800 {
3801 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3802 warning (0, "-mregparm is ignored in 64-bit mode");
3803 if (opts->x_ix86_regparm > REGPARM_MAX)
3804 {
3805 error ("-mregparm=%d is not between 0 and %d",
3806 opts->x_ix86_regparm, REGPARM_MAX);
3807 opts->x_ix86_regparm = 0;
3808 }
3809 }
3810 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3811 opts->x_ix86_regparm = REGPARM_MAX;
3812
3813 /* Default align_* from the processor table. */
3814 if (opts->x_align_loops == 0)
3815 {
3816 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3817 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3818 }
3819 if (opts->x_align_jumps == 0)
3820 {
3821 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3822 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3823 }
3824 if (opts->x_align_functions == 0)
3825 {
3826 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3827 }
3828
3829 /* Provide default for -mbranch-cost= value. */
3830 if (!opts_set->x_ix86_branch_cost)
3831 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3832
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3834 {
3835 opts->x_target_flags
3836 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3837
3838 /* Enable by default the SSE and MMX builtins. Do allow the user to
3839 explicitly disable any of these. In particular, disabling SSE and
3840 MMX for kernel code is extremely useful. */
3841 if (!ix86_arch_specified)
3842 opts->x_ix86_isa_flags
3843 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3844 | TARGET_SUBTARGET64_ISA_DEFAULT)
3845 & ~opts->x_ix86_isa_flags_explicit);
3846
3847 if (TARGET_RTD_P (opts->x_target_flags))
3848 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3849 }
3850 else
3851 {
3852 opts->x_target_flags
3853 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3854
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3858
3859 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3860 one when the programmer takes care to keep the stack from being destroyed. */
3861 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3862 opts->x_target_flags |= MASK_NO_RED_ZONE;
3863 }
3864
3865 /* Keep nonleaf frame pointers. */
3866 if (opts->x_flag_omit_frame_pointer)
3867 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3868 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3869 opts->x_flag_omit_frame_pointer = 1;
3870
3871 /* If we're doing fast math, we don't care about comparison order
3872 wrt NaNs. This lets us use a shorter comparison sequence. */
3873 if (opts->x_flag_finite_math_only)
3874 opts->x_target_flags &= ~MASK_IEEE_FP;
3875
3876 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3877 since the insns won't need emulation. */
3878 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3879 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3880
3881 /* Likewise, if the target doesn't have a 387, or we've specified
3882 software floating point, don't use 387 inline intrinsics. */
3883 if (!TARGET_80387_P (opts->x_target_flags))
3884 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3885
3886 /* Turn on MMX builtins for -msse. */
3887 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3888 opts->x_ix86_isa_flags
3889 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3890
3891 /* Enable SSE prefetch. */
3892 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3893 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3894 x86_prefetch_sse = true;
3895
3896 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3897 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3898 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3899 opts->x_ix86_isa_flags
3900 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3901
3902 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3903 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3904 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3905 opts->x_ix86_isa_flags
3906 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3907
3908 /* Enable lzcnt instruction for -mabm. */
3909 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3910 opts->x_ix86_isa_flags
3911 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3912
3913 /* Validate -mpreferred-stack-boundary= value or default it to
3914 PREFERRED_STACK_BOUNDARY_DEFAULT. */
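/* The argument is the log2 of the boundary in bytes, so e.g.
   -mpreferred-stack-boundary=4 requests 2**4 = 16-byte (128-bit) alignment,
   matching the (1 << arg) * BITS_PER_UNIT computation below.  */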
3915 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3916 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3917 {
3918 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3919 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3920 int max = (TARGET_SEH ? 4 : 12);
3921
3922 if (opts->x_ix86_preferred_stack_boundary_arg < min
3923 || opts->x_ix86_preferred_stack_boundary_arg > max)
3924 {
3925 if (min == max)
3926 error ("-mpreferred-stack-boundary is not supported "
3927 "for this target");
3928 else
3929 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3930 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3931 }
3932 else
3933 ix86_preferred_stack_boundary
3934 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3935 }
3936
3937 /* Set the default value for -mstackrealign. */
3938 if (opts->x_ix86_force_align_arg_pointer == -1)
3939 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3940
3941 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3942
3943 /* Validate -mincoming-stack-boundary= value or default it to
3944 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3945 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3946 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3947 {
3948 if (opts->x_ix86_incoming_stack_boundary_arg
3949 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3950 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3951 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3952 opts->x_ix86_incoming_stack_boundary_arg,
3953 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3954 else
3955 {
3956 ix86_user_incoming_stack_boundary
3957 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3958 ix86_incoming_stack_boundary
3959 = ix86_user_incoming_stack_boundary;
3960 }
3961 }
3962
3963 /* Accept -msseregparm only if at least SSE support is enabled. */
3964 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3965 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3966 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3967
3968 if (opts_set->x_ix86_fpmath)
3969 {
3970 if (opts->x_ix86_fpmath & FPMATH_SSE)
3971 {
3972 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3973 {
3974 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3975 opts->x_ix86_fpmath = FPMATH_387;
3976 }
3977 else if ((opts->x_ix86_fpmath & FPMATH_387)
3978 && !TARGET_80387_P (opts->x_target_flags))
3979 {
3980 warning (0, "387 instruction set disabled, using SSE arithmetics");
3981 opts->x_ix86_fpmath = FPMATH_SSE;
3982 }
3983 }
3984 }
3985 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3986 -mfpmath=387.  The latter is nevertheless the default on many targets,
3987 since the extra 80-bit precision of temporaries is considered part of the ABI.
3988 Overwrite the default at least for -ffast-math.
3989 TODO: -mfpmath=both seems to produce equally performing code with slightly
3990 smaller binaries.  It is however not clear whether register allocation is
3991 ready for this setting.
3992 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3993 codegen.  We may switch to 387 with -ffast-math for size-optimized
3994 functions. */
3995 else if (fast_math_flags_set_p (&global_options)
3996 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_fpmath = FPMATH_SSE;
3998 else
3999 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4000
4001 /* If the i387 is disabled, then do not return values in it. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4004
4005 /* Use an external vectorized library for vectorizing intrinsics. */
4006 if (opts_set->x_ix86_veclibabi_type)
4007 switch (opts->x_ix86_veclibabi_type)
4008 {
4009 case ix86_veclibabi_type_svml:
4010 ix86_veclib_handler = ix86_veclibabi_svml;
4011 break;
4012
4013 case ix86_veclibabi_type_acml:
4014 ix86_veclib_handler = ix86_veclibabi_acml;
4015 break;
4016
4017 default:
4018 gcc_unreachable ();
4019 }
4020
4021 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4022 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4023 && !opts->x_optimize_size)
4024 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4025
4026 /* If stack probes are required, the space used for large function
4027 arguments on the stack must also be probed, so enable
4028 -maccumulate-outgoing-args so this happens in the prologue. */
4029 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4030 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4031 {
4032 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4033 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4034 "for correctness", prefix, suffix);
4035 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4036 }
4037
4038 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4039 {
4040 char *p;
4041 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4042 p = strchr (internal_label_prefix, 'X');
4043 internal_label_prefix_len = p - internal_label_prefix;
4044 *p = '\0';
4045 }
4046
4047 /* When no scheduling description is available, disable the scheduler passes
4048 so they do not slow down compilation or make x87 code slower. */
4049 if (!TARGET_SCHEDULE)
4050 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4051
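/* Seed the prefetch- and cache-related --param defaults from the tuning cost
   tables, without overriding values the user set explicitly.  */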
4052 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4053 ix86_tune_cost->simultaneous_prefetches,
4054 opts->x_param_values,
4055 opts_set->x_param_values);
4056 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4057 ix86_tune_cost->prefetch_block,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4061 ix86_tune_cost->l1_cache_size,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4065 ix86_tune_cost->l2_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4068
4069 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4070 if (opts->x_flag_prefetch_loop_arrays < 0
4071 && HAVE_prefetch
4072 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4073 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4074 opts->x_flag_prefetch_loop_arrays = 1;
4075
4076 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4077 can be optimized to ap = __builtin_next_arg (0). */
4078 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4079 targetm.expand_builtin_va_start = NULL;
4080
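/* Pick the RTL generator functions that depend on the target word size and
   on Pmode (DImode vs. SImode).  */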
4081 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4082 {
4083 ix86_gen_leave = gen_leave_rex64;
4084 if (Pmode == DImode)
4085 {
4086 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4087 ix86_gen_tls_local_dynamic_base_64
4088 = gen_tls_local_dynamic_base_64_di;
4089 }
4090 else
4091 {
4092 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4093 ix86_gen_tls_local_dynamic_base_64
4094 = gen_tls_local_dynamic_base_64_si;
4095 }
4096 }
4097 else
4098 ix86_gen_leave = gen_leave;
4099
4100 if (Pmode == DImode)
4101 {
4102 ix86_gen_add3 = gen_adddi3;
4103 ix86_gen_sub3 = gen_subdi3;
4104 ix86_gen_sub3_carry = gen_subdi3_carry;
4105 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4106 ix86_gen_andsp = gen_anddi3;
4107 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4108 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4109 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4110 ix86_gen_monitor = gen_sse3_monitor_di;
4111 }
4112 else
4113 {
4114 ix86_gen_add3 = gen_addsi3;
4115 ix86_gen_sub3 = gen_subsi3;
4116 ix86_gen_sub3_carry = gen_subsi3_carry;
4117 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4118 ix86_gen_andsp = gen_andsi3;
4119 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4120 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4121 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4122 ix86_gen_monitor = gen_sse3_monitor_si;
4123 }
4124
4125 #ifdef USE_IX86_CLD
4126 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4127 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4128 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4129 #endif
4130
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4132 {
4133 if (opts->x_flag_fentry > 0)
4134 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4135 "with -fpic");
4136 opts->x_flag_fentry = 0;
4137 }
4138 else if (TARGET_SEH)
4139 {
4140 if (opts->x_flag_fentry == 0)
4141 sorry ("-mno-fentry isn%'t compatible with SEH");
4142 opts->x_flag_fentry = 1;
4143 }
4144 else if (opts->x_flag_fentry < 0)
4145 {
4146 #if defined(PROFILE_BEFORE_PROLOGUE)
4147 opts->x_flag_fentry = 1;
4148 #else
4149 opts->x_flag_fentry = 0;
4150 #endif
4151 }
4152
4153 /* When not optimizing for size, enable vzeroupper optimization for
4154 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4155 AVX unaligned load/store. */
4156 if (!opts->x_optimize_size)
4157 {
4158 if (flag_expensive_optimizations
4159 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4160 opts->x_target_flags |= MASK_VZEROUPPER;
4161 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4162 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4163 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4164 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4165 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4166 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4167 /* Enable 128-bit AVX instruction generation
4168 for the auto-vectorizer. */
4169 if (TARGET_AVX128_OPTIMAL
4170 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4171 opts->x_target_flags |= MASK_PREFER_AVX128;
4172 }
4173
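/* Parse the -mrecip= list: entries are separated by commas, a leading '!'
   clears the named mask bits, and "default" stands for all of them.  */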
4174 if (opts->x_ix86_recip_name)
4175 {
4176 char *p = ASTRDUP (opts->x_ix86_recip_name);
4177 char *q;
4178 unsigned int mask, i;
4179 bool invert;
4180
4181 while ((q = strtok (p, ",")) != NULL)
4182 {
4183 p = NULL;
4184 if (*q == '!')
4185 {
4186 invert = true;
4187 q++;
4188 }
4189 else
4190 invert = false;
4191
4192 if (!strcmp (q, "default"))
4193 mask = RECIP_MASK_ALL;
4194 else
4195 {
4196 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4197 if (!strcmp (q, recip_options[i].string))
4198 {
4199 mask = recip_options[i].mask;
4200 break;
4201 }
4202
4203 if (i == ARRAY_SIZE (recip_options))
4204 {
4205 error ("unknown option for -mrecip=%s", q);
4206 invert = false;
4207 mask = RECIP_MASK_NONE;
4208 }
4209 }
4210
4211 opts->x_recip_mask_explicit |= mask;
4212 if (invert)
4213 opts->x_recip_mask &= ~mask;
4214 else
4215 opts->x_recip_mask |= mask;
4216 }
4217 }
4218
4219 if (TARGET_RECIP_P (opts->x_target_flags))
4220 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4221 else if (opts_set->x_target_flags & MASK_RECIP)
4222 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4223
4224 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4225 for 64-bit Bionic. */
4226 if (TARGET_HAS_BIONIC
4227 && !(opts_set->x_target_flags
4228 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4229 opts->x_target_flags |= (TARGET_64BIT
4230 ? MASK_LONG_DOUBLE_128
4231 : MASK_LONG_DOUBLE_64);
4232
4233 /* Only one of them can be active. */
4234 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4235 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4236
4237 /* Save the initial options in case the user uses function-specific
4238 options. */
4239 if (main_args_p)
4240 target_option_default_node = target_option_current_node
4241 = build_target_option_node (opts);
4242
4243 /* Handle stack protector */
4244 if (!opts_set->x_ix86_stack_protector_guard)
4245 opts->x_ix86_stack_protector_guard
4246 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4247
4248 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4249 if (opts->x_ix86_tune_memcpy_strategy)
4250 {
4251 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4252 ix86_parse_stringop_strategy_string (str, false);
4253 free (str);
4254 }
4255
4256 if (opts->x_ix86_tune_memset_strategy)
4257 {
4258 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4259 ix86_parse_stringop_strategy_string (str, true);
4260 free (str);
4261 }
4262 }
4263
4264 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4265
4266 static void
4267 ix86_option_override (void)
4268 {
4269 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4270 static struct register_pass_info insert_vzeroupper_info
4271 = { pass_insert_vzeroupper, "reload",
4272 1, PASS_POS_INSERT_AFTER
4273 };
4274
4275 ix86_option_override_internal (true, &global_options, &global_options_set);
4276
4277
4278 /* This needs to be done at start up. It's convenient to do it here. */
4279 register_pass (&insert_vzeroupper_info);
4280 }
4281
4282 /* Update register usage after having seen the compiler flags. */
4283
4284 static void
4285 ix86_conditional_register_usage (void)
4286 {
4287 int i, c_mask;
4288 unsigned int j;
4289
4290 /* The PIC register, if it exists, is fixed. */
4291 j = PIC_OFFSET_TABLE_REGNUM;
4292 if (j != INVALID_REGNUM)
4293 fixed_regs[j] = call_used_regs[j] = 1;
4294
4295 /* For 32-bit targets, squash the REX registers. */
4296 if (! TARGET_64BIT)
4297 {
4298 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4299 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4300 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4301 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4302 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4304 }
4305
4306 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4307 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4308 : TARGET_64BIT ? (1 << 2)
4309 : (1 << 1));
4310
4311 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4312
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4314 {
4315 /* Set/reset conditionally defined registers from
4316 CALL_USED_REGISTERS initializer. */
4317 if (call_used_regs[i] > 1)
4318 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4319
4320 /* Calculate registers of CLOBBERED_REGS register set
4321 as call used registers from GENERAL_REGS register set. */
4322 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4323 && call_used_regs[i])
4324 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4325 }
4326
4327 /* If MMX is disabled, squash the registers. */
4328 if (! TARGET_MMX)
4329 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4330 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332
4333 /* If SSE is disabled, squash the registers. */
4334 if (! TARGET_SSE)
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338
4339 /* If the FPU is disabled, squash the registers. */
4340 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4342 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4343 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4344
4345 /* If AVX512F is disabled, squash the registers. */
4346 if (! TARGET_AVX512F)
4347 {
4348 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4349 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4350
4351 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4353 }
4354 }
4355
4356 \f
4357 /* Save the current options */
4358
4359 static void
4360 ix86_function_specific_save (struct cl_target_option *ptr,
4361 struct gcc_options *opts)
4362 {
4363 ptr->arch = ix86_arch;
4364 ptr->schedule = ix86_schedule;
4365 ptr->tune = ix86_tune;
4366 ptr->branch_cost = ix86_branch_cost;
4367 ptr->tune_defaulted = ix86_tune_defaulted;
4368 ptr->arch_specified = ix86_arch_specified;
4369 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4370 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4371 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4372 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4373 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4374 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4375 ptr->x_ix86_abi = opts->x_ix86_abi;
4376 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4377 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4378 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4379 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4380 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4381 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4382 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4383 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4384 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4385 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4386 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4387 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4388 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4389 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4390 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4391 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4392 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4393 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4394 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4395 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4396
4397 /* The fields are char but the variables are not; make sure the
4398 values fit in the fields. */
4399 gcc_assert (ptr->arch == ix86_arch);
4400 gcc_assert (ptr->schedule == ix86_schedule);
4401 gcc_assert (ptr->tune == ix86_tune);
4402 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4403 }
4404
4405 /* Restore the current options */
4406
4407 static void
4408 ix86_function_specific_restore (struct gcc_options *opts,
4409 struct cl_target_option *ptr)
4410 {
4411 enum processor_type old_tune = ix86_tune;
4412 enum processor_type old_arch = ix86_arch;
4413 unsigned int ix86_arch_mask;
4414 int i;
4415
4416 /* We don't change -fPIC. */
4417 opts->x_flag_pic = flag_pic;
4418
4419 ix86_arch = (enum processor_type) ptr->arch;
4420 ix86_schedule = (enum attr_cpu) ptr->schedule;
4421 ix86_tune = (enum processor_type) ptr->tune;
4422 opts->x_ix86_branch_cost = ptr->branch_cost;
4423 ix86_tune_defaulted = ptr->tune_defaulted;
4424 ix86_arch_specified = ptr->arch_specified;
4425 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4426 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4427 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4428 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4429 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4430 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4431 opts->x_ix86_abi = ptr->x_ix86_abi;
4432 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4433 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4434 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4435 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4436 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4437 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4438 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4439 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4440 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4441 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4442 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4443 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4444 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4445 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4446 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4447 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4448 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4449 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4450 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4451 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4452
4453 /* Recreate the arch feature tests if the arch changed */
4454 if (old_arch != ix86_arch)
4455 {
4456 ix86_arch_mask = 1u << ix86_arch;
4457 for (i = 0; i < X86_ARCH_LAST; ++i)
4458 ix86_arch_features[i]
4459 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4460 }
4461
4462 /* Recreate the tune optimization tests */
4463 if (old_tune != ix86_tune)
4464 set_ix86_tune_features (ix86_tune, false);
4465 }
4466
4467 /* Print the current options */
4468
4469 static void
4470 ix86_function_specific_print (FILE *file, int indent,
4471 struct cl_target_option *ptr)
4472 {
4473 char *target_string
4474 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4475 NULL, NULL, ptr->x_ix86_fpmath, false);
4476
4477 gcc_assert (ptr->arch < PROCESSOR_max);
4478 fprintf (file, "%*sarch = %d (%s)\n",
4479 indent, "",
4480 ptr->arch, processor_target_table[ptr->arch].name);
4481
4482 gcc_assert (ptr->tune < PROCESSOR_max);
4483 fprintf (file, "%*stune = %d (%s)\n",
4484 indent, "",
4485 ptr->tune, processor_target_table[ptr->tune].name);
4486
4487 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4488
4489 if (target_string)
4490 {
4491 fprintf (file, "%*s%s\n", indent, "", target_string);
4492 free (target_string);
4493 }
4494 }
4495
4496 \f
4497 /* Inner function to process the attribute((target(...))), take an argument and
4498 set the current options from the argument. If we have a list, recursively go
4499 over the list. */
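/* Illustrative (editor's sketch, not part of the parsing logic itself):
   the strings handled here are the comma-separated tokens of the target
   attribute, for example

     __attribute__((target ("avx2,no-sse4a")))   - ISA options, "no-" inverts
     __attribute__((target ("arch=core2")))      - string option
     __attribute__((target ("fpmath=sse")))      - enum option
     __attribute__((target ("recip,ieee-fp")))   - flag options

   Each token is matched against the attrs[] table in the function below.  */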
4500
4501 static bool
4502 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4503 struct gcc_options *opts,
4504 struct gcc_options *opts_set,
4505 struct gcc_options *enum_opts_set)
4506 {
4507 char *next_optstr;
4508 bool ret = true;
4509
4510 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4511 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4512 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4513 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4514 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4515
4516 enum ix86_opt_type
4517 {
4518 ix86_opt_unknown,
4519 ix86_opt_yes,
4520 ix86_opt_no,
4521 ix86_opt_str,
4522 ix86_opt_enum,
4523 ix86_opt_isa
4524 };
4525
4526 static const struct
4527 {
4528 const char *string;
4529 size_t len;
4530 enum ix86_opt_type type;
4531 int opt;
4532 int mask;
4533 } attrs[] = {
4534 /* isa options */
4535 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4536 IX86_ATTR_ISA ("abm", OPT_mabm),
4537 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4538 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4539 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4540 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4541 IX86_ATTR_ISA ("aes", OPT_maes),
4542 IX86_ATTR_ISA ("sha", OPT_msha),
4543 IX86_ATTR_ISA ("avx", OPT_mavx),
4544 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4545 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4546 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4547 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4548 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4549 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4550 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4551 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4552 IX86_ATTR_ISA ("sse", OPT_msse),
4553 IX86_ATTR_ISA ("sse2", OPT_msse2),
4554 IX86_ATTR_ISA ("sse3", OPT_msse3),
4555 IX86_ATTR_ISA ("sse4", OPT_msse4),
4556 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4557 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4558 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4559 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4560 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4561 IX86_ATTR_ISA ("fma", OPT_mfma),
4562 IX86_ATTR_ISA ("xop", OPT_mxop),
4563 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4564 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4565 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4566 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4567 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4568 IX86_ATTR_ISA ("hle", OPT_mhle),
4569 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4570 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4571 IX86_ATTR_ISA ("adx", OPT_madx),
4572 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4573 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4574 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4575 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4576 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4577 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4578 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4579
4580 /* enum options */
4581 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4582
4583 /* string options */
4584 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4585 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4586
4587 /* flag options */
4588 IX86_ATTR_YES ("cld",
4589 OPT_mcld,
4590 MASK_CLD),
4591
4592 IX86_ATTR_NO ("fancy-math-387",
4593 OPT_mfancy_math_387,
4594 MASK_NO_FANCY_MATH_387),
4595
4596 IX86_ATTR_YES ("ieee-fp",
4597 OPT_mieee_fp,
4598 MASK_IEEE_FP),
4599
4600 IX86_ATTR_YES ("inline-all-stringops",
4601 OPT_minline_all_stringops,
4602 MASK_INLINE_ALL_STRINGOPS),
4603
4604 IX86_ATTR_YES ("inline-stringops-dynamically",
4605 OPT_minline_stringops_dynamically,
4606 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4607
4608 IX86_ATTR_NO ("align-stringops",
4609 OPT_mno_align_stringops,
4610 MASK_NO_ALIGN_STRINGOPS),
4611
4612 IX86_ATTR_YES ("recip",
4613 OPT_mrecip,
4614 MASK_RECIP),
4615
4616 };
4617
4618 /* If this is a list, recurse to get the options. */
4619 if (TREE_CODE (args) == TREE_LIST)
4620 {
4621 bool ret = true;
4622
4623 for (; args; args = TREE_CHAIN (args))
4624 if (TREE_VALUE (args)
4625 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4626 p_strings, opts, opts_set,
4627 enum_opts_set))
4628 ret = false;
4629
4630 return ret;
4631 }
4632
4633 else if (TREE_CODE (args) != STRING_CST)
4634 {
4635 error ("attribute %<target%> argument not a string");
4636 return false;
4637 }
4638
4639 /* Handle multiple arguments separated by commas. */
4640 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4641
4642 while (next_optstr && *next_optstr != '\0')
4643 {
4644 char *p = next_optstr;
4645 char *orig_p = p;
4646 char *comma = strchr (next_optstr, ',');
4647 const char *opt_string;
4648 size_t len, opt_len;
4649 int opt;
4650 bool opt_set_p;
4651 char ch;
4652 unsigned i;
4653 enum ix86_opt_type type = ix86_opt_unknown;
4654 int mask = 0;
4655
4656 if (comma)
4657 {
4658 *comma = '\0';
4659 len = comma - next_optstr;
4660 next_optstr = comma + 1;
4661 }
4662 else
4663 {
4664 len = strlen (p);
4665 next_optstr = NULL;
4666 }
4667
4668 /* Recognize no-xxx. */
4669 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4670 {
4671 opt_set_p = false;
4672 p += 3;
4673 len -= 3;
4674 }
4675 else
4676 opt_set_p = true;
4677
4678 /* Find the option. */
4679 ch = *p;
4680 opt = N_OPTS;
4681 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4682 {
4683 type = attrs[i].type;
4684 opt_len = attrs[i].len;
4685 if (ch == attrs[i].string[0]
4686 && ((type != ix86_opt_str && type != ix86_opt_enum)
4687 ? len == opt_len
4688 : len > opt_len)
4689 && memcmp (p, attrs[i].string, opt_len) == 0)
4690 {
4691 opt = attrs[i].opt;
4692 mask = attrs[i].mask;
4693 opt_string = attrs[i].string;
4694 break;
4695 }
4696 }
4697
4698 /* Process the option. */
4699 if (opt == N_OPTS)
4700 {
4701 error ("attribute(target(\"%s\")) is unknown", orig_p);
4702 ret = false;
4703 }
4704
4705 else if (type == ix86_opt_isa)
4706 {
4707 struct cl_decoded_option decoded;
4708
4709 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4710 ix86_handle_option (opts, opts_set,
4711 &decoded, input_location);
4712 }
4713
4714 else if (type == ix86_opt_yes || type == ix86_opt_no)
4715 {
4716 if (type == ix86_opt_no)
4717 opt_set_p = !opt_set_p;
4718
4719 if (opt_set_p)
4720 opts->x_target_flags |= mask;
4721 else
4722 opts->x_target_flags &= ~mask;
4723 }
4724
4725 else if (type == ix86_opt_str)
4726 {
4727 if (p_strings[opt])
4728 {
4729 error ("option(\"%s\") was already specified", opt_string);
4730 ret = false;
4731 }
4732 else
4733 p_strings[opt] = xstrdup (p + opt_len);
4734 }
4735
4736 else if (type == ix86_opt_enum)
4737 {
4738 bool arg_ok;
4739 int value;
4740
4741 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4742 if (arg_ok)
4743 set_option (opts, enum_opts_set, opt, value,
4744 p + opt_len, DK_UNSPECIFIED, input_location,
4745 global_dc);
4746 else
4747 {
4748 error ("attribute(target(\"%s\")) is unknown", orig_p);
4749 ret = false;
4750 }
4751 }
4752
4753 else
4754 gcc_unreachable ();
4755 }
4756
4757 return ret;
4758 }
4759
4760 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4761
4762 tree
4763 ix86_valid_target_attribute_tree (tree args,
4764 struct gcc_options *opts,
4765 struct gcc_options *opts_set)
4766 {
4767 const char *orig_arch_string = opts->x_ix86_arch_string;
4768 const char *orig_tune_string = opts->x_ix86_tune_string;
4769 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4770 int orig_tune_defaulted = ix86_tune_defaulted;
4771 int orig_arch_specified = ix86_arch_specified;
4772 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4773 tree t = NULL_TREE;
4774 int i;
4775 struct cl_target_option *def
4776 = TREE_TARGET_OPTION (target_option_default_node);
4777 struct gcc_options enum_opts_set;
4778
4779 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4780
4781 /* Process each of the options on the chain. */
4782 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4783 opts_set, &enum_opts_set))
4784 return error_mark_node;
4785
4786 /* If the changed options are different from the default, rerun
4787 ix86_option_override_internal, and then save the options away.
4788 The string options are attribute options, and will be undone
4789 when we copy the save structure. */
4790 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4791 || opts->x_target_flags != def->x_target_flags
4792 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4793 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4794 || enum_opts_set.x_ix86_fpmath)
4795 {
4796 /* If we are using the default tune= or arch=, undo the string assigned,
4797 and use the default. */
4798 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4799 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4800 else if (!orig_arch_specified)
4801 opts->x_ix86_arch_string = NULL;
4802
4803 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4804 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4805 else if (orig_tune_defaulted)
4806 opts->x_ix86_tune_string = NULL;
4807
4808 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4809 if (enum_opts_set.x_ix86_fpmath)
4810 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4811 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4812 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4813 {
4814 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4815 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4816 }
4817
4818 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4819 ix86_option_override_internal (false, opts, opts_set);
4820
4821 /* Add any builtin functions with the new isa if any. */
4822 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4823
4824 /* Save the current options unless we are validating options for
4825 #pragma. */
4826 t = build_target_option_node (opts);
4827
4828 opts->x_ix86_arch_string = orig_arch_string;
4829 opts->x_ix86_tune_string = orig_tune_string;
4830 opts_set->x_ix86_fpmath = orig_fpmath_set;
4831
4832 /* Free up memory allocated to hold the strings */
4833 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4834 free (option_strings[i]);
4835 }
4836
4837 return t;
4838 }
4839
4840 /* Hook to validate attribute((target("string"))). */
4841
4842 static bool
4843 ix86_valid_target_attribute_p (tree fndecl,
4844 tree ARG_UNUSED (name),
4845 tree args,
4846 int ARG_UNUSED (flags))
4847 {
4848 struct gcc_options func_options;
4849 tree new_target, new_optimize;
4850 bool ret = true;
4851
4852 /* attribute((target("default"))) does nothing, beyond
4853 affecting multi-versioning. */
4854 if (TREE_VALUE (args)
4855 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4856 && TREE_CHAIN (args) == NULL_TREE
4857 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4858 return true;
4859
4860 tree old_optimize = build_optimization_node (&global_options);
4861
4862 /* Get the optimization options of the current function. */
4863 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4864
4865 if (!func_optimize)
4866 func_optimize = old_optimize;
4867
4868 /* Init func_options. */
4869 memset (&func_options, 0, sizeof (func_options));
4870 init_options_struct (&func_options, NULL);
4871 lang_hooks.init_options_struct (&func_options);
4872
4873 cl_optimization_restore (&func_options,
4874 TREE_OPTIMIZATION (func_optimize));
4875
4876 /* Initialize func_options to the default before its target options can
4877 be set. */
4878 cl_target_option_restore (&func_options,
4879 TREE_TARGET_OPTION (target_option_default_node));
4880
4881 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4882 &global_options_set);
4883
4884 new_optimize = build_optimization_node (&func_options);
4885
4886 if (new_target == error_mark_node)
4887 ret = false;
4888
4889 else if (fndecl && new_target)
4890 {
4891 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4892
4893 if (old_optimize != new_optimize)
4894 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4895 }
4896
4897 return ret;
4898 }
4899
4900 \f
4901 /* Hook to determine if one function can safely inline another. */
4902
4903 static bool
4904 ix86_can_inline_p (tree caller, tree callee)
4905 {
4906 bool ret = false;
4907 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4908 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4909
4910 /* If callee has no option attributes, then it is ok to inline. */
4911 if (!callee_tree)
4912 ret = true;
4913
4914 /* If caller has no option attributes, but callee does then it is not ok to
4915 inline. */
4916 else if (!caller_tree)
4917 ret = false;
4918
4919 else
4920 {
4921 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4922 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4923
4924 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4925 function can inline an SSE2 function but an SSE2 function can't inline
4926 an SSE4 function. */
4927 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4928 != callee_opts->x_ix86_isa_flags)
4929 ret = false;
4930
4931 /* See if we have the same non-isa options. */
4932 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4933 ret = false;
4934
4935 /* See if arch, tune, etc. are the same. */
4936 else if (caller_opts->arch != callee_opts->arch)
4937 ret = false;
4938
4939 else if (caller_opts->tune != callee_opts->tune)
4940 ret = false;
4941
4942 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4943 ret = false;
4944
4945 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4946 ret = false;
4947
4948 else
4949 ret = true;
4950 }
4951
4952 return ret;
4953 }
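/* A hedged user-level sketch of the rule above (hypothetical declaration):
   a caller built with -msse4.2 (or target("sse4.2")) may inline

     __attribute__((target ("sse2"))) static int callee (int x) { return x; }

   because the callee's ISA flags are a subset of the caller's, while the
   opposite direction fails the subset test, assuming arch, tune, fpmath
   and the remaining target flags match.  */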
4954
4955 \f
4956 /* Remember the last target of ix86_set_current_function. */
4957 static GTY(()) tree ix86_previous_fndecl;
4958
4959 /* Invalidate ix86_previous_fndecl cache. */
4960 void
4961 ix86_reset_previous_fndecl (void)
4962 {
4963 ix86_previous_fndecl = NULL_TREE;
4964 }
4965
4966 /* Establish appropriate back-end context for processing the function
4967 FNDECL. The argument might be NULL to indicate processing at top
4968 level, outside of any function scope. */
4969 static void
4970 ix86_set_current_function (tree fndecl)
4971 {
4972 /* Only change the context if the function changes. This hook is called
4973 several times in the course of compiling a function, and we don't want to
4974 slow things down too much or call target_reinit when it isn't safe. */
4975 if (fndecl && fndecl != ix86_previous_fndecl)
4976 {
4977 tree old_tree = (ix86_previous_fndecl
4978 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4979 : NULL_TREE);
4980
4981 tree new_tree = (fndecl
4982 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4983 : NULL_TREE);
4984
4985 ix86_previous_fndecl = fndecl;
4986 if (old_tree == new_tree)
4987 ;
4988
4989 else if (new_tree)
4990 {
4991 cl_target_option_restore (&global_options,
4992 TREE_TARGET_OPTION (new_tree));
4993 if (TREE_TARGET_GLOBALS (new_tree))
4994 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4995 else
4996 TREE_TARGET_GLOBALS (new_tree)
4997 = save_target_globals_default_opts ();
4998 }
4999
5000 else if (old_tree)
5001 {
5002 new_tree = target_option_current_node;
5003 cl_target_option_restore (&global_options,
5004 TREE_TARGET_OPTION (new_tree));
5005 if (TREE_TARGET_GLOBALS (new_tree))
5006 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5007 else if (new_tree == target_option_default_node)
5008 restore_target_globals (&default_target_globals);
5009 else
5010 TREE_TARGET_GLOBALS (new_tree)
5011 = save_target_globals_default_opts ();
5012 }
5013 }
5014 }
5015
5016 \f
5017 /* Return true if this goes in large data/bss. */
5018
5019 static bool
5020 ix86_in_large_data_p (tree exp)
5021 {
5022 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5023 return false;
5024
5025 /* Functions are never large data. */
5026 if (TREE_CODE (exp) == FUNCTION_DECL)
5027 return false;
5028
5029 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5030 {
5031 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5032 if (strcmp (section, ".ldata") == 0
5033 || strcmp (section, ".lbss") == 0)
5034 return true;
5035 return false;
5036 }
5037 else
5038 {
5039 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5040
5041 /* If this is an incomplete type with size 0, then we can't put it
5042 in data because it might be too big when completed. */
5043 if (!size || size > ix86_section_threshold)
5044 return true;
5045 }
5046
5047 return false;
5048 }
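/* Hedged example of the effect: with -mcmodel=medium, a definition such as

     static char big_buf[200000];

   whose size exceeds the -mlarge-data-threshold value counts as large data
   and is routed to .ldata/.lbss by the section hooks below, while smaller
   objects stay in the ordinary .data/.bss sections.  */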
5049
5050 /* Switch to the appropriate section for output of DECL.
5051 DECL is either a `VAR_DECL' node or a constant of some sort.
5052 RELOC indicates whether forming the initial value of DECL requires
5053 link-time relocations. */
5054
5055 ATTRIBUTE_UNUSED static section *
5056 x86_64_elf_select_section (tree decl, int reloc,
5057 unsigned HOST_WIDE_INT align)
5058 {
5059 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5060 && ix86_in_large_data_p (decl))
5061 {
5062 const char *sname = NULL;
5063 unsigned int flags = SECTION_WRITE;
5064 switch (categorize_decl_for_section (decl, reloc))
5065 {
5066 case SECCAT_DATA:
5067 sname = ".ldata";
5068 break;
5069 case SECCAT_DATA_REL:
5070 sname = ".ldata.rel";
5071 break;
5072 case SECCAT_DATA_REL_LOCAL:
5073 sname = ".ldata.rel.local";
5074 break;
5075 case SECCAT_DATA_REL_RO:
5076 sname = ".ldata.rel.ro";
5077 break;
5078 case SECCAT_DATA_REL_RO_LOCAL:
5079 sname = ".ldata.rel.ro.local";
5080 break;
5081 case SECCAT_BSS:
5082 sname = ".lbss";
5083 flags |= SECTION_BSS;
5084 break;
5085 case SECCAT_RODATA:
5086 case SECCAT_RODATA_MERGE_STR:
5087 case SECCAT_RODATA_MERGE_STR_INIT:
5088 case SECCAT_RODATA_MERGE_CONST:
5089 sname = ".lrodata";
5090 flags = 0;
5091 break;
5092 case SECCAT_SRODATA:
5093 case SECCAT_SDATA:
5094 case SECCAT_SBSS:
5095 gcc_unreachable ();
5096 case SECCAT_TEXT:
5097 case SECCAT_TDATA:
5098 case SECCAT_TBSS:
5099 /* We don't split these for the medium model. Place them into
5100 default sections and hope for the best. */
5101 break;
5102 }
5103 if (sname)
5104 {
5105 /* We might get called with string constants, but get_named_section
5106 doesn't like them as they are not DECLs. Also, we need to set
5107 flags in that case. */
5108 if (!DECL_P (decl))
5109 return get_section (sname, flags, NULL);
5110 return get_named_section (decl, sname, reloc);
5111 }
5112 }
5113 return default_elf_select_section (decl, reloc, align);
5114 }
5115
5116 /* Select a set of attributes for section NAME based on the properties
5117 of DECL and whether or not RELOC indicates that DECL's initializer
5118 might contain runtime relocations. */
5119
5120 static unsigned int ATTRIBUTE_UNUSED
5121 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5122 {
5123 unsigned int flags = default_section_type_flags (decl, name, reloc);
5124
5125 if (decl == NULL_TREE
5126 && (strcmp (name, ".ldata.rel.ro") == 0
5127 || strcmp (name, ".ldata.rel.ro.local") == 0))
5128 flags |= SECTION_RELRO;
5129
5130 if (strcmp (name, ".lbss") == 0
5131 || strncmp (name, ".lbss.", 5) == 0
5132 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5133 flags |= SECTION_BSS;
5134
5135 return flags;
5136 }
5137
5138 /* Build up a unique section name, expressed as a
5139 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5140 RELOC indicates whether the initial value of EXP requires
5141 link-time relocations. */
5142
5143 static void ATTRIBUTE_UNUSED
5144 x86_64_elf_unique_section (tree decl, int reloc)
5145 {
5146 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5147 && ix86_in_large_data_p (decl))
5148 {
5149 const char *prefix = NULL;
5150 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5151 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5152
5153 switch (categorize_decl_for_section (decl, reloc))
5154 {
5155 case SECCAT_DATA:
5156 case SECCAT_DATA_REL:
5157 case SECCAT_DATA_REL_LOCAL:
5158 case SECCAT_DATA_REL_RO:
5159 case SECCAT_DATA_REL_RO_LOCAL:
5160 prefix = one_only ? ".ld" : ".ldata";
5161 break;
5162 case SECCAT_BSS:
5163 prefix = one_only ? ".lb" : ".lbss";
5164 break;
5165 case SECCAT_RODATA:
5166 case SECCAT_RODATA_MERGE_STR:
5167 case SECCAT_RODATA_MERGE_STR_INIT:
5168 case SECCAT_RODATA_MERGE_CONST:
5169 prefix = one_only ? ".lr" : ".lrodata";
5170 break;
5171 case SECCAT_SRODATA:
5172 case SECCAT_SDATA:
5173 case SECCAT_SBSS:
5174 gcc_unreachable ();
5175 case SECCAT_TEXT:
5176 case SECCAT_TDATA:
5177 case SECCAT_TBSS:
5178 /* We don't split these for the medium model. Place them into
5179 default sections and hope for the best. */
5180 break;
5181 }
5182 if (prefix)
5183 {
5184 const char *name, *linkonce;
5185 char *string;
5186
5187 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5188 name = targetm.strip_name_encoding (name);
5189
5190 /* If we're using one_only, then there needs to be a .gnu.linkonce
5191 prefix to the section name. */
5192 linkonce = one_only ? ".gnu.linkonce" : "";
5193
5194 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5195
5196 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5197 return;
5198 }
5199 }
5200 default_unique_section (decl, reloc);
5201 }
5202
5203 #ifdef COMMON_ASM_OP
5204 /* This says how to output assembler code to declare an
5205 uninitialized external linkage data object.
5206
5207 For medium model x86-64 we need to use .largecomm opcode for
5208 large objects. */
5209 void
5210 x86_elf_aligned_common (FILE *file,
5211 const char *name, unsigned HOST_WIDE_INT size,
5212 int align)
5213 {
5214 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5215 && size > (unsigned int)ix86_section_threshold)
5216 fputs (".largecomm\t", file);
5217 else
5218 fputs (COMMON_ASM_OP, file);
5219 assemble_name (file, name);
5220 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5221 size, align / BITS_PER_UNIT);
5222 }
5223 #endif
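/* Rough sketch of the resulting assembly (size and alignment in bytes):
   under -mcmodel=medium a common symbol above the section threshold would
   be emitted roughly as

     .largecomm  big_buf,200000,32

   whereas smaller objects keep the ordinary .comm directive.  */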
5224
5225 /* Utility function for targets to use in implementing
5226 ASM_OUTPUT_ALIGNED_BSS. */
5227
5228 void
5229 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5230 const char *name, unsigned HOST_WIDE_INT size,
5231 int align)
5232 {
5233 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5234 && size > (unsigned int)ix86_section_threshold)
5235 switch_to_section (get_named_section (decl, ".lbss", 0));
5236 else
5237 switch_to_section (bss_section);
5238 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5239 #ifdef ASM_DECLARE_OBJECT_NAME
5240 last_assemble_variable_decl = decl;
5241 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5242 #else
5243 /* Standard thing is just output label for the object. */
5244 ASM_OUTPUT_LABEL (file, name);
5245 #endif /* ASM_DECLARE_OBJECT_NAME */
5246 ASM_OUTPUT_SKIP (file, size ? size : 1);
5247 }
5248 \f
5249 /* Decide whether we must probe the stack before any space allocation
5250 on this target. It's essentially TARGET_STACK_PROBE except when
5251 -fstack-check causes the stack to be already probed differently. */
5252
5253 bool
5254 ix86_target_stack_probe (void)
5255 {
5256 /* Do not probe the stack twice if static stack checking is enabled. */
5257 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5258 return false;
5259
5260 return TARGET_STACK_PROBE;
5261 }
5262 \f
5263 /* Decide whether we can make a sibling call to a function. DECL is the
5264 declaration of the function being targeted by the call and EXP is the
5265 CALL_EXPR representing the call. */
5266
5267 static bool
5268 ix86_function_ok_for_sibcall (tree decl, tree exp)
5269 {
5270 tree type, decl_or_type;
5271 rtx a, b;
5272
5273 /* If we are generating position-independent code, we cannot sibcall
5274 optimize any indirect call, or a direct call to a global function,
5275 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5276 if (!TARGET_MACHO
5277 && !TARGET_64BIT
5278 && flag_pic
5279 && (!decl || !targetm.binds_local_p (decl)))
5280 return false;
5281
5282 /* If we need to align the outgoing stack, then sibcalling would
5283 unalign the stack, which may break the called function. */
5284 if (ix86_minimum_incoming_stack_boundary (true)
5285 < PREFERRED_STACK_BOUNDARY)
5286 return false;
5287
5288 if (decl)
5289 {
5290 decl_or_type = decl;
5291 type = TREE_TYPE (decl);
5292 }
5293 else
5294 {
5295 /* We're looking at the CALL_EXPR, we need the type of the function. */
5296 type = CALL_EXPR_FN (exp); /* pointer expression */
5297 type = TREE_TYPE (type); /* pointer type */
5298 type = TREE_TYPE (type); /* function type */
5299 decl_or_type = type;
5300 }
5301
5302 /* Check that the return value locations are the same. Like
5303 if we are returning floats on the 80387 register stack, we cannot
5304 make a sibcall from a function that doesn't return a float to a
5305 function that does or, conversely, from a function that does return
5306 a float to a function that doesn't; the necessary stack adjustment
5307 would not be executed. This is also the place we notice
5308 differences in the return value ABI. Note that it is ok for one
5309 of the functions to have void return type as long as the return
5310 value of the other is passed in a register. */
5311 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5312 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5313 cfun->decl, false);
5314 if (STACK_REG_P (a) || STACK_REG_P (b))
5315 {
5316 if (!rtx_equal_p (a, b))
5317 return false;
5318 }
5319 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5320 ;
5321 else if (!rtx_equal_p (a, b))
5322 return false;
5323
5324 if (TARGET_64BIT)
5325 {
5326 /* The SYSV ABI has more call-clobbered registers;
5327 disallow sibcalls from MS to SYSV. */
5328 if (cfun->machine->call_abi == MS_ABI
5329 && ix86_function_type_abi (type) == SYSV_ABI)
5330 return false;
5331 }
5332 else
5333 {
5334 /* If this call is indirect, we'll need to be able to use a
5335 call-clobbered register for the address of the target function.
5336 Make sure that all such registers are not used for passing
5337 parameters. Note that DLLIMPORT functions are indirect. */
5338 if (!decl
5339 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5340 {
5341 if (ix86_function_regparm (type, NULL) >= 3)
5342 {
5343 /* ??? Need to count the actual number of registers to be used,
5344 not the possible number of registers. Fix later. */
5345 return false;
5346 }
5347 }
5348 }
5349
5350 /* Otherwise okay. That also includes certain types of indirect calls. */
5351 return true;
5352 }
5353
5354 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5355 and "sseregparm" calling convention attributes;
5356 arguments as in struct attribute_spec.handler. */
5357
5358 static tree
5359 ix86_handle_cconv_attribute (tree *node, tree name,
5360 tree args,
5361 int flags ATTRIBUTE_UNUSED,
5362 bool *no_add_attrs)
5363 {
5364 if (TREE_CODE (*node) != FUNCTION_TYPE
5365 && TREE_CODE (*node) != METHOD_TYPE
5366 && TREE_CODE (*node) != FIELD_DECL
5367 && TREE_CODE (*node) != TYPE_DECL)
5368 {
5369 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5370 name);
5371 *no_add_attrs = true;
5372 return NULL_TREE;
5373 }
5374
5375 /* Can combine regparm with all attributes but fastcall and thiscall. */
5376 if (is_attribute_p ("regparm", name))
5377 {
5378 tree cst;
5379
5380 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5381 {
5382 error ("fastcall and regparm attributes are not compatible");
5383 }
5384
5385 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5386 {
5387 error ("regparm and thiscall attributes are not compatible");
5388 }
5389
5390 cst = TREE_VALUE (args);
5391 if (TREE_CODE (cst) != INTEGER_CST)
5392 {
5393 warning (OPT_Wattributes,
5394 "%qE attribute requires an integer constant argument",
5395 name);
5396 *no_add_attrs = true;
5397 }
5398 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5399 {
5400 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5401 name, REGPARM_MAX);
5402 *no_add_attrs = true;
5403 }
5404
5405 return NULL_TREE;
5406 }
5407
5408 if (TARGET_64BIT)
5409 {
5410 /* Do not warn when emulating the MS ABI. */
5411 if ((TREE_CODE (*node) != FUNCTION_TYPE
5412 && TREE_CODE (*node) != METHOD_TYPE)
5413 || ix86_function_type_abi (*node) != MS_ABI)
5414 warning (OPT_Wattributes, "%qE attribute ignored",
5415 name);
5416 *no_add_attrs = true;
5417 return NULL_TREE;
5418 }
5419
5420 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5421 if (is_attribute_p ("fastcall", name))
5422 {
5423 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5424 {
5425 error ("fastcall and cdecl attributes are not compatible");
5426 }
5427 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5428 {
5429 error ("fastcall and stdcall attributes are not compatible");
5430 }
5431 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5432 {
5433 error ("fastcall and regparm attributes are not compatible");
5434 }
5435 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5436 {
5437 error ("fastcall and thiscall attributes are not compatible");
5438 }
5439 }
5440
5441 /* Can combine stdcall with fastcall (redundant), regparm and
5442 sseregparm. */
5443 else if (is_attribute_p ("stdcall", name))
5444 {
5445 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5446 {
5447 error ("stdcall and cdecl attributes are not compatible");
5448 }
5449 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5450 {
5451 error ("stdcall and fastcall attributes are not compatible");
5452 }
5453 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5454 {
5455 error ("stdcall and thiscall attributes are not compatible");
5456 }
5457 }
5458
5459 /* Can combine cdecl with regparm and sseregparm. */
5460 else if (is_attribute_p ("cdecl", name))
5461 {
5462 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5463 {
5464 error ("stdcall and cdecl attributes are not compatible");
5465 }
5466 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5467 {
5468 error ("fastcall and cdecl attributes are not compatible");
5469 }
5470 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5471 {
5472 error ("cdecl and thiscall attributes are not compatible");
5473 }
5474 }
5475 else if (is_attribute_p ("thiscall", name))
5476 {
5477 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5478 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5479 name);
5480 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5481 {
5482 error ("stdcall and thiscall attributes are not compatible");
5483 }
5484 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5485 {
5486 error ("fastcall and thiscall attributes are not compatible");
5487 }
5488 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5489 {
5490 error ("cdecl and thiscall attributes are not compatible");
5491 }
5492 }
5493
5494 /* Can combine sseregparm with all attributes. */
5495
5496 return NULL_TREE;
5497 }
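/* Illustrative combinations checked above (hypothetical declarations):

     __attribute__((regparm (3)))           int f (int, int, int);   - accepted
     __attribute__((stdcall, regparm (2)))  int g (int, int);        - accepted
     __attribute__((fastcall, regparm (2))) int h (int, int);        - rejected
     __attribute__((regparm (8)))           int k (int);             - warns, exceeds REGPARM_MAX

   sseregparm may be combined with any of the conventions above.  */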
5498
5499 /* The transactional memory builtins are implicitly regparm or fastcall
5500 depending on the ABI. Override the generic do-nothing attribute that
5501 these builtins were declared with, and replace it with one of the two
5502 attributes that we expect elsewhere. */
5503
5504 static tree
5505 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5506 tree args ATTRIBUTE_UNUSED,
5507 int flags, bool *no_add_attrs)
5508 {
5509 tree alt;
5510
5511 /* In no case do we want to add the placeholder attribute. */
5512 *no_add_attrs = true;
5513
5514 /* The 64-bit ABI is unchanged for transactional memory. */
5515 if (TARGET_64BIT)
5516 return NULL_TREE;
5517
5518 /* ??? Is there a better way to validate 32-bit Windows? We have
5519 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5520 if (CHECK_STACK_LIMIT > 0)
5521 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5522 else
5523 {
5524 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5525 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5526 }
5527 decl_attributes (node, alt, flags);
5528
5529 return NULL_TREE;
5530 }
5531
5532 /* This function determines the calling convention from TYPE. */
5533
5534 unsigned int
5535 ix86_get_callcvt (const_tree type)
5536 {
5537 unsigned int ret = 0;
5538 bool is_stdarg;
5539 tree attrs;
5540
5541 if (TARGET_64BIT)
5542 return IX86_CALLCVT_CDECL;
5543
5544 attrs = TYPE_ATTRIBUTES (type);
5545 if (attrs != NULL_TREE)
5546 {
5547 if (lookup_attribute ("cdecl", attrs))
5548 ret |= IX86_CALLCVT_CDECL;
5549 else if (lookup_attribute ("stdcall", attrs))
5550 ret |= IX86_CALLCVT_STDCALL;
5551 else if (lookup_attribute ("fastcall", attrs))
5552 ret |= IX86_CALLCVT_FASTCALL;
5553 else if (lookup_attribute ("thiscall", attrs))
5554 ret |= IX86_CALLCVT_THISCALL;
5555
5556 /* Regparm isn't allowed for thiscall and fastcall. */
5557 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5558 {
5559 if (lookup_attribute ("regparm", attrs))
5560 ret |= IX86_CALLCVT_REGPARM;
5561 if (lookup_attribute ("sseregparm", attrs))
5562 ret |= IX86_CALLCVT_SSEREGPARM;
5563 }
5564
5565 if (IX86_BASE_CALLCVT(ret) != 0)
5566 return ret;
5567 }
5568
5569 is_stdarg = stdarg_p (type);
5570 if (TARGET_RTD && !is_stdarg)
5571 return IX86_CALLCVT_STDCALL | ret;
5572
5573 if (ret != 0
5574 || is_stdarg
5575 || TREE_CODE (type) != METHOD_TYPE
5576 || ix86_function_type_abi (type) != MS_ABI)
5577 return IX86_CALLCVT_CDECL | ret;
5578
5579 return IX86_CALLCVT_THISCALL;
5580 }
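/* Sketch of the resulting convention bits (hypothetical prototypes):

     int f1 (int);                             - IX86_CALLCVT_CDECL (default)
     __attribute__((stdcall))  int f2 (int);   - IX86_CALLCVT_STDCALL
     __attribute__((fastcall)) int f3 (int);   - IX86_CALLCVT_FASTCALL

   With -mrtd, an unadorned non-variadic prototype is treated as stdcall
   instead of the cdecl default, as implemented above.  */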
5581
5582 /* Return 0 if the attributes for two types are incompatible, 1 if they
5583 are compatible, and 2 if they are nearly compatible (which causes a
5584 warning to be generated). */
5585
5586 static int
5587 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5588 {
5589 unsigned int ccvt1, ccvt2;
5590
5591 if (TREE_CODE (type1) != FUNCTION_TYPE
5592 && TREE_CODE (type1) != METHOD_TYPE)
5593 return 1;
5594
5595 ccvt1 = ix86_get_callcvt (type1);
5596 ccvt2 = ix86_get_callcvt (type2);
5597 if (ccvt1 != ccvt2)
5598 return 0;
5599 if (ix86_function_regparm (type1, NULL)
5600 != ix86_function_regparm (type2, NULL))
5601 return 0;
5602
5603 return 1;
5604 }
5605 \f
5606 /* Return the regparm value for a function with the indicated TYPE and DECL.
5607 DECL may be NULL when calling function indirectly
5608 or considering a libcall. */
5609
5610 static int
5611 ix86_function_regparm (const_tree type, const_tree decl)
5612 {
5613 tree attr;
5614 int regparm;
5615 unsigned int ccvt;
5616
5617 if (TARGET_64BIT)
5618 return (ix86_function_type_abi (type) == SYSV_ABI
5619 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5620 ccvt = ix86_get_callcvt (type);
5621 regparm = ix86_regparm;
5622
5623 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5624 {
5625 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5626 if (attr)
5627 {
5628 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5629 return regparm;
5630 }
5631 }
5632 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5633 return 2;
5634 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5635 return 1;
5636
5637 /* Use register calling convention for local functions when possible. */
5638 if (decl
5639 && TREE_CODE (decl) == FUNCTION_DECL
5640 /* Caller and callee must agree on the calling convention, so
5641 checking just `optimize' here would mean that with
5642 __attribute__((optimize (...))) the caller could use the regparm convention
5643 and the callee not, or vice versa. Instead look at whether the callee
5644 is optimized or not. */
5645 && opt_for_fn (decl, optimize)
5646 && !(profile_flag && !flag_fentry))
5647 {
5648 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5649 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5650 if (i && i->local && i->can_change_signature)
5651 {
5652 int local_regparm, globals = 0, regno;
5653
5654 /* Make sure no regparm register is taken by a
5655 fixed register variable. */
5656 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5657 if (fixed_regs[local_regparm])
5658 break;
5659
5660 /* We don't want to use regparm(3) for nested functions as
5661 these use a static chain pointer in the third argument. */
5662 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5663 local_regparm = 2;
5664
5665 /* In 32-bit mode save a register for the split stack. */
5666 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5667 local_regparm = 2;
5668
5669 /* Each fixed register usage increases register pressure,
5670 so fewer registers should be used for argument passing.
5671 This functionality can be overridden by an explicit
5672 regparm value. */
5673 for (regno = AX_REG; regno <= DI_REG; regno++)
5674 if (fixed_regs[regno])
5675 globals++;
5676
5677 local_regparm
5678 = globals < local_regparm ? local_regparm - globals : 0;
5679
5680 if (local_regparm > regparm)
5681 regparm = local_regparm;
5682 }
5683 }
5684
5685 return regparm;
5686 }
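/* For instance, with regparm(3) on ia32 the first three integer arguments
   travel in registers rather than on the stack (a rough sketch):

     __attribute__((regparm (3))) int add3 (int a, int b, int c);
     a in %eax, b in %edx, c in %ecx

   Local functions may be promoted to this convention automatically by the
   code above when enough argument registers remain non-fixed.  */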
5687
5688 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5689 DFmode (2) arguments in SSE registers for a function with the
5690 indicated TYPE and DECL. DECL may be NULL when calling function
5691 indirectly or considering a libcall. Otherwise return 0. */
5692
5693 static int
5694 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5695 {
5696 gcc_assert (!TARGET_64BIT);
5697
5698 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5699 by the sseregparm attribute. */
5700 if (TARGET_SSEREGPARM
5701 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5702 {
5703 if (!TARGET_SSE)
5704 {
5705 if (warn)
5706 {
5707 if (decl)
5708 error ("calling %qD with attribute sseregparm without "
5709 "SSE/SSE2 enabled", decl);
5710 else
5711 error ("calling %qT with attribute sseregparm without "
5712 "SSE/SSE2 enabled", type);
5713 }
5714 return 0;
5715 }
5716
5717 return 2;
5718 }
5719
5720 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5721 (and DFmode for SSE2) arguments in SSE registers. */
5722 if (decl && TARGET_SSE_MATH && optimize
5723 && !(profile_flag && !flag_fentry))
5724 {
5725 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5726 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5727 if (i && i->local && i->can_change_signature)
5728 return TARGET_SSE2 ? 2 : 1;
5729 }
5730
5731 return 0;
5732 }
5733
5734 /* Return true if EAX is live at the start of the function. Used by
5735 ix86_expand_prologue to determine if we need special help before
5736 calling allocate_stack_worker. */
5737
5738 static bool
5739 ix86_eax_live_at_start_p (void)
5740 {
5741 /* Cheat. Don't bother working forward from ix86_function_regparm
5742 to the function type to whether an actual argument is located in
5743 eax. Instead just look at cfg info, which is still close enough
5744 to correct at this point. This gives false positives for broken
5745 functions that might use uninitialized data that happens to be
5746 allocated in eax, but who cares? */
5747 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5748 }
5749
5750 static bool
5751 ix86_keep_aggregate_return_pointer (tree fntype)
5752 {
5753 tree attr;
5754
5755 if (!TARGET_64BIT)
5756 {
5757 attr = lookup_attribute ("callee_pop_aggregate_return",
5758 TYPE_ATTRIBUTES (fntype));
5759 if (attr)
5760 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5761
5762 /* For 32-bit MS-ABI the default is to keep aggregate
5763 return pointer. */
5764 if (ix86_function_type_abi (fntype) == MS_ABI)
5765 return true;
5766 }
5767 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5768 }
5769
5770 /* Value is the number of bytes of arguments automatically
5771 popped when returning from a subroutine call.
5772 FUNDECL is the declaration node of the function (as a tree),
5773 FUNTYPE is the data type of the function (as a tree),
5774 or for a library call it is an identifier node for the subroutine name.
5775 SIZE is the number of bytes of arguments passed on the stack.
5776
5777 On the 80386, the RTD insn may be used to pop them if the number
5778 of args is fixed, but if the number is variable then the caller
5779 must pop them all. RTD can't be used for library calls now
5780 because the library is compiled with the Unix compiler.
5781 Use of RTD is a selectable option, since it is incompatible with
5782 standard Unix calling sequences. If the option is not selected,
5783 the caller must always pop the args.
5784
5785 The attribute stdcall is equivalent to RTD on a per module basis. */
5786
5787 static int
5788 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5789 {
5790 unsigned int ccvt;
5791
5792 /* None of the 64-bit ABIs pop arguments. */
5793 if (TARGET_64BIT)
5794 return 0;
5795
5796 ccvt = ix86_get_callcvt (funtype);
5797
5798 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5799 | IX86_CALLCVT_THISCALL)) != 0
5800 && ! stdarg_p (funtype))
5801 return size;
5802
5803 /* Lose any fake structure return argument if it is passed on the stack. */
5804 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5805 && !ix86_keep_aggregate_return_pointer (funtype))
5806 {
5807 int nregs = ix86_function_regparm (funtype, fundecl);
5808 if (nregs == 0)
5809 return GET_MODE_SIZE (Pmode);
5810 }
5811
5812 return 0;
5813 }
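/* Hedged example of the effect: for

     __attribute__((stdcall)) int f (int a, int b, int c);

   the callee pops its 12 bytes of stack arguments ("ret $12"), so this hook
   reports 12; a cdecl or variadic function reports 0 and the caller pops
   its own arguments.  */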
5814
5815 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5816
5817 static bool
5818 ix86_legitimate_combined_insn (rtx insn)
5819 {
5820 /* Check operand constraints in case hard registers were propagated
5821 into insn pattern. This check prevents combine pass from
5822 generating insn patterns with invalid hard register operands.
5823 These invalid insns can eventually confuse reload to error out
5824 with a spill failure. See also PRs 46829 and 46843. */
5825 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5826 {
5827 int i;
5828
5829 extract_insn (insn);
5830 preprocess_constraints ();
5831
5832 for (i = 0; i < recog_data.n_operands; i++)
5833 {
5834 rtx op = recog_data.operand[i];
5835 enum machine_mode mode = GET_MODE (op);
5836 struct operand_alternative *op_alt;
5837 int offset = 0;
5838 bool win;
5839 int j;
5840
5841 /* For pre-AVX disallow unaligned loads/stores where the
5842 instructions don't support it. */
5843 if (!TARGET_AVX
5844 && VECTOR_MODE_P (GET_MODE (op))
5845 && misaligned_operand (op, GET_MODE (op)))
5846 {
5847 int min_align = get_attr_ssememalign (insn);
5848 if (min_align == 0)
5849 return false;
5850 }
5851
5852 /* A unary operator may be accepted by the predicate, but it
5853 is irrelevant for matching constraints. */
5854 if (UNARY_P (op))
5855 op = XEXP (op, 0);
5856
5857 if (GET_CODE (op) == SUBREG)
5858 {
5859 if (REG_P (SUBREG_REG (op))
5860 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5861 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5862 GET_MODE (SUBREG_REG (op)),
5863 SUBREG_BYTE (op),
5864 GET_MODE (op));
5865 op = SUBREG_REG (op);
5866 }
5867
5868 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5869 continue;
5870
5871 op_alt = recog_op_alt[i];
5872
5873 /* Operand has no constraints, anything is OK. */
5874 win = !recog_data.n_alternatives;
5875
5876 for (j = 0; j < recog_data.n_alternatives; j++)
5877 {
5878 if (op_alt[j].anything_ok
5879 || (op_alt[j].matches != -1
5880 && operands_match_p
5881 (recog_data.operand[i],
5882 recog_data.operand[op_alt[j].matches]))
5883 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5884 {
5885 win = true;
5886 break;
5887 }
5888 }
5889
5890 if (!win)
5891 return false;
5892 }
5893 }
5894
5895 return true;
5896 }
5897 \f
5898 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5899
5900 static unsigned HOST_WIDE_INT
5901 ix86_asan_shadow_offset (void)
5902 {
5903 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5904 : HOST_WIDE_INT_C (0x7fff8000))
5905 : (HOST_WIDE_INT_1 << 29);
5906 }
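/* Sketch of how the offset is consumed by the sanitizer instrumentation,
   assuming the usual shadow scale of 3:

     shadow_addr = (addr >> 3) + ix86_asan_shadow_offset ()

   e.g. 0x7fff8000 for LP64 Linux targets as returned above.  */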
5907 \f
5908 /* Argument support functions. */
5909
5910 /* Return true when register may be used to pass function parameters. */
5911 bool
5912 ix86_function_arg_regno_p (int regno)
5913 {
5914 int i;
5915 const int *parm_regs;
5916
5917 if (!TARGET_64BIT)
5918 {
5919 if (TARGET_MACHO)
5920 return (regno < REGPARM_MAX
5921 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5922 else
5923 return (regno < REGPARM_MAX
5924 || (TARGET_MMX && MMX_REGNO_P (regno)
5925 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5926 || (TARGET_SSE && SSE_REGNO_P (regno)
5927 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5928 }
5929
5930 if (TARGET_SSE && SSE_REGNO_P (regno)
5931 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5932 return true;
5933
5934 /* TODO: The function should depend on current function ABI but
5935 builtins.c would need updating then. Therefore we use the
5936 default ABI. */
5937
5938 /* RAX is used as hidden argument to va_arg functions. */
5939 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5940 return true;
5941
5942 if (ix86_abi == MS_ABI)
5943 parm_regs = x86_64_ms_abi_int_parameter_registers;
5944 else
5945 parm_regs = x86_64_int_parameter_registers;
5946 for (i = 0; i < (ix86_abi == MS_ABI
5947 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5948 if (regno == parm_regs[i])
5949 return true;
5950 return false;
5951 }
5952
5953 /* Return true if we do not know how to pass TYPE solely in registers. */
5954
5955 static bool
5956 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5957 {
5958 if (must_pass_in_stack_var_size_or_pad (mode, type))
5959 return true;
5960
5961 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5962 The layout_type routine is crafty and tries to trick us into passing
5963 currently unsupported vector types on the stack by using TImode. */
5964 return (!TARGET_64BIT && mode == TImode
5965 && type && TREE_CODE (type) != VECTOR_TYPE);
5966 }
5967
5968 /* Return the size, in bytes, of the area reserved for arguments passed
5969 in registers for the function represented by FNDECL, depending on the
5970 ABI used. */
5971 int
5972 ix86_reg_parm_stack_space (const_tree fndecl)
5973 {
5974 enum calling_abi call_abi = SYSV_ABI;
5975 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5976 call_abi = ix86_function_abi (fndecl);
5977 else
5978 call_abi = ix86_function_type_abi (fndecl);
5979 if (TARGET_64BIT && call_abi == MS_ABI)
5980 return 32;
5981 return 0;
5982 }
5983
5984 /* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
5985 call ABI used. */
5986 enum calling_abi
5987 ix86_function_type_abi (const_tree fntype)
5988 {
5989 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5990 {
5991 enum calling_abi abi = ix86_abi;
5992 if (abi == SYSV_ABI)
5993 {
5994 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5995 abi = MS_ABI;
5996 }
5997 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5998 abi = SYSV_ABI;
5999 return abi;
6000 }
6001 return ix86_abi;
6002 }
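/* Illustrative usage of the ABI attributes handled above (hypothetical
   declarations):

     __attribute__((ms_abi))   void win_style (int, int, int, int);
     __attribute__((sysv_abi)) void sysv_style (int, int, int, int);

   On a SYSV-default target the first switches to the MS convention
   (integer arguments in %rcx, %rdx, %r8, %r9), and vice versa.  */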
6003
6004 /* We add this as a workaround in order to use libc_has_function
6005 hook in i386.md. */
6006 bool
6007 ix86_libc_has_function (enum function_class fn_class)
6008 {
6009 return targetm.libc_has_function (fn_class);
6010 }
6011
6012 static bool
6013 ix86_function_ms_hook_prologue (const_tree fn)
6014 {
6015 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6016 {
6017 if (decl_function_context (fn) != NULL_TREE)
6018 error_at (DECL_SOURCE_LOCATION (fn),
6019 "ms_hook_prologue is not compatible with nested function");
6020 else
6021 return true;
6022 }
6023 return false;
6024 }
6025
6026 static enum calling_abi
6027 ix86_function_abi (const_tree fndecl)
6028 {
6029 if (! fndecl)
6030 return ix86_abi;
6031 return ix86_function_type_abi (TREE_TYPE (fndecl));
6032 }
6033
6034 /* Return SYSV_ABI or MS_ABI depending on CFUN, specifying the
6035 call ABI used. */
6036 enum calling_abi
6037 ix86_cfun_abi (void)
6038 {
6039 if (! cfun)
6040 return ix86_abi;
6041 return cfun->machine->call_abi;
6042 }
6043
6044 /* Write the extra assembler code needed to declare a function properly. */
6045
6046 void
6047 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6048 tree decl)
6049 {
6050 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6051
6052 if (is_ms_hook)
6053 {
6054 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6055 unsigned int filler_cc = 0xcccccccc;
6056
6057 for (i = 0; i < filler_count; i += 4)
6058 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6059 }
6060
6061 #ifdef SUBTARGET_ASM_UNWIND_INIT
6062 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6063 #endif
6064
6065 ASM_OUTPUT_LABEL (asm_out_file, fname);
6066
6067 /* Output magic byte marker, if hot-patch attribute is set. */
6068 if (is_ms_hook)
6069 {
6070 if (TARGET_64BIT)
6071 {
6072 /* leaq [%rsp + 0], %rsp */
6073 asm_fprintf (asm_out_file, ASM_BYTE
6074 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6075 }
6076 else
6077 {
6078 /* movl.s %edi, %edi
6079 push %ebp
6080 movl.s %esp, %ebp */
6081 asm_fprintf (asm_out_file, ASM_BYTE
6082 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6083 }
6084 }
6085 }
6086
6087 /* regclass.c */
6088 extern void init_regs (void);
6089
6090 /* Implementation of the call ABI switching target hook. The call-used
6091 register sets specific to FNDECL are selected. See also
6092 ix86_conditional_register_usage for more details. */
6093 void
6094 ix86_call_abi_override (const_tree fndecl)
6095 {
6096 if (fndecl == NULL_TREE)
6097 cfun->machine->call_abi = ix86_abi;
6098 else
6099 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6100 }
6101
6102 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6103 expensive re-initialization of init_regs each time we switch function context
6104 since this is needed only during RTL expansion. */
6105 static void
6106 ix86_maybe_switch_abi (void)
6107 {
6108 if (TARGET_64BIT &&
6109 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6110 reinit_regs ();
6111 }
6112
6113 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6114 for a call to a function whose data type is FNTYPE.
6115 For a library call, FNTYPE is 0. */
6116
6117 void
6118 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6119 tree fntype, /* tree ptr for function decl */
6120 rtx libname, /* SYMBOL_REF of library name or 0 */
6121 tree fndecl,
6122 int caller)
6123 {
6124 struct cgraph_local_info *i;
6125
6126 memset (cum, 0, sizeof (*cum));
6127
6128 if (fndecl)
6129 {
6130 i = cgraph_local_info (fndecl);
6131 cum->call_abi = ix86_function_abi (fndecl);
6132 }
6133 else
6134 {
6135 i = NULL;
6136 cum->call_abi = ix86_function_type_abi (fntype);
6137 }
6138
6139 cum->caller = caller;
6140
6141 /* Set up the number of registers to use for passing arguments. */
6142 cum->nregs = ix86_regparm;
6143 if (TARGET_64BIT)
6144 {
6145 cum->nregs = (cum->call_abi == SYSV_ABI
6146 ? X86_64_REGPARM_MAX
6147 : X86_64_MS_REGPARM_MAX);
6148 }
6149 if (TARGET_SSE)
6150 {
6151 cum->sse_nregs = SSE_REGPARM_MAX;
6152 if (TARGET_64BIT)
6153 {
6154 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6155 ? X86_64_SSE_REGPARM_MAX
6156 : X86_64_MS_SSE_REGPARM_MAX);
6157 }
6158 }
6159 if (TARGET_MMX)
6160 cum->mmx_nregs = MMX_REGPARM_MAX;
6161 cum->warn_avx512f = true;
6162 cum->warn_avx = true;
6163 cum->warn_sse = true;
6164 cum->warn_mmx = true;
6165
6166 /* Because the type might mismatch between caller and callee, we need to
6167 use the actual type of the function for local calls.
6168 FIXME: cgraph_analyze can be told to actually record if function uses
6169 va_start so for local functions maybe_vaarg can be made aggressive
6170 helping K&R code.
6171 FIXME: once the type system is fixed, we won't need this code anymore. */
6172 if (i && i->local && i->can_change_signature)
6173 fntype = TREE_TYPE (fndecl);
6174 cum->maybe_vaarg = (fntype
6175 ? (!prototype_p (fntype) || stdarg_p (fntype))
6176 : !libname);
6177
6178 if (!TARGET_64BIT)
6179 {
6180 /* If there are variable arguments, then we won't pass anything
6181 in registers in 32-bit mode. */
6182 if (stdarg_p (fntype))
6183 {
6184 cum->nregs = 0;
6185 cum->sse_nregs = 0;
6186 cum->mmx_nregs = 0;
6187 cum->warn_avx512f = false;
6188 cum->warn_avx = false;
6189 cum->warn_sse = false;
6190 cum->warn_mmx = false;
6191 return;
6192 }
6193
6194 /* Use ecx and edx registers if function has fastcall attribute,
6195 else look for regparm information. */
6196 if (fntype)
6197 {
6198 unsigned int ccvt = ix86_get_callcvt (fntype);
6199 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6200 {
6201 cum->nregs = 1;
6202 cum->fastcall = 1; /* Same first register as in fastcall. */
6203 }
6204 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6205 {
6206 cum->nregs = 2;
6207 cum->fastcall = 1;
6208 }
6209 else
6210 cum->nregs = ix86_function_regparm (fntype, fndecl);
6211 }
6212
6213 /* Set up the number of SSE registers used for passing SFmode
6214 and DFmode arguments. Warn for mismatching ABI. */
6215 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6216 }
6217 }
6218
6219 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6220 But in the case of vector types, it is some vector mode.
6221
6222 When we have only some of our vector isa extensions enabled, then there
6223 are some modes for which vector_mode_supported_p is false. For these
6224 modes, the generic vector support in gcc will choose some non-vector mode
6225 in order to implement the type. By computing the natural mode, we'll
6226 select the proper ABI location for the operand and not depend on whatever
6227 the middle-end decides to do with these vector types.
6228
6229 The middle-end can't deal with vector types wider than 16 bytes. In
6230 this case, we return the original mode and warn about the ABI change if
6231 CUM isn't NULL.
6232 
6233 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6234 available for the function return value. */
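/* Illustrative example (an editorial sketch, assuming a source-level vector
   type): given

       typedef float v4sf __attribute__ ((vector_size (16)));

   the loop below selects V4SFmode (four SFmode units), so the psABI location
   of the argument is computed from the vector mode even when the middle-end
   has chosen a non-vector mode to implement the type.  */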
6235
6236 static enum machine_mode
6237 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6238 bool in_return)
6239 {
6240 enum machine_mode mode = TYPE_MODE (type);
6241
6242 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6243 {
6244 HOST_WIDE_INT size = int_size_in_bytes (type);
6245 if ((size == 8 || size == 16 || size == 32 || size == 64)
6246 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6247 && TYPE_VECTOR_SUBPARTS (type) > 1)
6248 {
6249 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6250
6251 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6252 mode = MIN_MODE_VECTOR_FLOAT;
6253 else
6254 mode = MIN_MODE_VECTOR_INT;
6255
6256 /* Get the mode which has this inner mode and number of units. */
6257 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6258 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6259 && GET_MODE_INNER (mode) == innermode)
6260 {
6261 if (size == 64 && !TARGET_AVX512F)
6262 {
6263 static bool warnedavx512f;
6264 static bool warnedavx512f_ret;
6265
6266 if (cum && cum->warn_avx512f && !warnedavx512f)
6267 {
6268 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6269 "without AVX512F enabled changes the ABI"))
6270 warnedavx512f = true;
6271 }
6272 else if (in_return && !warnedavx512f_ret)
6273 {
6274 if (warning (OPT_Wpsabi, "AVX512F vector return "
6275 "without AVX512F enabled changes the ABI"))
6276 warnedavx512f_ret = true;
6277 }
6278
6279 return TYPE_MODE (type);
6280 }
6281 else if (size == 32 && !TARGET_AVX)
6282 {
6283 static bool warnedavx;
6284 static bool warnedavx_ret;
6285
6286 if (cum && cum->warn_avx && !warnedavx)
6287 {
6288 if (warning (OPT_Wpsabi, "AVX vector argument "
6289 "without AVX enabled changes the ABI"))
6290 warnedavx = true;
6291 }
6292 else if (in_return && !warnedavx_ret)
6293 {
6294 if (warning (OPT_Wpsabi, "AVX vector return "
6295 "without AVX enabled changes the ABI"))
6296 warnedavx_ret = true;
6297 }
6298
6299 return TYPE_MODE (type);
6300 }
6301 else if (((size == 8 && TARGET_64BIT) || size == 16)
6302 && !TARGET_SSE)
6303 {
6304 static bool warnedsse;
6305 static bool warnedsse_ret;
6306
6307 if (cum && cum->warn_sse && !warnedsse)
6308 {
6309 if (warning (OPT_Wpsabi, "SSE vector argument "
6310 "without SSE enabled changes the ABI"))
6311 warnedsse = true;
6312 }
6313 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6314 {
6315 if (warning (OPT_Wpsabi, "SSE vector return "
6316 "without SSE enabled changes the ABI"))
6317 warnedsse_ret = true;
6318 }
6319 }
6320 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6321 {
6322 static bool warnedmmx;
6323 static bool warnedmmx_ret;
6324
6325 if (cum && cum->warn_mmx && !warnedmmx)
6326 {
6327 if (warning (OPT_Wpsabi, "MMX vector argument "
6328 "without MMX enabled changes the ABI"))
6329 warnedmmx = true;
6330 }
6331 else if (in_return && !warnedmmx_ret)
6332 {
6333 if (warning (OPT_Wpsabi, "MMX vector return "
6334 "without MMX enabled changes the ABI"))
6335 warnedmmx_ret = true;
6336 }
6337 }
6338 return mode;
6339 }
6340
6341 gcc_unreachable ();
6342 }
6343 }
6344
6345 return mode;
6346 }
6347
6348 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6349 this may not agree with the mode that the type system has chosen for the
6350 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6351 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6352
6353 static rtx
6354 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6355 unsigned int regno)
6356 {
6357 rtx tmp;
6358
6359 if (orig_mode != BLKmode)
6360 tmp = gen_rtx_REG (orig_mode, regno);
6361 else
6362 {
6363 tmp = gen_rtx_REG (mode, regno);
6364 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6365 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6366 }
6367
6368 return tmp;
6369 }
6370
6371 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6372 of this code is to classify each 8bytes of incoming argument by the register
6373 class and assign registers accordingly. */
6374
6375 /* Return the union class of CLASS1 and CLASS2.
6376 See the x86-64 PS ABI for details. */
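/* Illustrative merges (editorial, following the psABI rules below):
   NO_CLASS + SSE yields SSE (rule #2), INTEGERSI + SSESF yields INTEGERSI
   (rule #4), and X87 + SSE yields MEMORY (rule #5).  */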
6377
6378 static enum x86_64_reg_class
6379 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6380 {
6381 /* Rule #1: If both classes are equal, this is the resulting class. */
6382 if (class1 == class2)
6383 return class1;
6384
6385 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6386 the other class. */
6387 if (class1 == X86_64_NO_CLASS)
6388 return class2;
6389 if (class2 == X86_64_NO_CLASS)
6390 return class1;
6391
6392 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6393 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6394 return X86_64_MEMORY_CLASS;
6395
6396 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6397 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6398 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6399 return X86_64_INTEGERSI_CLASS;
6400 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6401 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6402 return X86_64_INTEGER_CLASS;
6403
6404 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6405 MEMORY is used. */
6406 if (class1 == X86_64_X87_CLASS
6407 || class1 == X86_64_X87UP_CLASS
6408 || class1 == X86_64_COMPLEX_X87_CLASS
6409 || class2 == X86_64_X87_CLASS
6410 || class2 == X86_64_X87UP_CLASS
6411 || class2 == X86_64_COMPLEX_X87_CLASS)
6412 return X86_64_MEMORY_CLASS;
6413
6414 /* Rule #6: Otherwise class SSE is used. */
6415 return X86_64_SSE_CLASS;
6416 }
6417
6418 /* Classify the argument of type TYPE and mode MODE.
6419 CLASSES will be filled by the register class used to pass each word
6420 of the operand. The number of words is returned. In case the parameter
6421 should be passed in memory, 0 is returned. As a special case for zero
6422 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6423
6424 BIT_OFFSET is used internally for handling records and specifies the
6425 offset of the value in bits, modulo 512, to avoid overflow cases.
6426
6427 See the x86-64 PS ABI for details.
6428 */
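/* Worked example (editorial sketch, assuming the SysV x86-64 ABI): for

       struct s { double d; int i; };

   the first eightbyte (the double) is classified X86_64_SSEDF_CLASS and the
   second (the int) X86_64_INTEGER_CLASS, so 2 is returned and the value is
   eligible for one SSE and one general-purpose register.  Aggregates larger
   than 64 bytes, or ones whose merged class is MEMORY, yield 0 and are
   passed on the stack.  */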
6429
6430 static int
6431 classify_argument (enum machine_mode mode, const_tree type,
6432 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6433 {
6434 HOST_WIDE_INT bytes =
6435 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6436 int words
6437 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6438
6439 /* Variable sized entities are always passed/returned in memory. */
6440 if (bytes < 0)
6441 return 0;
6442
6443 if (mode != VOIDmode
6444 && targetm.calls.must_pass_in_stack (mode, type))
6445 return 0;
6446
6447 if (type && AGGREGATE_TYPE_P (type))
6448 {
6449 int i;
6450 tree field;
6451 enum x86_64_reg_class subclasses[MAX_CLASSES];
6452
6453 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6454 if (bytes > 64)
6455 return 0;
6456
6457 for (i = 0; i < words; i++)
6458 classes[i] = X86_64_NO_CLASS;
6459
6460 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6461 signal the memory class, so handle this as a special case. */
6462 if (!words)
6463 {
6464 classes[0] = X86_64_NO_CLASS;
6465 return 1;
6466 }
6467
6468 /* Classify each field of record and merge classes. */
6469 switch (TREE_CODE (type))
6470 {
6471 case RECORD_TYPE:
6472 /* And now merge the fields of structure. */
6473 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6474 {
6475 if (TREE_CODE (field) == FIELD_DECL)
6476 {
6477 int num;
6478
6479 if (TREE_TYPE (field) == error_mark_node)
6480 continue;
6481
6482 /* Bitfields are always classified as integer. Handle them
6483 early, since later code would consider them to be
6484 misaligned integers. */
6485 if (DECL_BIT_FIELD (field))
6486 {
6487 for (i = (int_bit_position (field)
6488 + (bit_offset % 64)) / 8 / 8;
6489 i < ((int_bit_position (field) + (bit_offset % 64))
6490 + tree_to_shwi (DECL_SIZE (field))
6491 + 63) / 8 / 8; i++)
6492 classes[i] =
6493 merge_classes (X86_64_INTEGER_CLASS,
6494 classes[i]);
6495 }
6496 else
6497 {
6498 int pos;
6499
6500 type = TREE_TYPE (field);
6501
6502 /* Flexible array member is ignored. */
6503 if (TYPE_MODE (type) == BLKmode
6504 && TREE_CODE (type) == ARRAY_TYPE
6505 && TYPE_SIZE (type) == NULL_TREE
6506 && TYPE_DOMAIN (type) != NULL_TREE
6507 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6508 == NULL_TREE))
6509 {
6510 static bool warned;
6511
6512 if (!warned && warn_psabi)
6513 {
6514 warned = true;
6515 inform (input_location,
6516 "the ABI of passing struct with"
6517 " a flexible array member has"
6518 " changed in GCC 4.4");
6519 }
6520 continue;
6521 }
6522 num = classify_argument (TYPE_MODE (type), type,
6523 subclasses,
6524 (int_bit_position (field)
6525 + bit_offset) % 512);
6526 if (!num)
6527 return 0;
6528 pos = (int_bit_position (field)
6529 + (bit_offset % 64)) / 8 / 8;
6530 for (i = 0; i < num && (i + pos) < words; i++)
6531 classes[i + pos] =
6532 merge_classes (subclasses[i], classes[i + pos]);
6533 }
6534 }
6535 }
6536 break;
6537
6538 case ARRAY_TYPE:
6539 /* Arrays are handled as small records. */
6540 {
6541 int num;
6542 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6543 TREE_TYPE (type), subclasses, bit_offset);
6544 if (!num)
6545 return 0;
6546
6547 /* The partial classes are now full classes. */
6548 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6549 subclasses[0] = X86_64_SSE_CLASS;
6550 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6551 && !((bit_offset % 64) == 0 && bytes == 4))
6552 subclasses[0] = X86_64_INTEGER_CLASS;
6553
6554 for (i = 0; i < words; i++)
6555 classes[i] = subclasses[i % num];
6556
6557 break;
6558 }
6559 case UNION_TYPE:
6560 case QUAL_UNION_TYPE:
6561 /* Unions are similar to RECORD_TYPE but offset is always 0.
6562 */
6563 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6564 {
6565 if (TREE_CODE (field) == FIELD_DECL)
6566 {
6567 int num;
6568
6569 if (TREE_TYPE (field) == error_mark_node)
6570 continue;
6571
6572 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6573 TREE_TYPE (field), subclasses,
6574 bit_offset);
6575 if (!num)
6576 return 0;
6577 for (i = 0; i < num; i++)
6578 classes[i] = merge_classes (subclasses[i], classes[i]);
6579 }
6580 }
6581 break;
6582
6583 default:
6584 gcc_unreachable ();
6585 }
6586
6587 if (words > 2)
6588 {
6589 /* When the size is > 16 bytes, if the first eightbyte isn't
6590 X86_64_SSE_CLASS or any of the other ones isn't
6591 X86_64_SSEUP_CLASS, everything should be passed in
6592 memory. */
6593 if (classes[0] != X86_64_SSE_CLASS)
6594 return 0;
6595
6596 for (i = 1; i < words; i++)
6597 if (classes[i] != X86_64_SSEUP_CLASS)
6598 return 0;
6599 }
6600
6601 /* Final merger cleanup. */
6602 for (i = 0; i < words; i++)
6603 {
6604 /* If one class is MEMORY, everything should be passed in
6605 memory. */
6606 if (classes[i] == X86_64_MEMORY_CLASS)
6607 return 0;
6608
6609 /* The X86_64_SSEUP_CLASS should be always preceded by
6610 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6611 if (classes[i] == X86_64_SSEUP_CLASS
6612 && classes[i - 1] != X86_64_SSE_CLASS
6613 && classes[i - 1] != X86_64_SSEUP_CLASS)
6614 {
6615 /* The first one should never be X86_64_SSEUP_CLASS. */
6616 gcc_assert (i != 0);
6617 classes[i] = X86_64_SSE_CLASS;
6618 }
6619
6620 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6621 everything should be passed in memory. */
6622 if (classes[i] == X86_64_X87UP_CLASS
6623 && (classes[i - 1] != X86_64_X87_CLASS))
6624 {
6625 static bool warned;
6626
6627 /* The first one should never be X86_64_X87UP_CLASS. */
6628 gcc_assert (i != 0);
6629 if (!warned && warn_psabi)
6630 {
6631 warned = true;
6632 inform (input_location,
6633 "the ABI of passing union with long double"
6634 " has changed in GCC 4.4");
6635 }
6636 return 0;
6637 }
6638 }
6639 return words;
6640 }
6641
6642 /* Compute the alignment needed. We align all types to their natural
6643 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6644 if (mode != VOIDmode && mode != BLKmode)
6645 {
6646 int mode_alignment = GET_MODE_BITSIZE (mode);
6647
6648 if (mode == XFmode)
6649 mode_alignment = 128;
6650 else if (mode == XCmode)
6651 mode_alignment = 256;
6652 if (COMPLEX_MODE_P (mode))
6653 mode_alignment /= 2;
6654 /* Misaligned fields are always returned in memory. */
6655 if (bit_offset % mode_alignment)
6656 return 0;
6657 }
6658
6659 /* For V1xx modes, just use the base mode. */
6660 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6661 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6662 mode = GET_MODE_INNER (mode);
6663
6664 /* Classification of atomic types. */
6665 switch (mode)
6666 {
6667 case SDmode:
6668 case DDmode:
6669 classes[0] = X86_64_SSE_CLASS;
6670 return 1;
6671 case TDmode:
6672 classes[0] = X86_64_SSE_CLASS;
6673 classes[1] = X86_64_SSEUP_CLASS;
6674 return 2;
6675 case DImode:
6676 case SImode:
6677 case HImode:
6678 case QImode:
6679 case CSImode:
6680 case CHImode:
6681 case CQImode:
6682 {
6683 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6684
6685 /* Analyze last 128 bits only. */
6686 size = (size - 1) & 0x7f;
6687
6688 if (size < 32)
6689 {
6690 classes[0] = X86_64_INTEGERSI_CLASS;
6691 return 1;
6692 }
6693 else if (size < 64)
6694 {
6695 classes[0] = X86_64_INTEGER_CLASS;
6696 return 1;
6697 }
6698 else if (size < 64+32)
6699 {
6700 classes[0] = X86_64_INTEGER_CLASS;
6701 classes[1] = X86_64_INTEGERSI_CLASS;
6702 return 2;
6703 }
6704 else if (size < 64+64)
6705 {
6706 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6707 return 2;
6708 }
6709 else
6710 gcc_unreachable ();
6711 }
6712 case CDImode:
6713 case TImode:
6714 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6715 return 2;
6716 case COImode:
6717 case OImode:
6718 /* OImode shouldn't be used directly. */
6719 gcc_unreachable ();
6720 case CTImode:
6721 return 0;
6722 case SFmode:
6723 if (!(bit_offset % 64))
6724 classes[0] = X86_64_SSESF_CLASS;
6725 else
6726 classes[0] = X86_64_SSE_CLASS;
6727 return 1;
6728 case DFmode:
6729 classes[0] = X86_64_SSEDF_CLASS;
6730 return 1;
6731 case XFmode:
6732 classes[0] = X86_64_X87_CLASS;
6733 classes[1] = X86_64_X87UP_CLASS;
6734 return 2;
6735 case TFmode:
6736 classes[0] = X86_64_SSE_CLASS;
6737 classes[1] = X86_64_SSEUP_CLASS;
6738 return 2;
6739 case SCmode:
6740 classes[0] = X86_64_SSE_CLASS;
6741 if (!(bit_offset % 64))
6742 return 1;
6743 else
6744 {
6745 static bool warned;
6746
6747 if (!warned && warn_psabi)
6748 {
6749 warned = true;
6750 inform (input_location,
6751 "the ABI of passing structure with complex float"
6752 " member has changed in GCC 4.4");
6753 }
6754 classes[1] = X86_64_SSESF_CLASS;
6755 return 2;
6756 }
6757 case DCmode:
6758 classes[0] = X86_64_SSEDF_CLASS;
6759 classes[1] = X86_64_SSEDF_CLASS;
6760 return 2;
6761 case XCmode:
6762 classes[0] = X86_64_COMPLEX_X87_CLASS;
6763 return 1;
6764 case TCmode:
6765 /* This mode is larger than 16 bytes. */
6766 return 0;
6767 case V8SFmode:
6768 case V8SImode:
6769 case V32QImode:
6770 case V16HImode:
6771 case V4DFmode:
6772 case V4DImode:
6773 classes[0] = X86_64_SSE_CLASS;
6774 classes[1] = X86_64_SSEUP_CLASS;
6775 classes[2] = X86_64_SSEUP_CLASS;
6776 classes[3] = X86_64_SSEUP_CLASS;
6777 return 4;
6778 case V8DFmode:
6779 case V16SFmode:
6780 case V8DImode:
6781 case V16SImode:
6782 case V32HImode:
6783 case V64QImode:
6784 classes[0] = X86_64_SSE_CLASS;
6785 classes[1] = X86_64_SSEUP_CLASS;
6786 classes[2] = X86_64_SSEUP_CLASS;
6787 classes[3] = X86_64_SSEUP_CLASS;
6788 classes[4] = X86_64_SSEUP_CLASS;
6789 classes[5] = X86_64_SSEUP_CLASS;
6790 classes[6] = X86_64_SSEUP_CLASS;
6791 classes[7] = X86_64_SSEUP_CLASS;
6792 return 8;
6793 case V4SFmode:
6794 case V4SImode:
6795 case V16QImode:
6796 case V8HImode:
6797 case V2DFmode:
6798 case V2DImode:
6799 classes[0] = X86_64_SSE_CLASS;
6800 classes[1] = X86_64_SSEUP_CLASS;
6801 return 2;
6802 case V1TImode:
6803 case V1DImode:
6804 case V2SFmode:
6805 case V2SImode:
6806 case V4HImode:
6807 case V8QImode:
6808 classes[0] = X86_64_SSE_CLASS;
6809 return 1;
6810 case BLKmode:
6811 case VOIDmode:
6812 return 0;
6813 default:
6814 gcc_assert (VECTOR_MODE_P (mode));
6815
6816 if (bytes > 16)
6817 return 0;
6818
6819 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6820
6821 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6822 classes[0] = X86_64_INTEGERSI_CLASS;
6823 else
6824 classes[0] = X86_64_INTEGER_CLASS;
6825 classes[1] = X86_64_INTEGER_CLASS;
6826 return 1 + (bytes > 8);
6827 }
6828 }
6829
6830 /* Examine the argument and set the number of registers required in each
6831 class. Return true iff the parameter should be passed in memory. */
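/* Editorial example: for the struct { double d; int i; } case above,
   *int_nregs and *sse_nregs both become 1 and false is returned; when
   classify_argument rejects the type (e.g. it is larger than 64 bytes),
   true is returned and the parameter is passed in memory.  */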
6832
6833 static bool
6834 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6835 int *int_nregs, int *sse_nregs)
6836 {
6837 enum x86_64_reg_class regclass[MAX_CLASSES];
6838 int n = classify_argument (mode, type, regclass, 0);
6839
6840 *int_nregs = 0;
6841 *sse_nregs = 0;
6842
6843 if (!n)
6844 return true;
6845 for (n--; n >= 0; n--)
6846 switch (regclass[n])
6847 {
6848 case X86_64_INTEGER_CLASS:
6849 case X86_64_INTEGERSI_CLASS:
6850 (*int_nregs)++;
6851 break;
6852 case X86_64_SSE_CLASS:
6853 case X86_64_SSESF_CLASS:
6854 case X86_64_SSEDF_CLASS:
6855 (*sse_nregs)++;
6856 break;
6857 case X86_64_NO_CLASS:
6858 case X86_64_SSEUP_CLASS:
6859 break;
6860 case X86_64_X87_CLASS:
6861 case X86_64_X87UP_CLASS:
6862 case X86_64_COMPLEX_X87_CLASS:
6863 if (!in_return)
6864 return true;
6865 break;
6866 case X86_64_MEMORY_CLASS:
6867 gcc_unreachable ();
6868 }
6869
6870 return false;
6871 }
6872
6873 /* Construct container for the argument used by GCC interface. See
6874 FUNCTION_ARG for the detailed description. */
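/* Editorial sketch of the result for the struct { double d; int i; }
   example: a PARALLEL with two EXPR_LIST entries is built, placing a DFmode
   SSE register at byte offset 0 and a DImode general-purpose register at
   byte offset 8; the simple single-class cases below short-circuit to a
   plain REG instead.  */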
6875
6876 static rtx
6877 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6878 const_tree type, int in_return, int nintregs, int nsseregs,
6879 const int *intreg, int sse_regno)
6880 {
6881 /* The following variables hold the static issued_error state. */
6882 static bool issued_sse_arg_error;
6883 static bool issued_sse_ret_error;
6884 static bool issued_x87_ret_error;
6885
6886 enum machine_mode tmpmode;
6887 int bytes =
6888 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6889 enum x86_64_reg_class regclass[MAX_CLASSES];
6890 int n;
6891 int i;
6892 int nexps = 0;
6893 int needed_sseregs, needed_intregs;
6894 rtx exp[MAX_CLASSES];
6895 rtx ret;
6896
6897 n = classify_argument (mode, type, regclass, 0);
6898 if (!n)
6899 return NULL;
6900 if (examine_argument (mode, type, in_return, &needed_intregs,
6901 &needed_sseregs))
6902 return NULL;
6903 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6904 return NULL;
6905
6906 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6907 some less clueful developer tries to use floating-point anyway. */
6908 if (needed_sseregs && !TARGET_SSE)
6909 {
6910 if (in_return)
6911 {
6912 if (!issued_sse_ret_error)
6913 {
6914 error ("SSE register return with SSE disabled");
6915 issued_sse_ret_error = true;
6916 }
6917 }
6918 else if (!issued_sse_arg_error)
6919 {
6920 error ("SSE register argument with SSE disabled");
6921 issued_sse_arg_error = true;
6922 }
6923 return NULL;
6924 }
6925
6926 /* Likewise, error if the ABI requires us to return values in the
6927 x87 registers and the user specified -mno-80387. */
6928 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6929 for (i = 0; i < n; i++)
6930 if (regclass[i] == X86_64_X87_CLASS
6931 || regclass[i] == X86_64_X87UP_CLASS
6932 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6933 {
6934 if (!issued_x87_ret_error)
6935 {
6936 error ("x87 register return with x87 disabled");
6937 issued_x87_ret_error = true;
6938 }
6939 return NULL;
6940 }
6941
6942 /* First construct the simple cases. Avoid SCmode, since we want to use
6943 a single register to pass this type. */
6944 if (n == 1 && mode != SCmode)
6945 switch (regclass[0])
6946 {
6947 case X86_64_INTEGER_CLASS:
6948 case X86_64_INTEGERSI_CLASS:
6949 return gen_rtx_REG (mode, intreg[0]);
6950 case X86_64_SSE_CLASS:
6951 case X86_64_SSESF_CLASS:
6952 case X86_64_SSEDF_CLASS:
6953 if (mode != BLKmode)
6954 return gen_reg_or_parallel (mode, orig_mode,
6955 SSE_REGNO (sse_regno));
6956 break;
6957 case X86_64_X87_CLASS:
6958 case X86_64_COMPLEX_X87_CLASS:
6959 return gen_rtx_REG (mode, FIRST_STACK_REG);
6960 case X86_64_NO_CLASS:
6961 /* Zero sized array, struct or class. */
6962 return NULL;
6963 default:
6964 gcc_unreachable ();
6965 }
6966 if (n == 2
6967 && regclass[0] == X86_64_SSE_CLASS
6968 && regclass[1] == X86_64_SSEUP_CLASS
6969 && mode != BLKmode)
6970 return gen_reg_or_parallel (mode, orig_mode,
6971 SSE_REGNO (sse_regno));
6972 if (n == 4
6973 && regclass[0] == X86_64_SSE_CLASS
6974 && regclass[1] == X86_64_SSEUP_CLASS
6975 && regclass[2] == X86_64_SSEUP_CLASS
6976 && regclass[3] == X86_64_SSEUP_CLASS
6977 && mode != BLKmode)
6978 return gen_reg_or_parallel (mode, orig_mode,
6979 SSE_REGNO (sse_regno));
6980 if (n == 8
6981 && regclass[0] == X86_64_SSE_CLASS
6982 && regclass[1] == X86_64_SSEUP_CLASS
6983 && regclass[2] == X86_64_SSEUP_CLASS
6984 && regclass[3] == X86_64_SSEUP_CLASS
6985 && regclass[4] == X86_64_SSEUP_CLASS
6986 && regclass[5] == X86_64_SSEUP_CLASS
6987 && regclass[6] == X86_64_SSEUP_CLASS
6988 && regclass[7] == X86_64_SSEUP_CLASS
6989 && mode != BLKmode)
6990 return gen_reg_or_parallel (mode, orig_mode,
6991 SSE_REGNO (sse_regno));
6992 if (n == 2
6993 && regclass[0] == X86_64_X87_CLASS
6994 && regclass[1] == X86_64_X87UP_CLASS)
6995 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6996
6997 if (n == 2
6998 && regclass[0] == X86_64_INTEGER_CLASS
6999 && regclass[1] == X86_64_INTEGER_CLASS
7000 && (mode == CDImode || mode == TImode)
7001 && intreg[0] + 1 == intreg[1])
7002 return gen_rtx_REG (mode, intreg[0]);
7003
7004 /* Otherwise figure out the entries of the PARALLEL. */
7005 for (i = 0; i < n; i++)
7006 {
7007 int pos;
7008
7009 switch (regclass[i])
7010 {
7011 case X86_64_NO_CLASS:
7012 break;
7013 case X86_64_INTEGER_CLASS:
7014 case X86_64_INTEGERSI_CLASS:
7015 /* Merge TImodes on aligned occasions here too. */
7016 if (i * 8 + 8 > bytes)
7017 tmpmode
7018 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7019 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7020 tmpmode = SImode;
7021 else
7022 tmpmode = DImode;
7023 /* We've requested 24 bytes which we
7024 don't have a mode for. Use DImode. */
7025 if (tmpmode == BLKmode)
7026 tmpmode = DImode;
7027 exp [nexps++]
7028 = gen_rtx_EXPR_LIST (VOIDmode,
7029 gen_rtx_REG (tmpmode, *intreg),
7030 GEN_INT (i*8));
7031 intreg++;
7032 break;
7033 case X86_64_SSESF_CLASS:
7034 exp [nexps++]
7035 = gen_rtx_EXPR_LIST (VOIDmode,
7036 gen_rtx_REG (SFmode,
7037 SSE_REGNO (sse_regno)),
7038 GEN_INT (i*8));
7039 sse_regno++;
7040 break;
7041 case X86_64_SSEDF_CLASS:
7042 exp [nexps++]
7043 = gen_rtx_EXPR_LIST (VOIDmode,
7044 gen_rtx_REG (DFmode,
7045 SSE_REGNO (sse_regno)),
7046 GEN_INT (i*8));
7047 sse_regno++;
7048 break;
7049 case X86_64_SSE_CLASS:
7050 pos = i;
7051 switch (n)
7052 {
7053 case 1:
7054 tmpmode = DImode;
7055 break;
7056 case 2:
7057 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7058 {
7059 tmpmode = TImode;
7060 i++;
7061 }
7062 else
7063 tmpmode = DImode;
7064 break;
7065 case 4:
7066 gcc_assert (i == 0
7067 && regclass[1] == X86_64_SSEUP_CLASS
7068 && regclass[2] == X86_64_SSEUP_CLASS
7069 && regclass[3] == X86_64_SSEUP_CLASS);
7070 tmpmode = OImode;
7071 i += 3;
7072 break;
7073 case 8:
7074 gcc_assert (i == 0
7075 && regclass[1] == X86_64_SSEUP_CLASS
7076 && regclass[2] == X86_64_SSEUP_CLASS
7077 && regclass[3] == X86_64_SSEUP_CLASS
7078 && regclass[4] == X86_64_SSEUP_CLASS
7079 && regclass[5] == X86_64_SSEUP_CLASS
7080 && regclass[6] == X86_64_SSEUP_CLASS
7081 && regclass[7] == X86_64_SSEUP_CLASS);
7082 tmpmode = XImode;
7083 i += 7;
7084 break;
7085 default:
7086 gcc_unreachable ();
7087 }
7088 exp [nexps++]
7089 = gen_rtx_EXPR_LIST (VOIDmode,
7090 gen_rtx_REG (tmpmode,
7091 SSE_REGNO (sse_regno)),
7092 GEN_INT (pos*8));
7093 sse_regno++;
7094 break;
7095 default:
7096 gcc_unreachable ();
7097 }
7098 }
7099
7100 /* Empty aligned struct, union or class. */
7101 if (nexps == 0)
7102 return NULL;
7103
7104 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7105 for (i = 0; i < nexps; i++)
7106 XVECEXP (ret, 0, i) = exp [i];
7107 return ret;
7108 }
7109
7110 /* Update the data in CUM to advance over an argument of mode MODE
7111 and data type TYPE. (TYPE is null for libcalls where that information
7112 may not be available.) */
7113
7114 static void
7115 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7116 const_tree type, HOST_WIDE_INT bytes,
7117 HOST_WIDE_INT words)
7118 {
7119 switch (mode)
7120 {
7121 default:
7122 break;
7123
7124 case BLKmode:
7125 if (bytes < 0)
7126 break;
7127 /* FALLTHRU */
7128
7129 case DImode:
7130 case SImode:
7131 case HImode:
7132 case QImode:
7133 cum->words += words;
7134 cum->nregs -= words;
7135 cum->regno += words;
7136
7137 if (cum->nregs <= 0)
7138 {
7139 cum->nregs = 0;
7140 cum->regno = 0;
7141 }
7142 break;
7143
7144 case OImode:
7145 /* OImode shouldn't be used directly. */
7146 gcc_unreachable ();
7147
7148 case DFmode:
7149 if (cum->float_in_sse < 2)
7150 break;
7151 case SFmode:
7152 if (cum->float_in_sse < 1)
7153 break;
7154 /* FALLTHRU */
7155
7156 case V8SFmode:
7157 case V8SImode:
7158 case V64QImode:
7159 case V32HImode:
7160 case V16SImode:
7161 case V8DImode:
7162 case V16SFmode:
7163 case V8DFmode:
7164 case V32QImode:
7165 case V16HImode:
7166 case V4DFmode:
7167 case V4DImode:
7168 case TImode:
7169 case V16QImode:
7170 case V8HImode:
7171 case V4SImode:
7172 case V2DImode:
7173 case V4SFmode:
7174 case V2DFmode:
7175 if (!type || !AGGREGATE_TYPE_P (type))
7176 {
7177 cum->sse_words += words;
7178 cum->sse_nregs -= 1;
7179 cum->sse_regno += 1;
7180 if (cum->sse_nregs <= 0)
7181 {
7182 cum->sse_nregs = 0;
7183 cum->sse_regno = 0;
7184 }
7185 }
7186 break;
7187
7188 case V8QImode:
7189 case V4HImode:
7190 case V2SImode:
7191 case V2SFmode:
7192 case V1TImode:
7193 case V1DImode:
7194 if (!type || !AGGREGATE_TYPE_P (type))
7195 {
7196 cum->mmx_words += words;
7197 cum->mmx_nregs -= 1;
7198 cum->mmx_regno += 1;
7199 if (cum->mmx_nregs <= 0)
7200 {
7201 cum->mmx_nregs = 0;
7202 cum->mmx_regno = 0;
7203 }
7204 }
7205 break;
7206 }
7207 }
7208
7209 static void
7210 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7211 const_tree type, HOST_WIDE_INT words, bool named)
7212 {
7213 int int_nregs, sse_nregs;
7214
7215 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7216 if (!named && (VALID_AVX512F_REG_MODE (mode)
7217 || VALID_AVX256_REG_MODE (mode)))
7218 return;
7219
7220 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7221 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7222 {
7223 cum->nregs -= int_nregs;
7224 cum->sse_nregs -= sse_nregs;
7225 cum->regno += int_nregs;
7226 cum->sse_regno += sse_nregs;
7227 }
7228 else
7229 {
7230 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7231 cum->words = (cum->words + align - 1) & ~(align - 1);
7232 cum->words += words;
7233 }
7234 }
7235
7236 static void
7237 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7238 HOST_WIDE_INT words)
7239 {
7240 /* Otherwise, this should be passed indirect. */
7241 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7242
7243 cum->words += words;
7244 if (cum->nregs > 0)
7245 {
7246 cum->nregs -= 1;
7247 cum->regno += 1;
7248 }
7249 }
7250
7251 /* Update the data in CUM to advance over an argument of mode MODE and
7252 data type TYPE. (TYPE is null for libcalls where that information
7253 may not be available.) */
7254
7255 static void
7256 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7257 const_tree type, bool named)
7258 {
7259 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7260 HOST_WIDE_INT bytes, words;
7261
7262 if (mode == BLKmode)
7263 bytes = int_size_in_bytes (type);
7264 else
7265 bytes = GET_MODE_SIZE (mode);
7266 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7267
7268 if (type)
7269 mode = type_natural_mode (type, NULL, false);
7270
7271 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7272 function_arg_advance_ms_64 (cum, bytes, words);
7273 else if (TARGET_64BIT)
7274 function_arg_advance_64 (cum, mode, type, words, named);
7275 else
7276 function_arg_advance_32 (cum, mode, type, bytes, words);
7277 }
7278
7279 /* Define where to put the arguments to a function.
7280 Value is zero to push the argument on the stack,
7281 or a hard register in which to store the argument.
7282
7283 MODE is the argument's machine mode.
7284 TYPE is the data type of the argument (as a tree).
7285 This is null for libcalls where that information may
7286 not be available.
7287 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7288 the preceding args and about the function being called.
7289 NAMED is nonzero if this argument is a named parameter
7290 (otherwise it is an extra parameter matching an ellipsis). */
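/* Editorial note on the 32-bit path below: integral arguments reach
   registers only under regparm/fastcall/thiscall conventions; with
   fastcall, for instance, the first two SImode-or-smaller non-aggregate
   arguments go in ECX and EDX, and anything else is pushed on the stack
   (NULL_RTX is returned).  */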
7291
7292 static rtx
7293 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7294 enum machine_mode orig_mode, const_tree type,
7295 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7296 {
7297 /* Avoid the AL settings for the Unix64 ABI. */
7298 if (mode == VOIDmode)
7299 return constm1_rtx;
7300
7301 switch (mode)
7302 {
7303 default:
7304 break;
7305
7306 case BLKmode:
7307 if (bytes < 0)
7308 break;
7309 /* FALLTHRU */
7310 case DImode:
7311 case SImode:
7312 case HImode:
7313 case QImode:
7314 if (words <= cum->nregs)
7315 {
7316 int regno = cum->regno;
7317
7318 /* Fastcall allocates the first two DWORD (SImode) or
7319 smaller arguments to ECX and EDX if the argument isn't
7320 an aggregate type. */
7321 if (cum->fastcall)
7322 {
7323 if (mode == BLKmode
7324 || mode == DImode
7325 || (type && AGGREGATE_TYPE_P (type)))
7326 break;
7327
7328 /* ECX not EAX is the first allocated register. */
7329 if (regno == AX_REG)
7330 regno = CX_REG;
7331 }
7332 return gen_rtx_REG (mode, regno);
7333 }
7334 break;
7335
7336 case DFmode:
7337 if (cum->float_in_sse < 2)
7338 break;
7339 case SFmode:
7340 if (cum->float_in_sse < 1)
7341 break;
7342 /* FALLTHRU */
7343 case TImode:
7344 /* In 32bit, we pass TImode in xmm registers. */
7345 case V16QImode:
7346 case V8HImode:
7347 case V4SImode:
7348 case V2DImode:
7349 case V4SFmode:
7350 case V2DFmode:
7351 if (!type || !AGGREGATE_TYPE_P (type))
7352 {
7353 if (cum->sse_nregs)
7354 return gen_reg_or_parallel (mode, orig_mode,
7355 cum->sse_regno + FIRST_SSE_REG);
7356 }
7357 break;
7358
7359 case OImode:
7360 case XImode:
7361 /* OImode and XImode shouldn't be used directly. */
7362 gcc_unreachable ();
7363
7364 case V64QImode:
7365 case V32HImode:
7366 case V16SImode:
7367 case V8DImode:
7368 case V16SFmode:
7369 case V8DFmode:
7370 case V8SFmode:
7371 case V8SImode:
7372 case V32QImode:
7373 case V16HImode:
7374 case V4DFmode:
7375 case V4DImode:
7376 if (!type || !AGGREGATE_TYPE_P (type))
7377 {
7378 if (cum->sse_nregs)
7379 return gen_reg_or_parallel (mode, orig_mode,
7380 cum->sse_regno + FIRST_SSE_REG);
7381 }
7382 break;
7383
7384 case V8QImode:
7385 case V4HImode:
7386 case V2SImode:
7387 case V2SFmode:
7388 case V1TImode:
7389 case V1DImode:
7390 if (!type || !AGGREGATE_TYPE_P (type))
7391 {
7392 if (cum->mmx_nregs)
7393 return gen_reg_or_parallel (mode, orig_mode,
7394 cum->mmx_regno + FIRST_MMX_REG);
7395 }
7396 break;
7397 }
7398
7399 return NULL_RTX;
7400 }
7401
7402 static rtx
7403 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7404 enum machine_mode orig_mode, const_tree type, bool named)
7405 {
7406 /* Handle a hidden AL argument containing number of registers
7407 for varargs x86-64 functions. */
7408 if (mode == VOIDmode)
7409 return GEN_INT (cum->maybe_vaarg
7410 ? (cum->sse_nregs < 0
7411 ? X86_64_SSE_REGPARM_MAX
7412 : cum->sse_regno)
7413 : -1);
7414
7415 switch (mode)
7416 {
7417 default:
7418 break;
7419
7420 case V8SFmode:
7421 case V8SImode:
7422 case V32QImode:
7423 case V16HImode:
7424 case V4DFmode:
7425 case V4DImode:
7426 case V16SFmode:
7427 case V16SImode:
7428 case V64QImode:
7429 case V32HImode:
7430 case V8DFmode:
7431 case V8DImode:
7432 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7433 if (!named)
7434 return NULL;
7435 break;
7436 }
7437
7438 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7439 cum->sse_nregs,
7440 &x86_64_int_parameter_registers [cum->regno],
7441 cum->sse_regno);
7442 }
7443
7444 static rtx
7445 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7446 enum machine_mode orig_mode, bool named,
7447 HOST_WIDE_INT bytes)
7448 {
7449 unsigned int regno;
7450
7451 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7452 We use the value -2 to specify that the current function call is MS ABI. */
7453 if (mode == VOIDmode)
7454 return GEN_INT (-2);
7455
7456 /* If we've run out of registers, it goes on the stack. */
7457 if (cum->nregs == 0)
7458 return NULL_RTX;
7459
7460 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7461
7462 /* Only floating point modes are passed in anything but integer regs. */
7463 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7464 {
7465 if (named)
7466 regno = cum->regno + FIRST_SSE_REG;
7467 else
7468 {
7469 rtx t1, t2;
7470
7471 /* Unnamed floating parameters are passed in both the
7472 SSE and integer registers. */
7473 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7474 t2 = gen_rtx_REG (mode, regno);
7475 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7476 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7477 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7478 }
7479 }
7480 /* Handle aggregate types passed in registers. */
7481 if (orig_mode == BLKmode)
7482 {
7483 if (bytes > 0 && bytes <= 8)
7484 mode = (bytes > 4 ? DImode : SImode);
7485 if (mode == BLKmode)
7486 mode = DImode;
7487 }
7488
7489 return gen_reg_or_parallel (mode, orig_mode, regno);
7490 }
7491
7492 /* Return where to put the arguments to a function.
7493 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7494
7495 MODE is the argument's machine mode. TYPE is the data type of the
7496 argument. It is null for libcalls where that information may not be
7497 available. CUM gives information about the preceding args and about
7498 the function being called. NAMED is nonzero if this argument is a
7499 named parameter (otherwise it is an extra parameter matching an
7500 ellipsis). */
7501
7502 static rtx
7503 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7504 const_tree type, bool named)
7505 {
7506 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7507 enum machine_mode mode = omode;
7508 HOST_WIDE_INT bytes, words;
7509 rtx arg;
7510
7511 if (mode == BLKmode)
7512 bytes = int_size_in_bytes (type);
7513 else
7514 bytes = GET_MODE_SIZE (mode);
7515 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7516
7517 /* To simplify the code below, represent vector types with a vector mode
7518 even if MMX/SSE are not active. */
7519 if (type && TREE_CODE (type) == VECTOR_TYPE)
7520 mode = type_natural_mode (type, cum, false);
7521
7522 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7523 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7524 else if (TARGET_64BIT)
7525 arg = function_arg_64 (cum, mode, omode, type, named);
7526 else
7527 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7528
7529 return arg;
7530 }
7531
7532 /* A C expression that indicates when an argument must be passed by
7533 reference. If nonzero for an argument, a copy of that argument is
7534 made in memory and a pointer to the argument is passed instead of
7535 the argument itself. The pointer is passed in whatever way is
7536 appropriate for passing a pointer to that type. */
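/* Editorial examples of the MS x64 rule implemented below: aggregates of
   1, 2, 4 or 8 bytes are passed by value, while 16-byte aggregates (such
   as __m128) and all arrays are passed by reference; on 64-bit SysV only
   variable-sized types take this path.  */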
7537
7538 static bool
7539 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7540 const_tree type, bool named ATTRIBUTE_UNUSED)
7541 {
7542 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7543
7544 /* See Windows x64 Software Convention. */
7545 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7546 {
7547 int msize = (int) GET_MODE_SIZE (mode);
7548 if (type)
7549 {
7550 /* Arrays are passed by reference. */
7551 if (TREE_CODE (type) == ARRAY_TYPE)
7552 return true;
7553
7554 if (AGGREGATE_TYPE_P (type))
7555 {
7556 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7557 are passed by reference. */
7558 msize = int_size_in_bytes (type);
7559 }
7560 }
7561
7562 /* __m128 is passed by reference. */
7563 switch (msize) {
7564 case 1: case 2: case 4: case 8:
7565 break;
7566 default:
7567 return true;
7568 }
7569 }
7570 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7571 return true;
7572 
7573 return false;
7574 }
7575
7576 /* Return true when TYPE should be 128bit aligned for 32bit argument
7577 passing ABI. XXX: This function is obsolete and is only used for
7578 checking psABI compatibility with previous versions of GCC. */
7579
7580 static bool
7581 ix86_compat_aligned_value_p (const_tree type)
7582 {
7583 enum machine_mode mode = TYPE_MODE (type);
7584 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7585 || mode == TDmode
7586 || mode == TFmode
7587 || mode == TCmode)
7588 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7589 return true;
7590 if (TYPE_ALIGN (type) < 128)
7591 return false;
7592
7593 if (AGGREGATE_TYPE_P (type))
7594 {
7595 /* Walk the aggregates recursively. */
7596 switch (TREE_CODE (type))
7597 {
7598 case RECORD_TYPE:
7599 case UNION_TYPE:
7600 case QUAL_UNION_TYPE:
7601 {
7602 tree field;
7603
7604 /* Walk all the structure fields. */
7605 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7606 {
7607 if (TREE_CODE (field) == FIELD_DECL
7608 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7609 return true;
7610 }
7611 break;
7612 }
7613
7614 case ARRAY_TYPE:
7615 /* Just for use if some languages pass arrays by value. */
7616 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7617 return true;
7618 break;
7619
7620 default:
7621 gcc_unreachable ();
7622 }
7623 }
7624 return false;
7625 }
7626
7627 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7628 XXX: This function is obsolete and is only used for checking psABI
7629 compatibility with previous versions of GCC. */
7630
7631 static unsigned int
7632 ix86_compat_function_arg_boundary (enum machine_mode mode,
7633 const_tree type, unsigned int align)
7634 {
7635 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7636 natural boundaries. */
7637 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7638 {
7639 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7640 make an exception for SSE modes since these require 128bit
7641 alignment.
7642
7643 The handling here differs from field_alignment. ICC aligns MMX
7644 arguments to 4 byte boundaries, while structure fields are aligned
7645 to 8 byte boundaries. */
7646 if (!type)
7647 {
7648 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7649 align = PARM_BOUNDARY;
7650 }
7651 else
7652 {
7653 if (!ix86_compat_aligned_value_p (type))
7654 align = PARM_BOUNDARY;
7655 }
7656 }
7657 if (align > BIGGEST_ALIGNMENT)
7658 align = BIGGEST_ALIGNMENT;
7659 return align;
7660 }
7661
7662 /* Return true when TYPE should be 128bit aligned for 32bit argument
7663 passing ABI. */
7664
7665 static bool
7666 ix86_contains_aligned_value_p (const_tree type)
7667 {
7668 enum machine_mode mode = TYPE_MODE (type);
7669
7670 if (mode == XFmode || mode == XCmode)
7671 return false;
7672
7673 if (TYPE_ALIGN (type) < 128)
7674 return false;
7675
7676 if (AGGREGATE_TYPE_P (type))
7677 {
7678 /* Walk the aggregates recursively. */
7679 switch (TREE_CODE (type))
7680 {
7681 case RECORD_TYPE:
7682 case UNION_TYPE:
7683 case QUAL_UNION_TYPE:
7684 {
7685 tree field;
7686
7687 /* Walk all the structure fields. */
7688 for (field = TYPE_FIELDS (type);
7689 field;
7690 field = DECL_CHAIN (field))
7691 {
7692 if (TREE_CODE (field) == FIELD_DECL
7693 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7694 return true;
7695 }
7696 break;
7697 }
7698
7699 case ARRAY_TYPE:
7700 /* Just for use if some languages pass arrays by value. */
7701 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7702 return true;
7703 break;
7704
7705 default:
7706 gcc_unreachable ();
7707 }
7708 }
7709 else
7710 return TYPE_ALIGN (type) >= 128;
7711
7712 return false;
7713 }
7714
7715 /* Gives the alignment boundary, in bits, of an argument with the
7716 specified mode and type. */
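/* Editorial example: under the 32-bit ABI a plain int argument is aligned
   to PARM_BOUNDARY (32 bits), whereas a 16-byte-aligned type such as
   __m128 keeps its 128-bit boundary; when the result differs from what
   ix86_compat_function_arg_boundary would have chosen, a one-time
   -Wpsabi note is emitted below.  */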
7717
7718 static unsigned int
7719 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7720 {
7721 unsigned int align;
7722 if (type)
7723 {
7724 /* Since the main variant type is used for the call, convert the
7725 type to its main variant. */
7726 type = TYPE_MAIN_VARIANT (type);
7727 align = TYPE_ALIGN (type);
7728 }
7729 else
7730 align = GET_MODE_ALIGNMENT (mode);
7731 if (align < PARM_BOUNDARY)
7732 align = PARM_BOUNDARY;
7733 else
7734 {
7735 static bool warned;
7736 unsigned int saved_align = align;
7737
7738 if (!TARGET_64BIT)
7739 {
7740 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7741 if (!type)
7742 {
7743 if (mode == XFmode || mode == XCmode)
7744 align = PARM_BOUNDARY;
7745 }
7746 else if (!ix86_contains_aligned_value_p (type))
7747 align = PARM_BOUNDARY;
7748
7749 if (align < 128)
7750 align = PARM_BOUNDARY;
7751 }
7752
7753 if (warn_psabi
7754 && !warned
7755 && align != ix86_compat_function_arg_boundary (mode, type,
7756 saved_align))
7757 {
7758 warned = true;
7759 inform (input_location,
7760 "The ABI for passing parameters with %d-byte"
7761 " alignment has changed in GCC 4.6",
7762 align / BITS_PER_UNIT);
7763 }
7764 }
7765
7766 return align;
7767 }
7768
7769 /* Return true if N is a possible register number of function value. */
7770
7771 static bool
7772 ix86_function_value_regno_p (const unsigned int regno)
7773 {
7774 switch (regno)
7775 {
7776 case AX_REG:
7777 case DX_REG:
7778 return true;
7779 case DI_REG:
7780 case SI_REG:
7781 return TARGET_64BIT && ix86_abi != MS_ABI;
7782
7783 /* Complex values are returned in %st(0)/%st(1) pair. */
7784 case ST0_REG:
7785 case ST1_REG:
7786 /* TODO: The function should depend on current function ABI but
7787 builtins.c would need updating then. Therefore we use the
7788 default ABI. */
7789 if (TARGET_64BIT && ix86_abi == MS_ABI)
7790 return false;
7791 return TARGET_FLOAT_RETURNS_IN_80387;
7792
7793 /* Complex values are returned in %xmm0/%xmm1 pair. */
7794 case XMM0_REG:
7795 case XMM1_REG:
7796 return TARGET_SSE;
7797
7798 case MM0_REG:
7799 if (TARGET_MACHO || TARGET_64BIT)
7800 return false;
7801 return TARGET_MMX;
7802 }
7803
7804 return false;
7805 }
7806
7807 /* Define how to find the value returned by a function.
7808 VALTYPE is the data type of the value (as a tree).
7809 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7810 otherwise, FUNC is 0. */
7811
7812 static rtx
7813 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7814 const_tree fntype, const_tree fn)
7815 {
7816 unsigned int regno;
7817
7818 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7819 we normally prevent this case when mmx is not available. However
7820 some ABIs may require the result to be returned like DImode. */
7821 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7822 regno = FIRST_MMX_REG;
7823
7824 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7825 we prevent this case when sse is not available. However some ABIs
7826 may require the result to be returned like integer TImode. */
7827 else if (mode == TImode
7828 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7829 regno = FIRST_SSE_REG;
7830
7831 /* 32-byte vector modes in %ymm0. */
7832 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7833 regno = FIRST_SSE_REG;
7834
7835 /* 64-byte vector modes in %zmm0. */
7836 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7837 regno = FIRST_SSE_REG;
7838
7839 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7840 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7841 regno = FIRST_FLOAT_REG;
7842 else
7843 /* Most things go in %eax. */
7844 regno = AX_REG;
7845
7846 /* Override FP return register with %xmm0 for local functions when
7847 SSE math is enabled or for functions with sseregparm attribute. */
7848 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7849 {
7850 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7851 if ((sse_level >= 1 && mode == SFmode)
7852 || (sse_level == 2 && mode == DFmode))
7853 regno = FIRST_SSE_REG;
7854 }
7855
7856 /* OImode shouldn't be used directly. */
7857 gcc_assert (mode != OImode);
7858
7859 return gen_rtx_REG (orig_mode, regno);
7860 }
7861
7862 static rtx
7863 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7864 const_tree valtype)
7865 {
7866 rtx ret;
7867
7868 /* Handle libcalls, which don't provide a type node. */
7869 if (valtype == NULL)
7870 {
7871 unsigned int regno;
7872
7873 switch (mode)
7874 {
7875 case SFmode:
7876 case SCmode:
7877 case DFmode:
7878 case DCmode:
7879 case TFmode:
7880 case SDmode:
7881 case DDmode:
7882 case TDmode:
7883 regno = FIRST_SSE_REG;
7884 break;
7885 case XFmode:
7886 case XCmode:
7887 regno = FIRST_FLOAT_REG;
7888 break;
7889 case TCmode:
7890 return NULL;
7891 default:
7892 regno = AX_REG;
7893 }
7894
7895 return gen_rtx_REG (mode, regno);
7896 }
7897 else if (POINTER_TYPE_P (valtype))
7898 {
7899 /* Pointers are always returned in word_mode. */
7900 mode = word_mode;
7901 }
7902
7903 ret = construct_container (mode, orig_mode, valtype, 1,
7904 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7905 x86_64_int_return_registers, 0);
7906
7907 /* For zero sized structures, construct_container returns NULL, but we
7908 need to keep the rest of the compiler happy by returning a meaningful value. */
7909 if (!ret)
7910 ret = gen_rtx_REG (orig_mode, AX_REG);
7911
7912 return ret;
7913 }
7914
7915 static rtx
7916 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7917 const_tree valtype)
7918 {
7919 unsigned int regno = AX_REG;
7920
7921 if (TARGET_SSE)
7922 {
7923 switch (GET_MODE_SIZE (mode))
7924 {
7925 case 16:
7926 if (valtype != NULL_TREE
7927 && !VECTOR_INTEGER_TYPE_P (valtype)
7928 && !INTEGRAL_TYPE_P (valtype)
7929 && !VECTOR_FLOAT_TYPE_P (valtype))
7931 break;
7932 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7933 && !COMPLEX_MODE_P (mode))
7934 regno = FIRST_SSE_REG;
7935 break;
7936 case 8:
7937 case 4:
7938 if (mode == SFmode || mode == DFmode)
7939 regno = FIRST_SSE_REG;
7940 break;
7941 default:
7942 break;
7943 }
7944 }
7945 return gen_rtx_REG (orig_mode, regno);
7946 }
7947
7948 static rtx
7949 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7950 enum machine_mode orig_mode, enum machine_mode mode)
7951 {
7952 const_tree fn, fntype;
7953
7954 fn = NULL_TREE;
7955 if (fntype_or_decl && DECL_P (fntype_or_decl))
7956 fn = fntype_or_decl;
7957 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7958
7959 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7960 return function_value_ms_64 (orig_mode, mode, valtype);
7961 else if (TARGET_64BIT)
7962 return function_value_64 (orig_mode, mode, valtype);
7963 else
7964 return function_value_32 (orig_mode, mode, fntype, fn);
7965 }
7966
7967 static rtx
7968 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7969 bool outgoing ATTRIBUTE_UNUSED)
7970 {
7971 enum machine_mode mode, orig_mode;
7972
7973 orig_mode = TYPE_MODE (valtype);
7974 mode = type_natural_mode (valtype, NULL, true);
7975 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7976 }
7977
7978 /* Pointer function arguments and return values are promoted to
7979 word_mode. */
7980
7981 static enum machine_mode
7982 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7983 int *punsignedp, const_tree fntype,
7984 int for_return)
7985 {
7986 if (type != NULL_TREE && POINTER_TYPE_P (type))
7987 {
7988 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7989 return word_mode;
7990 }
7991 return default_promote_function_mode (type, mode, punsignedp, fntype,
7992 for_return);
7993 }
7994
7995 /* Return true if a structure, union or array with MODE containing FIELD
7996 should be accessed using BLKmode. */
7997
7998 static bool
7999 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8000 {
8001 /* Union with XFmode must be in BLKmode. */
8002 return (mode == XFmode
8003 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8004 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8005 }
8006
8007 rtx
8008 ix86_libcall_value (enum machine_mode mode)
8009 {
8010 return ix86_function_value_1 (NULL, NULL, mode, mode);
8011 }
8012
8013 /* Return true iff type is returned in memory. */
8014
8015 static bool
8016 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8017 {
8018 #ifdef SUBTARGET_RETURN_IN_MEMORY
8019 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8020 #else
8021 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8022 HOST_WIDE_INT size;
8023
8024 if (TARGET_64BIT)
8025 {
8026 if (ix86_function_type_abi (fntype) == MS_ABI)
8027 {
8028 size = int_size_in_bytes (type);
8029
8030 /* __m128 is returned in xmm0. */
8031 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8032 || INTEGRAL_TYPE_P (type)
8033 || VECTOR_FLOAT_TYPE_P (type))
8034 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8035 && !COMPLEX_MODE_P (mode)
8036 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8037 return false;
8038
8039 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8040 return size != 1 && size != 2 && size != 4 && size != 8;
8041 }
8042 else
8043 {
8044 int needed_intregs, needed_sseregs;
8045
8046 return examine_argument (mode, type, 1,
8047 &needed_intregs, &needed_sseregs);
8048 }
8049 }
8050 else
8051 {
8052 if (mode == BLKmode)
8053 return true;
8054
8055 size = int_size_in_bytes (type);
8056
8057 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8058 return false;
8059
8060 if (VECTOR_MODE_P (mode) || mode == TImode)
8061 {
8062 /* User-created vectors small enough to fit in EAX. */
8063 if (size < 8)
8064 return false;
8065
8066 /* Unless the ABI prescribes otherwise,
8067 MMX/3dNow values are returned in MM0 if available. */
8068
8069 if (size == 8)
8070 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8071
8072 /* SSE values are returned in XMM0 if available. */
8073 if (size == 16)
8074 return !TARGET_SSE;
8075
8076 /* AVX values are returned in YMM0 if available. */
8077 if (size == 32)
8078 return !TARGET_AVX;
8079
8080 /* AVX512F values are returned in ZMM0 if available. */
8081 if (size == 64)
8082 return !TARGET_AVX512F;
8083 }
8084
8085 if (mode == XFmode)
8086 return false;
8087
8088 if (size > 12)
8089 return true;
8090
8091 /* OImode shouldn't be used directly. */
8092 gcc_assert (mode != OImode);
8093
8094 return false;
8095 }
8096 #endif
8097 }
8098
8099 \f
8100 /* Create the va_list data type. */
8101
8102 /* Returns the calling-convention-specific va_list data type.
8103 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
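/* For reference (editorial sketch mirroring the fields created below), the
   SysV x86-64 va_list built here is equivalent to:

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];

   while i386 and the 64-bit MS ABI use a plain char pointer.  */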
8104
8105 static tree
8106 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8107 {
8108 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8109
8110 /* For i386 we use plain pointer to argument area. */
8111 if (!TARGET_64BIT || abi == MS_ABI)
8112 return build_pointer_type (char_type_node);
8113
8114 record = lang_hooks.types.make_type (RECORD_TYPE);
8115 type_decl = build_decl (BUILTINS_LOCATION,
8116 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8117
8118 f_gpr = build_decl (BUILTINS_LOCATION,
8119 FIELD_DECL, get_identifier ("gp_offset"),
8120 unsigned_type_node);
8121 f_fpr = build_decl (BUILTINS_LOCATION,
8122 FIELD_DECL, get_identifier ("fp_offset"),
8123 unsigned_type_node);
8124 f_ovf = build_decl (BUILTINS_LOCATION,
8125 FIELD_DECL, get_identifier ("overflow_arg_area"),
8126 ptr_type_node);
8127 f_sav = build_decl (BUILTINS_LOCATION,
8128 FIELD_DECL, get_identifier ("reg_save_area"),
8129 ptr_type_node);
8130
8131 va_list_gpr_counter_field = f_gpr;
8132 va_list_fpr_counter_field = f_fpr;
8133
8134 DECL_FIELD_CONTEXT (f_gpr) = record;
8135 DECL_FIELD_CONTEXT (f_fpr) = record;
8136 DECL_FIELD_CONTEXT (f_ovf) = record;
8137 DECL_FIELD_CONTEXT (f_sav) = record;
8138
8139 TYPE_STUB_DECL (record) = type_decl;
8140 TYPE_NAME (record) = type_decl;
8141 TYPE_FIELDS (record) = f_gpr;
8142 DECL_CHAIN (f_gpr) = f_fpr;
8143 DECL_CHAIN (f_fpr) = f_ovf;
8144 DECL_CHAIN (f_ovf) = f_sav;
8145
8146 layout_type (record);
8147
8148 /* The correct type is an array type of one element. */
8149 return build_array_type (record, build_index_type (size_zero_node));
8150 }
8151
8152 /* Set up the builtin va_list data type and, for 64-bit, the additional
8153 calling-convention-specific va_list data types. */
8154
8155 static tree
8156 ix86_build_builtin_va_list (void)
8157 {
8158 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8159
8160 /* Initialize abi specific va_list builtin types. */
8161 if (TARGET_64BIT)
8162 {
8163 tree t;
8164 if (ix86_abi == MS_ABI)
8165 {
8166 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8167 if (TREE_CODE (t) != RECORD_TYPE)
8168 t = build_variant_type_copy (t);
8169 sysv_va_list_type_node = t;
8170 }
8171 else
8172 {
8173 t = ret;
8174 if (TREE_CODE (t) != RECORD_TYPE)
8175 t = build_variant_type_copy (t);
8176 sysv_va_list_type_node = t;
8177 }
8178 if (ix86_abi != MS_ABI)
8179 {
8180 t = ix86_build_builtin_va_list_abi (MS_ABI);
8181 if (TREE_CODE (t) != RECORD_TYPE)
8182 t = build_variant_type_copy (t);
8183 ms_va_list_type_node = t;
8184 }
8185 else
8186 {
8187 t = ret;
8188 if (TREE_CODE (t) != RECORD_TYPE)
8189 t = build_variant_type_copy (t);
8190 ms_va_list_type_node = t;
8191 }
8192 }
8193
8194 return ret;
8195 }
8196
8197 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
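/* Editorial sketch of the register save area laid out below, assuming the
   SysV limits of 6 integer and 8 SSE parameter registers: up to
   6 * UNITS_PER_WORD bytes of GPR slots come first, followed by up to
   8 * 16 bytes of SSE slots; the SSE stores are skipped at run time when
   the hidden register count passed in %al is zero.  */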
8198
8199 static void
8200 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8201 {
8202 rtx save_area, mem;
8203 alias_set_type set;
8204 int i, max;
8205
8206 /* GPR size of varargs save area. */
8207 if (cfun->va_list_gpr_size)
8208 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8209 else
8210 ix86_varargs_gpr_size = 0;
8211
8212 /* FPR size of varargs save area. We don't need it if we don't pass
8213 anything in SSE registers. */
8214 if (TARGET_SSE && cfun->va_list_fpr_size)
8215 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8216 else
8217 ix86_varargs_fpr_size = 0;
8218
8219 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8220 return;
8221
8222 save_area = frame_pointer_rtx;
8223 set = get_varargs_alias_set ();
8224
8225 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8226 if (max > X86_64_REGPARM_MAX)
8227 max = X86_64_REGPARM_MAX;
8228
8229 for (i = cum->regno; i < max; i++)
8230 {
8231 mem = gen_rtx_MEM (word_mode,
8232 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8233 MEM_NOTRAP_P (mem) = 1;
8234 set_mem_alias_set (mem, set);
8235 emit_move_insn (mem,
8236 gen_rtx_REG (word_mode,
8237 x86_64_int_parameter_registers[i]));
8238 }
8239
8240 if (ix86_varargs_fpr_size)
8241 {
8242 enum machine_mode smode;
8243 rtx label, test;
8244
8245 /* Now emit code to save SSE registers. The AX parameter contains the
8246 number of SSE parameter registers used to call this function, though
8247 all we actually check here is the zero/non-zero status. */
8248
8249 label = gen_label_rtx ();
8250 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8251 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8252 label));
8253
8254 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8255 we used movdqa (i.e. TImode) instead? Perhaps even better would
8256 be if we could determine the real mode of the data, via a hook
8257 into pass_stdarg. Ignore all that for now. */
8258 smode = V4SFmode;
8259 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8260 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8261
8262 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8263 if (max > X86_64_SSE_REGPARM_MAX)
8264 max = X86_64_SSE_REGPARM_MAX;
8265
8266 for (i = cum->sse_regno; i < max; ++i)
8267 {
8268 mem = plus_constant (Pmode, save_area,
8269 i * 16 + ix86_varargs_gpr_size);
8270 mem = gen_rtx_MEM (smode, mem);
8271 MEM_NOTRAP_P (mem) = 1;
8272 set_mem_alias_set (mem, set);
8273 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8274
8275 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8276 }
8277
8278 emit_label (label);
8279 }
8280 }
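
/* Illustrative sketch of the varargs register save area filled in above,
   assuming the default SysV limits (X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8):

       save_area +   0:  rdi rsi rdx rcx r8 r9     (6 x 8 bytes of GPRs)
       save_area +  48:  xmm0 ... xmm7             (8 x 16 bytes of SSE regs)

   gp_offset and fp_offset in the va_list index into this block; the SSE
   part is skipped entirely when ix86_varargs_fpr_size is zero.  */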
8281
8282 static void
8283 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8284 {
8285 alias_set_type set = get_varargs_alias_set ();
8286 int i;
8287
8288 /* Reset to zero, as there might have been a SysV va_arg use
8289 before. */
8290 ix86_varargs_gpr_size = 0;
8291 ix86_varargs_fpr_size = 0;
8292
8293 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8294 {
8295 rtx reg, mem;
8296
8297 mem = gen_rtx_MEM (Pmode,
8298 plus_constant (Pmode, virtual_incoming_args_rtx,
8299 i * UNITS_PER_WORD));
8300 MEM_NOTRAP_P (mem) = 1;
8301 set_mem_alias_set (mem, set);
8302
8303 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8304 emit_move_insn (mem, reg);
8305 }
8306 }
8307
8308 static void
8309 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8310 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8311 int no_rtl)
8312 {
8313 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8314 CUMULATIVE_ARGS next_cum;
8315 tree fntype;
8316
8317 /* This argument doesn't appear to be used anymore. Which is good,
8318 because the old code here didn't suppress rtl generation. */
8319 gcc_assert (!no_rtl);
8320
8321 if (!TARGET_64BIT)
8322 return;
8323
8324 fntype = TREE_TYPE (current_function_decl);
8325
8326 /* For varargs, we do not want to skip the dummy va_dcl argument.
8327 For stdargs, we do want to skip the last named argument. */
8328 next_cum = *cum;
8329 if (stdarg_p (fntype))
8330 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8331 true);
8332
8333 if (cum->call_abi == MS_ABI)
8334 setup_incoming_varargs_ms_64 (&next_cum);
8335 else
8336 setup_incoming_varargs_64 (&next_cum);
8337 }
8338
8339 /* Checks whether TYPE is a va_list type that is represented as a plain char pointer. */
8340
8341 static bool
8342 is_va_list_char_pointer (tree type)
8343 {
8344 tree canonic;
8345
8346 /* For 32-bit it is always true. */
8347 if (!TARGET_64BIT)
8348 return true;
8349 canonic = ix86_canonical_va_list_type (type);
8350 return (canonic == ms_va_list_type_node
8351 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8352 }
8353
8354 /* Implement va_start. */
8355
8356 static void
8357 ix86_va_start (tree valist, rtx nextarg)
8358 {
8359 HOST_WIDE_INT words, n_gpr, n_fpr;
8360 tree f_gpr, f_fpr, f_ovf, f_sav;
8361 tree gpr, fpr, ovf, sav, t;
8362 tree type;
8363 rtx ovf_rtx;
8364
8365 if (flag_split_stack
8366 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8367 {
8368 unsigned int scratch_regno;
8369
8370 /* When we are splitting the stack, we can't refer to the stack
8371 arguments using internal_arg_pointer, because they may be on
8372 the old stack. The split stack prologue will arrange to
8373 leave a pointer to the old stack arguments in a scratch
8374 register, which we here copy to a pseudo-register. The split
8375 stack prologue can't set the pseudo-register directly because
8376 it (the prologue) runs before any registers have been saved. */
8377
8378 scratch_regno = split_stack_prologue_scratch_regno ();
8379 if (scratch_regno != INVALID_REGNUM)
8380 {
8381 rtx reg, seq;
8382
8383 reg = gen_reg_rtx (Pmode);
8384 cfun->machine->split_stack_varargs_pointer = reg;
8385
8386 start_sequence ();
8387 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8388 seq = get_insns ();
8389 end_sequence ();
8390
8391 push_topmost_sequence ();
8392 emit_insn_after (seq, entry_of_function ());
8393 pop_topmost_sequence ();
8394 }
8395 }
8396
8397 /* Only 64-bit targets need something special. */
8398 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8399 {
8400 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8401 std_expand_builtin_va_start (valist, nextarg);
8402 else
8403 {
8404 rtx va_r, next;
8405
8406 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8407 next = expand_binop (ptr_mode, add_optab,
8408 cfun->machine->split_stack_varargs_pointer,
8409 crtl->args.arg_offset_rtx,
8410 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8411 convert_move (va_r, next, 0);
8412 }
8413 return;
8414 }
8415
8416 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8417 f_fpr = DECL_CHAIN (f_gpr);
8418 f_ovf = DECL_CHAIN (f_fpr);
8419 f_sav = DECL_CHAIN (f_ovf);
8420
8421 valist = build_simple_mem_ref (valist);
8422 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8423 /* The following should be folded into the MEM_REF offset. */
8424 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8425 f_gpr, NULL_TREE);
8426 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8427 f_fpr, NULL_TREE);
8428 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8429 f_ovf, NULL_TREE);
8430 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8431 f_sav, NULL_TREE);
8432
8433 /* Count number of gp and fp argument registers used. */
8434 words = crtl->args.info.words;
8435 n_gpr = crtl->args.info.regno;
8436 n_fpr = crtl->args.info.sse_regno;
8437
8438 if (cfun->va_list_gpr_size)
8439 {
8440 type = TREE_TYPE (gpr);
8441 t = build2 (MODIFY_EXPR, type,
8442 gpr, build_int_cst (type, n_gpr * 8));
8443 TREE_SIDE_EFFECTS (t) = 1;
8444 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8445 }
8446
8447 if (TARGET_SSE && cfun->va_list_fpr_size)
8448 {
8449 type = TREE_TYPE (fpr);
8450 t = build2 (MODIFY_EXPR, type, fpr,
8451 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8452 TREE_SIDE_EFFECTS (t) = 1;
8453 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8454 }
8455
8456 /* Find the overflow area. */
8457 type = TREE_TYPE (ovf);
8458 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8459 ovf_rtx = crtl->args.internal_arg_pointer;
8460 else
8461 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8462 t = make_tree (type, ovf_rtx);
8463 if (words != 0)
8464 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8465 t = build2 (MODIFY_EXPR, type, ovf, t);
8466 TREE_SIDE_EFFECTS (t) = 1;
8467 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8468
8469 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8470 {
8471 /* Find the register save area.
8472 The function prologue saves it right above the stack frame. */
8473 type = TREE_TYPE (sav);
8474 t = make_tree (type, frame_pointer_rtx);
8475 if (!ix86_varargs_gpr_size)
8476 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8477 t = build2 (MODIFY_EXPR, type, sav, t);
8478 TREE_SIDE_EFFECTS (t) = 1;
8479 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8480 }
8481 }
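
/* Illustrative sketch (assuming the SysV register limits; ap, N_GPR and
   N_FPR are just placeholder names for this sketch): for a function that
   consumed N_GPR integer and N_FPR SSE argument registers before the last
   named argument, the expansion above amounts roughly to

       ap->gp_offset = N_GPR * 8;
       ap->fp_offset = 6 * 8 + N_FPR * 16;
       ap->overflow_arg_area = first stack-passed anonymous argument;
       ap->reg_save_area = save area established by the prologue;

   with the gp/fp assignments omitted when the corresponding part of the
   va_list is known to be unused.  */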
8482
8483 /* Implement va_arg. */
8484
8485 static tree
8486 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8487 gimple_seq *post_p)
8488 {
8489 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8490 tree f_gpr, f_fpr, f_ovf, f_sav;
8491 tree gpr, fpr, ovf, sav, t;
8492 int size, rsize;
8493 tree lab_false, lab_over = NULL_TREE;
8494 tree addr, t2;
8495 rtx container;
8496 int indirect_p = 0;
8497 tree ptrtype;
8498 enum machine_mode nat_mode;
8499 unsigned int arg_boundary;
8500
8501 /* Only 64-bit targets need something special. */
8502 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8503 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8504
8505 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8506 f_fpr = DECL_CHAIN (f_gpr);
8507 f_ovf = DECL_CHAIN (f_fpr);
8508 f_sav = DECL_CHAIN (f_ovf);
8509
8510 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8511 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8512 valist = build_va_arg_indirect_ref (valist);
8513 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8514 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8515 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8516
8517 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8518 if (indirect_p)
8519 type = build_pointer_type (type);
8520 size = int_size_in_bytes (type);
8521 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8522
8523 nat_mode = type_natural_mode (type, NULL, false);
8524 switch (nat_mode)
8525 {
8526 case V8SFmode:
8527 case V8SImode:
8528 case V32QImode:
8529 case V16HImode:
8530 case V4DFmode:
8531 case V4DImode:
8532 case V16SFmode:
8533 case V16SImode:
8534 case V64QImode:
8535 case V32HImode:
8536 case V8DFmode:
8537 case V8DImode:
8538 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8539 if (!TARGET_64BIT_MS_ABI)
8540 {
8541 container = NULL;
8542 break;
8543 }
8544
8545 default:
8546 container = construct_container (nat_mode, TYPE_MODE (type),
8547 type, 0, X86_64_REGPARM_MAX,
8548 X86_64_SSE_REGPARM_MAX, intreg,
8549 0);
8550 break;
8551 }
8552
8553 /* Pull the value out of the saved registers. */
8554
8555 addr = create_tmp_var (ptr_type_node, "addr");
8556
8557 if (container)
8558 {
8559 int needed_intregs, needed_sseregs;
8560 bool need_temp;
8561 tree int_addr, sse_addr;
8562
8563 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8564 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8565
8566 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8567
8568 need_temp = (!REG_P (container)
8569 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8570 || TYPE_ALIGN (type) > 128));
8571
8572 /* If we are passing a structure, verify that it is a consecutive block
8573 in the register save area. If not, we need to do moves. */
8574 if (!need_temp && !REG_P (container))
8575 {
8576 /* Verify that all registers are strictly consecutive. */
8577 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8578 {
8579 int i;
8580
8581 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8582 {
8583 rtx slot = XVECEXP (container, 0, i);
8584 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8585 || INTVAL (XEXP (slot, 1)) != i * 16)
8586 need_temp = 1;
8587 }
8588 }
8589 else
8590 {
8591 int i;
8592
8593 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8594 {
8595 rtx slot = XVECEXP (container, 0, i);
8596 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8597 || INTVAL (XEXP (slot, 1)) != i * 8)
8598 need_temp = 1;
8599 }
8600 }
8601 }
8602 if (!need_temp)
8603 {
8604 int_addr = addr;
8605 sse_addr = addr;
8606 }
8607 else
8608 {
8609 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8610 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8611 }
8612
8613 /* First ensure that we fit completely in registers. */
8614 if (needed_intregs)
8615 {
8616 t = build_int_cst (TREE_TYPE (gpr),
8617 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8618 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8619 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8620 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8621 gimplify_and_add (t, pre_p);
8622 }
8623 if (needed_sseregs)
8624 {
8625 t = build_int_cst (TREE_TYPE (fpr),
8626 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8627 + X86_64_REGPARM_MAX * 8);
8628 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8629 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8630 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8631 gimplify_and_add (t, pre_p);
8632 }
8633
8634 /* Compute index to start of area used for integer regs. */
8635 if (needed_intregs)
8636 {
8637 /* int_addr = gpr + sav; */
8638 t = fold_build_pointer_plus (sav, gpr);
8639 gimplify_assign (int_addr, t, pre_p);
8640 }
8641 if (needed_sseregs)
8642 {
8643 /* sse_addr = fpr + sav; */
8644 t = fold_build_pointer_plus (sav, fpr);
8645 gimplify_assign (sse_addr, t, pre_p);
8646 }
8647 if (need_temp)
8648 {
8649 int i, prev_size = 0;
8650 tree temp = create_tmp_var (type, "va_arg_tmp");
8651
8652 /* addr = &temp; */
8653 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8654 gimplify_assign (addr, t, pre_p);
8655
8656 for (i = 0; i < XVECLEN (container, 0); i++)
8657 {
8658 rtx slot = XVECEXP (container, 0, i);
8659 rtx reg = XEXP (slot, 0);
8660 enum machine_mode mode = GET_MODE (reg);
8661 tree piece_type;
8662 tree addr_type;
8663 tree daddr_type;
8664 tree src_addr, src;
8665 int src_offset;
8666 tree dest_addr, dest;
8667 int cur_size = GET_MODE_SIZE (mode);
8668
8669 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8670 prev_size = INTVAL (XEXP (slot, 1));
8671 if (prev_size + cur_size > size)
8672 {
8673 cur_size = size - prev_size;
8674 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8675 if (mode == BLKmode)
8676 mode = QImode;
8677 }
8678 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8679 if (mode == GET_MODE (reg))
8680 addr_type = build_pointer_type (piece_type);
8681 else
8682 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8683 true);
8684 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8685 true);
8686
8687 if (SSE_REGNO_P (REGNO (reg)))
8688 {
8689 src_addr = sse_addr;
8690 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8691 }
8692 else
8693 {
8694 src_addr = int_addr;
8695 src_offset = REGNO (reg) * 8;
8696 }
8697 src_addr = fold_convert (addr_type, src_addr);
8698 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8699
8700 dest_addr = fold_convert (daddr_type, addr);
8701 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8702 if (cur_size == GET_MODE_SIZE (mode))
8703 {
8704 src = build_va_arg_indirect_ref (src_addr);
8705 dest = build_va_arg_indirect_ref (dest_addr);
8706
8707 gimplify_assign (dest, src, pre_p);
8708 }
8709 else
8710 {
8711 tree copy
8712 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8713 3, dest_addr, src_addr,
8714 size_int (cur_size));
8715 gimplify_and_add (copy, pre_p);
8716 }
8717 prev_size += cur_size;
8718 }
8719 }
8720
8721 if (needed_intregs)
8722 {
8723 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8724 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8725 gimplify_assign (gpr, t, pre_p);
8726 }
8727
8728 if (needed_sseregs)
8729 {
8730 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8731 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8732 gimplify_assign (fpr, t, pre_p);
8733 }
8734
8735 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8736
8737 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8738 }
8739
8740 /* ... otherwise out of the overflow area. */
8741
8742 /* When the caller aligns a parameter on the stack, a parameter whose
8743 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
8744 aligned to MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8745 with the caller. */
8746 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8747 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8748 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8749
8750 /* Care for on-stack alignment if needed. */
8751 if (arg_boundary <= 64 || size == 0)
8752 t = ovf;
8753 else
8754 {
8755 HOST_WIDE_INT align = arg_boundary / 8;
8756 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8757 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8758 build_int_cst (TREE_TYPE (t), -align));
8759 }
8760
8761 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8762 gimplify_assign (addr, t, pre_p);
8763
8764 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8765 gimplify_assign (unshare_expr (ovf), t, pre_p);
8766
8767 if (container)
8768 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8769
8770 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8771 addr = fold_convert (ptrtype, addr);
8772
8773 if (indirect_p)
8774 addr = build_va_arg_indirect_ref (addr);
8775 return build_va_arg_indirect_ref (addr);
8776 }
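
/* Illustrative sketch of the va_arg logic gimplified above, for a type that
   needs NEEDED_INT general registers and NEEDED_SSE vector registers under
   the SysV limits (NEEDED_INT, NEEDED_SSE and ap are placeholder names for
   this sketch):

       if (ap->gp_offset <= 48 - NEEDED_INT * 8
           && ap->fp_offset <= 48 + 128 - NEEDED_SSE * 16)
         {
           read the value out of ap->reg_save_area at gp_offset/fp_offset,
           assembling it into a temporary when the pieces are not laid out
           consecutively in the save area;
           ap->gp_offset += NEEDED_INT * 8;
           ap->fp_offset += NEEDED_SSE * 16;
         }
       else
         {
           align ap->overflow_arg_area if the argument needs more than
           8-byte alignment, read the value from there, and advance it by
           the argument size rounded up to a multiple of 8;
         }  */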
8777 \f
8778 /* Return true if OPNUM's MEM should be matched
8779 in movabs* patterns. */
8780
8781 bool
8782 ix86_check_movabs (rtx insn, int opnum)
8783 {
8784 rtx set, mem;
8785
8786 set = PATTERN (insn);
8787 if (GET_CODE (set) == PARALLEL)
8788 set = XVECEXP (set, 0, 0);
8789 gcc_assert (GET_CODE (set) == SET);
8790 mem = XEXP (set, opnum);
8791 while (GET_CODE (mem) == SUBREG)
8792 mem = SUBREG_REG (mem);
8793 gcc_assert (MEM_P (mem));
8794 return volatile_ok || !MEM_VOLATILE_P (mem);
8795 }
8796 \f
8797 /* Initialize the table of extra 80387 mathematical constants. */
8798
8799 static void
8800 init_ext_80387_constants (void)
8801 {
8802 static const char * cst[5] =
8803 {
8804 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8805 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8806 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8807 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8808 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8809 };
8810 int i;
8811
8812 for (i = 0; i < 5; i++)
8813 {
8814 real_from_string (&ext_80387_constants_table[i], cst[i]);
8815 /* Ensure each constant is rounded to XFmode precision. */
8816 real_convert (&ext_80387_constants_table[i],
8817 XFmode, &ext_80387_constants_table[i]);
8818 }
8819
8820 ext_80387_constants_init = 1;
8821 }
8822
8823 /* Return non-zero if the constant is something that
8824 can be loaded with a special instruction. */
8825
8826 int
8827 standard_80387_constant_p (rtx x)
8828 {
8829 enum machine_mode mode = GET_MODE (x);
8830
8831 REAL_VALUE_TYPE r;
8832
8833 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8834 return -1;
8835
8836 if (x == CONST0_RTX (mode))
8837 return 1;
8838 if (x == CONST1_RTX (mode))
8839 return 2;
8840
8841 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8842
8843 /* For XFmode constants, try to find a special 80387 instruction when
8844 optimizing for size or on those CPUs that benefit from them. */
8845 if (mode == XFmode
8846 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8847 {
8848 int i;
8849
8850 if (! ext_80387_constants_init)
8851 init_ext_80387_constants ();
8852
8853 for (i = 0; i < 5; i++)
8854 if (real_identical (&r, &ext_80387_constants_table[i]))
8855 return i + 3;
8856 }
8857
8858 /* A load of the constant -0.0 or -1.0 will be split into an
8859 fldz;fchs or fld1;fchs sequence. */
8860 if (real_isnegzero (&r))
8861 return 8;
8862 if (real_identical (&r, &dconstm1))
8863 return 9;
8864
8865 return 0;
8866 }
8867
8868 /* Return the opcode of the special instruction to be used to load
8869 the constant X. */
8870
8871 const char *
8872 standard_80387_constant_opcode (rtx x)
8873 {
8874 switch (standard_80387_constant_p (x))
8875 {
8876 case 1:
8877 return "fldz";
8878 case 2:
8879 return "fld1";
8880 case 3:
8881 return "fldlg2";
8882 case 4:
8883 return "fldln2";
8884 case 5:
8885 return "fldl2e";
8886 case 6:
8887 return "fldl2t";
8888 case 7:
8889 return "fldpi";
8890 case 8:
8891 case 9:
8892 return "#";
8893 default:
8894 gcc_unreachable ();
8895 }
8896 }
8897
8898 /* Return the CONST_DOUBLE representing the 80387 constant that is
8899 loaded by the specified special instruction. The argument IDX
8900 matches the return value from standard_80387_constant_p. */
8901
8902 rtx
8903 standard_80387_constant_rtx (int idx)
8904 {
8905 int i;
8906
8907 if (! ext_80387_constants_init)
8908 init_ext_80387_constants ();
8909
8910 switch (idx)
8911 {
8912 case 3:
8913 case 4:
8914 case 5:
8915 case 6:
8916 case 7:
8917 i = idx - 3;
8918 break;
8919
8920 default:
8921 gcc_unreachable ();
8922 }
8923
8924 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8925 XFmode);
8926 }
8927
8928 /* Return 1 if X is all zeros and 2 if X is all ones
8929 in a supported SSE/AVX vector mode. */
8930
8931 int
8932 standard_sse_constant_p (rtx x)
8933 {
8934 enum machine_mode mode = GET_MODE (x);
8935
8936 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8937 return 1;
8938 if (vector_all_ones_operand (x, mode))
8939 switch (mode)
8940 {
8941 case V16QImode:
8942 case V8HImode:
8943 case V4SImode:
8944 case V2DImode:
8945 if (TARGET_SSE2)
8946 return 2;
8947 case V32QImode:
8948 case V16HImode:
8949 case V8SImode:
8950 case V4DImode:
8951 if (TARGET_AVX2)
8952 return 2;
8953 case V64QImode:
8954 case V32HImode:
8955 case V16SImode:
8956 case V8DImode:
8957 if (TARGET_AVX512F)
8958 return 2;
8959 default:
8960 break;
8961 }
8962
8963 return 0;
8964 }
8965
8966 /* Return the opcode of the special instruction to be used to load
8967 the constant X. */
8968
8969 const char *
8970 standard_sse_constant_opcode (rtx insn, rtx x)
8971 {
8972 switch (standard_sse_constant_p (x))
8973 {
8974 case 1:
8975 switch (get_attr_mode (insn))
8976 {
8977 case MODE_XI:
8978 case MODE_V16SF:
8979 return "vpxord\t%g0, %g0, %g0";
8980 case MODE_V8DF:
8981 return "vpxorq\t%g0, %g0, %g0";
8982 case MODE_TI:
8983 return "%vpxor\t%0, %d0";
8984 case MODE_V2DF:
8985 return "%vxorpd\t%0, %d0";
8986 case MODE_V4SF:
8987 return "%vxorps\t%0, %d0";
8988
8989 case MODE_OI:
8990 return "vpxor\t%x0, %x0, %x0";
8991 case MODE_V4DF:
8992 return "vxorpd\t%x0, %x0, %x0";
8993 case MODE_V8SF:
8994 return "vxorps\t%x0, %x0, %x0";
8995
8996 default:
8997 break;
8998 }
8999
9000 case 2:
9001 if (get_attr_mode (insn) == MODE_XI
9002 || get_attr_mode (insn) == MODE_V8DF
9003 || get_attr_mode (insn) == MODE_V16SF)
9004 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9005 if (TARGET_AVX)
9006 return "vpcmpeqd\t%0, %0, %0";
9007 else
9008 return "pcmpeqd\t%0, %0";
9009
9010 default:
9011 break;
9012 }
9013 gcc_unreachable ();
9014 }
9015
9016 /* Returns true if OP contains a symbol reference. */
9017
9018 bool
9019 symbolic_reference_mentioned_p (rtx op)
9020 {
9021 const char *fmt;
9022 int i;
9023
9024 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9025 return true;
9026
9027 fmt = GET_RTX_FORMAT (GET_CODE (op));
9028 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9029 {
9030 if (fmt[i] == 'E')
9031 {
9032 int j;
9033
9034 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9035 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9036 return true;
9037 }
9038
9039 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9040 return true;
9041 }
9042
9043 return false;
9044 }
9045
9046 /* Return true if it is appropriate to emit `ret' instructions in the
9047 body of a function. Do this only if the epilogue is simple, needing a
9048 couple of insns. Prior to reloading, we can't tell how many registers
9049 must be saved, so return false then. Return false if there is no frame
9050 marker to de-allocate. */
9051
9052 bool
9053 ix86_can_use_return_insn_p (void)
9054 {
9055 struct ix86_frame frame;
9056
9057 if (! reload_completed || frame_pointer_needed)
9058 return 0;
9059
9060 /* Don't allow more than 32k pop, since that's all we can do
9061 with one instruction. */
9062 if (crtl->args.pops_args && crtl->args.size >= 32768)
9063 return 0;
9064
9065 ix86_compute_frame_layout (&frame);
9066 return (frame.stack_pointer_offset == UNITS_PER_WORD
9067 && (frame.nregs + frame.nsseregs) == 0);
9068 }
9069 \f
9070 /* Value should be nonzero if functions must have frame pointers.
9071 Zero means the frame pointer need not be set up (and parms may
9072 be accessed via the stack pointer) in functions that seem suitable. */
9073
9074 static bool
9075 ix86_frame_pointer_required (void)
9076 {
9077 /* If we accessed previous frames, then the generated code expects
9078 to be able to access the saved ebp value in our frame. */
9079 if (cfun->machine->accesses_prev_frame)
9080 return true;
9081
9082 /* Several x86 OSes need a frame pointer for other reasons,
9083 usually pertaining to setjmp. */
9084 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9085 return true;
9086
9087 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9088 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9089 return true;
9090
9091 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9092 stack allocation is 4GB. */
9093 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9094 return true;
9095
9096 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9097 turns off the frame pointer by default. Turn it back on now if
9098 we've not got a leaf function. */
9099 if (TARGET_OMIT_LEAF_FRAME_POINTER
9100 && (!crtl->is_leaf
9101 || ix86_current_function_calls_tls_descriptor))
9102 return true;
9103
9104 if (crtl->profile && !flag_fentry)
9105 return true;
9106
9107 return false;
9108 }
9109
9110 /* Record that the current function accesses previous call frames. */
9111
9112 void
9113 ix86_setup_frame_addresses (void)
9114 {
9115 cfun->machine->accesses_prev_frame = 1;
9116 }
9117 \f
9118 #ifndef USE_HIDDEN_LINKONCE
9119 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9120 # define USE_HIDDEN_LINKONCE 1
9121 # else
9122 # define USE_HIDDEN_LINKONCE 0
9123 # endif
9124 #endif
9125
9126 static int pic_labels_used;
9127
9128 /* Fills in the label name that should be used for a pc thunk for
9129 the given register. */
9130
9131 static void
9132 get_pc_thunk_name (char name[32], unsigned int regno)
9133 {
9134 gcc_assert (!TARGET_64BIT);
9135
9136 if (USE_HIDDEN_LINKONCE)
9137 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9138 else
9139 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9140 }
9141
9142
9143 /* This function generates the code for the -fpic pc thunks: each thunk
9144 loads its register with the return address of the caller and then returns. */
9145
9146 static void
9147 ix86_code_end (void)
9148 {
9149 rtx xops[2];
9150 int regno;
9151
9152 for (regno = AX_REG; regno <= SP_REG; regno++)
9153 {
9154 char name[32];
9155 tree decl;
9156
9157 if (!(pic_labels_used & (1 << regno)))
9158 continue;
9159
9160 get_pc_thunk_name (name, regno);
9161
9162 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9163 get_identifier (name),
9164 build_function_type_list (void_type_node, NULL_TREE));
9165 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9166 NULL_TREE, void_type_node);
9167 TREE_PUBLIC (decl) = 1;
9168 TREE_STATIC (decl) = 1;
9169 DECL_IGNORED_P (decl) = 1;
9170
9171 #if TARGET_MACHO
9172 if (TARGET_MACHO)
9173 {
9174 switch_to_section (darwin_sections[text_coal_section]);
9175 fputs ("\t.weak_definition\t", asm_out_file);
9176 assemble_name (asm_out_file, name);
9177 fputs ("\n\t.private_extern\t", asm_out_file);
9178 assemble_name (asm_out_file, name);
9179 putc ('\n', asm_out_file);
9180 ASM_OUTPUT_LABEL (asm_out_file, name);
9181 DECL_WEAK (decl) = 1;
9182 }
9183 else
9184 #endif
9185 if (USE_HIDDEN_LINKONCE)
9186 {
9187 cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9188
9189 targetm.asm_out.unique_section (decl, 0);
9190 switch_to_section (get_named_section (decl, NULL, 0));
9191
9192 targetm.asm_out.globalize_label (asm_out_file, name);
9193 fputs ("\t.hidden\t", asm_out_file);
9194 assemble_name (asm_out_file, name);
9195 putc ('\n', asm_out_file);
9196 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9197 }
9198 else
9199 {
9200 switch_to_section (text_section);
9201 ASM_OUTPUT_LABEL (asm_out_file, name);
9202 }
9203
9204 DECL_INITIAL (decl) = make_node (BLOCK);
9205 current_function_decl = decl;
9206 init_function_start (decl);
9207 first_function_block_is_cold = false;
9208 /* Make sure unwind info is emitted for the thunk if needed. */
9209 final_start_function (emit_barrier (), asm_out_file, 1);
9210
9211 /* Pad stack IP move with 4 instructions (two NOPs count
9212 as one instruction). */
9213 if (TARGET_PAD_SHORT_FUNCTION)
9214 {
9215 int i = 8;
9216
9217 while (i--)
9218 fputs ("\tnop\n", asm_out_file);
9219 }
9220
9221 xops[0] = gen_rtx_REG (Pmode, regno);
9222 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9223 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9224 fputs ("\tret\n", asm_out_file);
9225 final_end_function ();
9226 init_insn_lengths ();
9227 free_after_compilation (cfun);
9228 set_cfun (NULL);
9229 current_function_decl = NULL;
9230 }
9231
9232 if (flag_split_stack)
9233 file_end_indicate_split_stack ();
9234 }
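
/* Illustrative sketch: each pc thunk emitted above consists of a single
   load of the caller's return address followed by a return; for %ebx with
   USE_HIDDEN_LINKONCE this is roughly

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   so that after "call __x86.get_pc_thunk.bx" the register holds the
   address of the following instruction.  */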
9235
9236 /* Emit code for the SET_GOT patterns. */
9237
9238 const char *
9239 output_set_got (rtx dest, rtx label)
9240 {
9241 rtx xops[3];
9242
9243 xops[0] = dest;
9244
9245 if (TARGET_VXWORKS_RTP && flag_pic)
9246 {
9247 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9248 xops[2] = gen_rtx_MEM (Pmode,
9249 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9250 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9251
9252 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9253 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9254 an unadorned address. */
9255 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9256 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9257 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9258 return "";
9259 }
9260
9261 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9262
9263 if (!flag_pic)
9264 {
9265 if (TARGET_MACHO)
9266 /* We don't need a pic base, we're not producing pic. */
9267 gcc_unreachable ();
9268
9269 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9270 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9271 targetm.asm_out.internal_label (asm_out_file, "L",
9272 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9273 }
9274 else
9275 {
9276 char name[32];
9277 get_pc_thunk_name (name, REGNO (dest));
9278 pic_labels_used |= 1 << REGNO (dest);
9279
9280 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9281 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9282 output_asm_insn ("call\t%X2", xops);
9283
9284 #if TARGET_MACHO
9285 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9286 This is what will be referenced by the Mach-O PIC subsystem. */
9287 if (machopic_should_output_picbase_label () || !label)
9288 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9289
9290 /* When we are restoring the pic base at the site of a nonlocal label,
9291 and we decided to emit the pic base above, we will still output a
9292 local label used for calculating the correction offset (even though
9293 the offset will be 0 in that case). */
9294 if (label)
9295 targetm.asm_out.internal_label (asm_out_file, "L",
9296 CODE_LABEL_NUMBER (label));
9297 #endif
9298 }
9299
9300 if (!TARGET_MACHO)
9301 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9302
9303 return "";
9304 }
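
/* Illustrative sketch: for the common ELF -fpic case, the SET_GOT sequence
   printed above for %ebx is

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   leaving the GOT base in %ebx for the rest of the function.  */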
9305
9306 /* Generate a "push" pattern for input ARG. */
9307
9308 static rtx
9309 gen_push (rtx arg)
9310 {
9311 struct machine_function *m = cfun->machine;
9312
9313 if (m->fs.cfa_reg == stack_pointer_rtx)
9314 m->fs.cfa_offset += UNITS_PER_WORD;
9315 m->fs.sp_offset += UNITS_PER_WORD;
9316
9317 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9318 arg = gen_rtx_REG (word_mode, REGNO (arg));
9319
9320 return gen_rtx_SET (VOIDmode,
9321 gen_rtx_MEM (word_mode,
9322 gen_rtx_PRE_DEC (Pmode,
9323 stack_pointer_rtx)),
9324 arg);
9325 }
9326
9327 /* Generate a "pop" pattern for input ARG. */
9328
9329 static rtx
9330 gen_pop (rtx arg)
9331 {
9332 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9333 arg = gen_rtx_REG (word_mode, REGNO (arg));
9334
9335 return gen_rtx_SET (VOIDmode,
9336 arg,
9337 gen_rtx_MEM (word_mode,
9338 gen_rtx_POST_INC (Pmode,
9339 stack_pointer_rtx)));
9340 }
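
/* Illustrative sketch: on a 64-bit target, gen_push and gen_pop applied to
   %rax produce RTL along the lines of

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ax))
       (set (reg:DI ax) (mem:DI (post_inc:DI (reg:DI sp))))

   i.e. plain word-mode stack pushes and pops.  */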
9341
9342 /* Return >= 0 if there is an unused call-clobbered register available
9343 for the entire function. */
9344
9345 static unsigned int
9346 ix86_select_alt_pic_regnum (void)
9347 {
9348 if (crtl->is_leaf
9349 && !crtl->profile
9350 && !ix86_current_function_calls_tls_descriptor)
9351 {
9352 int i, drap;
9353 /* Can't use the same register for both PIC and DRAP. */
9354 if (crtl->drap_reg)
9355 drap = REGNO (crtl->drap_reg);
9356 else
9357 drap = -1;
9358 for (i = 2; i >= 0; --i)
9359 if (i != drap && !df_regs_ever_live_p (i))
9360 return i;
9361 }
9362
9363 return INVALID_REGNUM;
9364 }
9365
9366 /* Return TRUE if we need to save REGNO. */
9367
9368 static bool
9369 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9370 {
9371 if (pic_offset_table_rtx
9372 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9373 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9374 || crtl->profile
9375 || crtl->calls_eh_return
9376 || crtl->uses_const_pool
9377 || cfun->has_nonlocal_label))
9378 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9379
9380 if (crtl->calls_eh_return && maybe_eh_return)
9381 {
9382 unsigned i;
9383 for (i = 0; ; i++)
9384 {
9385 unsigned test = EH_RETURN_DATA_REGNO (i);
9386 if (test == INVALID_REGNUM)
9387 break;
9388 if (test == regno)
9389 return true;
9390 }
9391 }
9392
9393 if (crtl->drap_reg
9394 && regno == REGNO (crtl->drap_reg)
9395 && !cfun->machine->no_drap_save_restore)
9396 return true;
9397
9398 return (df_regs_ever_live_p (regno)
9399 && !call_used_regs[regno]
9400 && !fixed_regs[regno]
9401 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9402 }
9403
9404 /* Return the number of saved general purpose registers. */
9405
9406 static int
9407 ix86_nsaved_regs (void)
9408 {
9409 int nregs = 0;
9410 int regno;
9411
9412 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9413 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9414 nregs ++;
9415 return nregs;
9416 }
9417
9418 /* Return the number of saved SSE registers. */
9419
9420 static int
9421 ix86_nsaved_sseregs (void)
9422 {
9423 int nregs = 0;
9424 int regno;
9425
9426 if (!TARGET_64BIT_MS_ABI)
9427 return 0;
9428 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9429 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9430 nregs ++;
9431 return nregs;
9432 }
9433
9434 /* Given FROM and TO register numbers, say whether this elimination is
9435 allowed. If stack alignment is needed, we can only replace argument
9436 pointer with hard frame pointer, or replace frame pointer with stack
9437 pointer. Otherwise, frame pointer elimination is automatically
9438 handled and all other eliminations are valid. */
9439
9440 static bool
9441 ix86_can_eliminate (const int from, const int to)
9442 {
9443 if (stack_realign_fp)
9444 return ((from == ARG_POINTER_REGNUM
9445 && to == HARD_FRAME_POINTER_REGNUM)
9446 || (from == FRAME_POINTER_REGNUM
9447 && to == STACK_POINTER_REGNUM));
9448 else
9449 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9450 }
9451
9452 /* Return the offset between two registers, one to be eliminated, and the other
9453 its replacement, at the start of a routine. */
9454
9455 HOST_WIDE_INT
9456 ix86_initial_elimination_offset (int from, int to)
9457 {
9458 struct ix86_frame frame;
9459 ix86_compute_frame_layout (&frame);
9460
9461 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9462 return frame.hard_frame_pointer_offset;
9463 else if (from == FRAME_POINTER_REGNUM
9464 && to == HARD_FRAME_POINTER_REGNUM)
9465 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9466 else
9467 {
9468 gcc_assert (to == STACK_POINTER_REGNUM);
9469
9470 if (from == ARG_POINTER_REGNUM)
9471 return frame.stack_pointer_offset;
9472
9473 gcc_assert (from == FRAME_POINTER_REGNUM);
9474 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9475 }
9476 }
9477
9478 /* In a dynamically-aligned function, we can't know the offset from
9479 stack pointer to frame pointer, so we must ensure that setjmp
9480 eliminates fp against the hard fp (%ebp) rather than trying to
9481 index from %esp up to the top of the frame across a gap that is
9482 of unknown (at compile-time) size. */
9483 static rtx
9484 ix86_builtin_setjmp_frame_value (void)
9485 {
9486 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9487 }
9488
9489 /* When using -fsplit-stack, the allocation routines set a field in
9490 the TCB to the bottom of the stack plus this much space, measured
9491 in bytes. */
9492
9493 #define SPLIT_STACK_AVAILABLE 256
9494
9495 /* Fill in the ix86_frame structure FRAME describing the frame of the function being compiled. */
9496
9497 static void
9498 ix86_compute_frame_layout (struct ix86_frame *frame)
9499 {
9500 unsigned HOST_WIDE_INT stack_alignment_needed;
9501 HOST_WIDE_INT offset;
9502 unsigned HOST_WIDE_INT preferred_alignment;
9503 HOST_WIDE_INT size = get_frame_size ();
9504 HOST_WIDE_INT to_allocate;
9505
9506 frame->nregs = ix86_nsaved_regs ();
9507 frame->nsseregs = ix86_nsaved_sseregs ();
9508
9509 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9510 except in function prologues and leaf functions. */
9511 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9512 && (!crtl->is_leaf || cfun->calls_alloca != 0
9513 || ix86_current_function_calls_tls_descriptor))
9514 {
9515 crtl->preferred_stack_boundary = 128;
9516 crtl->stack_alignment_needed = 128;
9517 }
9518 /* preferred_stack_boundary is never updated for a call
9519 expanded from a TLS descriptor. Update it here. We don't update it at
9520 expand time because, according to the comments before
9521 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9522 away. */
9523 else if (ix86_current_function_calls_tls_descriptor
9524 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9525 {
9526 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9527 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9528 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9529 }
9530
9531 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9532 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9533
9534 gcc_assert (!size || stack_alignment_needed);
9535 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9536 gcc_assert (preferred_alignment <= stack_alignment_needed);
9537
9538 /* For SEH we have to limit the amount of code movement into the prologue.
9539 At present we do this via a BLOCKAGE, at which point there's very little
9540 scheduling that can be done, which means that there's very little point
9541 in doing anything except PUSHs. */
9542 if (TARGET_SEH)
9543 cfun->machine->use_fast_prologue_epilogue = false;
9544
9545 /* During reload iterations the number of saved registers can change.
9546 Recompute the value as needed. Do not recompute when the number of registers
9547 didn't change, as reload makes multiple calls to this function and does not
9548 expect the decision to change within a single iteration. */
9549 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9550 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9551 {
9552 int count = frame->nregs;
9553 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9554
9555 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9556
9557 /* The fast prologue uses moves instead of pushes to save registers. This
9558 is significantly longer, but also executes faster, as modern hardware
9559 can execute the moves in parallel but can't do that for push/pop.
9560
9561 Be careful about choosing which prologue to emit: when a function takes
9562 many instructions to execute we may use the slow version, as well as when
9563 the function is known to be outside a hot spot (this is known with
9564 feedback only). Weight the size of the function by the number of registers
9565 to save, as it is cheap to use one or two push instructions but very
9566 slow to use many of them. */
9567 if (count)
9568 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9569 if (node->frequency < NODE_FREQUENCY_NORMAL
9570 || (flag_branch_probabilities
9571 && node->frequency < NODE_FREQUENCY_HOT))
9572 cfun->machine->use_fast_prologue_epilogue = false;
9573 else
9574 cfun->machine->use_fast_prologue_epilogue
9575 = !expensive_function_p (count);
9576 }
9577
9578 frame->save_regs_using_mov
9579 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9580 /* If static stack checking is enabled and done with probes,
9581 the registers need to be saved before allocating the frame. */
9582 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9583
9584 /* Skip return address. */
9585 offset = UNITS_PER_WORD;
9586
9587 /* Skip pushed static chain. */
9588 if (ix86_static_chain_on_stack)
9589 offset += UNITS_PER_WORD;
9590
9591 /* Skip saved base pointer. */
9592 if (frame_pointer_needed)
9593 offset += UNITS_PER_WORD;
9594 frame->hfp_save_offset = offset;
9595
9596 /* The traditional frame pointer location is at the top of the frame. */
9597 frame->hard_frame_pointer_offset = offset;
9598
9599 /* Register save area */
9600 offset += frame->nregs * UNITS_PER_WORD;
9601 frame->reg_save_offset = offset;
9602
9603 /* On SEH targets, registers are pushed just before the frame pointer
9604 location. */
9605 if (TARGET_SEH)
9606 frame->hard_frame_pointer_offset = offset;
9607
9608 /* Align and set SSE register save area. */
9609 if (frame->nsseregs)
9610 {
9611 /* The only ABI that has saved SSE registers (Win64) also has a
9612 16-byte aligned default stack, and thus we don't need to be
9613 within the re-aligned local stack frame to save them. */
9614 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9615 offset = (offset + 16 - 1) & -16;
9616 offset += frame->nsseregs * 16;
9617 }
9618 frame->sse_reg_save_offset = offset;
9619
9620 /* The re-aligned stack starts here. Values before this point are not
9621 directly comparable with values below this point. In order to make
9622 sure that no value happens to be the same before and after, force
9623 the alignment computation below to add a non-zero value. */
9624 if (stack_realign_fp)
9625 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9626
9627 /* Va-arg area */
9628 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9629 offset += frame->va_arg_size;
9630
9631 /* Align start of frame for local function. */
9632 if (stack_realign_fp
9633 || offset != frame->sse_reg_save_offset
9634 || size != 0
9635 || !crtl->is_leaf
9636 || cfun->calls_alloca
9637 || ix86_current_function_calls_tls_descriptor)
9638 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9639
9640 /* Frame pointer points here. */
9641 frame->frame_pointer_offset = offset;
9642
9643 offset += size;
9644
9645 /* Add the outgoing arguments area. It can be skipped if we eliminated
9646 all the function calls as dead code.
9647 Skipping is, however, impossible when the function calls alloca, as the
9648 alloca expander assumes that the last crtl->outgoing_args_size bytes
9649 of the stack frame are unused. */
9650 if (ACCUMULATE_OUTGOING_ARGS
9651 && (!crtl->is_leaf || cfun->calls_alloca
9652 || ix86_current_function_calls_tls_descriptor))
9653 {
9654 offset += crtl->outgoing_args_size;
9655 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9656 }
9657 else
9658 frame->outgoing_arguments_size = 0;
9659
9660 /* Align stack boundary. Only needed if we're calling another function
9661 or using alloca. */
9662 if (!crtl->is_leaf || cfun->calls_alloca
9663 || ix86_current_function_calls_tls_descriptor)
9664 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9665
9666 /* We've reached end of stack frame. */
9667 frame->stack_pointer_offset = offset;
9668
9669 /* Size prologue needs to allocate. */
9670 to_allocate = offset - frame->sse_reg_save_offset;
9671
9672 if ((!to_allocate && frame->nregs <= 1)
9673 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9674 frame->save_regs_using_mov = false;
9675
9676 if (ix86_using_red_zone ()
9677 && crtl->sp_is_unchanging
9678 && crtl->is_leaf
9679 && !ix86_current_function_calls_tls_descriptor)
9680 {
9681 frame->red_zone_size = to_allocate;
9682 if (frame->save_regs_using_mov)
9683 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9684 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9685 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9686 }
9687 else
9688 frame->red_zone_size = 0;
9689 frame->stack_pointer_offset -= frame->red_zone_size;
9690
9691 /* The SEH frame pointer location is near the bottom of the frame.
9692 This is enforced by the fact that the difference between the
9693 stack pointer and the frame pointer is limited to 240 bytes in
9694 the unwind data structure. */
9695 if (TARGET_SEH)
9696 {
9697 HOST_WIDE_INT diff;
9698
9699 /* If we can leave the frame pointer where it is, do so. Also, returns
9700 the establisher frame for __builtin_frame_address (0). */
9701 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9702 if (diff <= SEH_MAX_FRAME_SIZE
9703 && (diff > 240 || (diff & 15) != 0)
9704 && !crtl->accesses_prior_frames)
9705 {
9706 /* Ideally we'd determine what portion of the local stack frame
9707 (within the constraint of the lowest 240) is most heavily used.
9708 But without that complication, simply bias the frame pointer
9709 by 128 bytes so as to maximize the amount of the local stack
9710 frame that is addressable with 8-bit offsets. */
9711 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9712 }
9713 }
9714 }
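
/* Illustrative sketch of the frame layout computed above, from the slot
   holding the return address downwards (exact contents depend on the
   target and options; the names on the right are fields of struct
   ix86_frame marking the corresponding offsets):

       return address
       pushed static chain (if any)
       saved frame pointer (if needed)      hard_frame_pointer_offset
       GPR save area                        reg_save_offset
       SSE save area (16-byte aligned)      sse_reg_save_offset
       va_arg register save area
       local variables                      frame_pointer_offset
       outgoing argument area
       end of frame                         stack_pointer_offset

   When the red zone is usable, its size is subtracted from
   stack_pointer_offset so the prologue does not allocate it.  */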
9715
9716 /* This is semi-inlined memory_address_length, but simplified
9717 since we know that we're always dealing with reg+offset, and
9718 to avoid having to create and discard all that rtl. */
9719
9720 static inline int
9721 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9722 {
9723 int len = 4;
9724
9725 if (offset == 0)
9726 {
9727 /* EBP and R13 cannot be encoded without an offset. */
9728 len = (regno == BP_REG || regno == R13_REG);
9729 }
9730 else if (IN_RANGE (offset, -128, 127))
9731 len = 1;
9732
9733 /* ESP and R12 must be encoded with a SIB byte. */
9734 if (regno == SP_REG || regno == R12_REG)
9735 len++;
9736
9737 return len;
9738 }
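
/* Illustrative examples of the lengths returned above (displacement plus
   any SIB byte, not the full instruction):

       0(%eax)   -> 0   base register alone
       0(%ebp)   -> 1   EBP needs an explicit disp8 even for offset 0
       8(%esp)   -> 2   disp8 plus the mandatory SIB byte for ESP
       256(%eax) -> 4   disp32
       256(%esp) -> 5   disp32 plus SIB  */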
9739
9740 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9741 The valid base registers are taken from CFUN->MACHINE->FS. */
9742
9743 static rtx
9744 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9745 {
9746 const struct machine_function *m = cfun->machine;
9747 rtx base_reg = NULL;
9748 HOST_WIDE_INT base_offset = 0;
9749
9750 if (m->use_fast_prologue_epilogue)
9751 {
9752 /* Choose the base register most likely to allow the most scheduling
9753 opportunities. Generally FP is valid throughout the function,
9754 while DRAP must be reloaded within the epilogue. But choose either
9755 over the SP due to increased encoding size. */
9756
9757 if (m->fs.fp_valid)
9758 {
9759 base_reg = hard_frame_pointer_rtx;
9760 base_offset = m->fs.fp_offset - cfa_offset;
9761 }
9762 else if (m->fs.drap_valid)
9763 {
9764 base_reg = crtl->drap_reg;
9765 base_offset = 0 - cfa_offset;
9766 }
9767 else if (m->fs.sp_valid)
9768 {
9769 base_reg = stack_pointer_rtx;
9770 base_offset = m->fs.sp_offset - cfa_offset;
9771 }
9772 }
9773 else
9774 {
9775 HOST_WIDE_INT toffset;
9776 int len = 16, tlen;
9777
9778 /* Choose the base register with the smallest address encoding.
9779 With a tie, choose FP > DRAP > SP. */
9780 if (m->fs.sp_valid)
9781 {
9782 base_reg = stack_pointer_rtx;
9783 base_offset = m->fs.sp_offset - cfa_offset;
9784 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9785 }
9786 if (m->fs.drap_valid)
9787 {
9788 toffset = 0 - cfa_offset;
9789 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9790 if (tlen <= len)
9791 {
9792 base_reg = crtl->drap_reg;
9793 base_offset = toffset;
9794 len = tlen;
9795 }
9796 }
9797 if (m->fs.fp_valid)
9798 {
9799 toffset = m->fs.fp_offset - cfa_offset;
9800 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9801 if (tlen <= len)
9802 {
9803 base_reg = hard_frame_pointer_rtx;
9804 base_offset = toffset;
9805 len = tlen;
9806 }
9807 }
9808 }
9809 gcc_assert (base_reg != NULL);
9810
9811 return plus_constant (Pmode, base_reg, base_offset);
9812 }
9813
9814 /* Emit code to save registers in the prologue. */
9815
9816 static void
9817 ix86_emit_save_regs (void)
9818 {
9819 unsigned int regno;
9820 rtx insn;
9821
9822 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9823 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9824 {
9825 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9826 RTX_FRAME_RELATED_P (insn) = 1;
9827 }
9828 }
9829
9830 /* Emit a single register save at CFA - CFA_OFFSET. */
9831
9832 static void
9833 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9834 HOST_WIDE_INT cfa_offset)
9835 {
9836 struct machine_function *m = cfun->machine;
9837 rtx reg = gen_rtx_REG (mode, regno);
9838 rtx mem, addr, base, insn;
9839
9840 addr = choose_baseaddr (cfa_offset);
9841 mem = gen_frame_mem (mode, addr);
9842
9843 /* For SSE saves, we need to indicate the 128-bit alignment. */
9844 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9845
9846 insn = emit_move_insn (mem, reg);
9847 RTX_FRAME_RELATED_P (insn) = 1;
9848
9849 base = addr;
9850 if (GET_CODE (base) == PLUS)
9851 base = XEXP (base, 0);
9852 gcc_checking_assert (REG_P (base));
9853
9854 /* When saving registers into a re-aligned local stack frame, avoid
9855 any tricky guessing by dwarf2out. */
9856 if (m->fs.realigned)
9857 {
9858 gcc_checking_assert (stack_realign_drap);
9859
9860 if (regno == REGNO (crtl->drap_reg))
9861 {
9862 /* A bit of a hack. We force the DRAP register to be saved in
9863 the re-aligned stack frame, which provides us with a copy
9864 of the CFA that will last past the prologue. Install it. */
9865 gcc_checking_assert (cfun->machine->fs.fp_valid);
9866 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9867 cfun->machine->fs.fp_offset - cfa_offset);
9868 mem = gen_rtx_MEM (mode, addr);
9869 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9870 }
9871 else
9872 {
9873 /* The frame pointer is a stable reference within the
9874 aligned frame. Use it. */
9875 gcc_checking_assert (cfun->machine->fs.fp_valid);
9876 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9877 cfun->machine->fs.fp_offset - cfa_offset);
9878 mem = gen_rtx_MEM (mode, addr);
9879 add_reg_note (insn, REG_CFA_EXPRESSION,
9880 gen_rtx_SET (VOIDmode, mem, reg));
9881 }
9882 }
9883
9884 /* The memory may not be relative to the current CFA register,
9885 which means that we may need to generate a new pattern for
9886 use by the unwind info. */
9887 else if (base != m->fs.cfa_reg)
9888 {
9889 addr = plus_constant (Pmode, m->fs.cfa_reg,
9890 m->fs.cfa_offset - cfa_offset);
9891 mem = gen_rtx_MEM (mode, addr);
9892 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9893 }
9894 }
9895
9896 /* Emit code to save registers using MOV insns.
9897 First register is stored at CFA - CFA_OFFSET. */
9898 static void
9899 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9900 {
9901 unsigned int regno;
9902
9903 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9904 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9905 {
9906 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9907 cfa_offset -= UNITS_PER_WORD;
9908 }
9909 }
9910
9911 /* Emit code to save SSE registers using MOV insns.
9912 First register is stored at CFA - CFA_OFFSET. */
9913 static void
9914 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9915 {
9916 unsigned int regno;
9917
9918 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9919 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9920 {
9921 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9922 cfa_offset -= 16;
9923 }
9924 }
9925
9926 static GTY(()) rtx queued_cfa_restores;
9927
9928 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9929 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9930 Don't add the note if the previously saved value will be left untouched
9931 within the stack red zone until return, as unwinders can find the same value
9932 in the register and on the stack. */
9933
9934 static void
9935 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9936 {
9937 if (!crtl->shrink_wrapped
9938 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9939 return;
9940
9941 if (insn)
9942 {
9943 add_reg_note (insn, REG_CFA_RESTORE, reg);
9944 RTX_FRAME_RELATED_P (insn) = 1;
9945 }
9946 else
9947 queued_cfa_restores
9948 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9949 }
9950
9951 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9952
9953 static void
9954 ix86_add_queued_cfa_restore_notes (rtx insn)
9955 {
9956 rtx last;
9957 if (!queued_cfa_restores)
9958 return;
9959 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9960 ;
9961 XEXP (last, 1) = REG_NOTES (insn);
9962 REG_NOTES (insn) = queued_cfa_restores;
9963 queued_cfa_restores = NULL_RTX;
9964 RTX_FRAME_RELATED_P (insn) = 1;
9965 }
9966
9967 /* Expand prologue or epilogue stack adjustment.
9968 The pattern exists to put a dependency on all ebp-based memory accesses.
9969 STYLE should be negative if instructions should be marked as frame related,
9970 zero if the %r11 register is live and cannot be freely used, and positive
9971 otherwise. */
9972
9973 static void
9974 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9975 int style, bool set_cfa)
9976 {
9977 struct machine_function *m = cfun->machine;
9978 rtx insn;
9979 bool add_frame_related_expr = false;
9980
9981 if (Pmode == SImode)
9982 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9983 else if (x86_64_immediate_operand (offset, DImode))
9984 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9985 else
9986 {
9987 rtx tmp;
9988 /* r11 is used by indirect sibcall return as well, set before the
9989 epilogue and used after the epilogue. */
9990 if (style)
9991 tmp = gen_rtx_REG (DImode, R11_REG);
9992 else
9993 {
9994 gcc_assert (src != hard_frame_pointer_rtx
9995 && dest != hard_frame_pointer_rtx);
9996 tmp = hard_frame_pointer_rtx;
9997 }
9998 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9999 if (style < 0)
10000 add_frame_related_expr = true;
10001
10002 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10003 }
10004
10005 insn = emit_insn (insn);
10006 if (style >= 0)
10007 ix86_add_queued_cfa_restore_notes (insn);
10008
10009 if (set_cfa)
10010 {
10011 rtx r;
10012
10013 gcc_assert (m->fs.cfa_reg == src);
10014 m->fs.cfa_offset += INTVAL (offset);
10015 m->fs.cfa_reg = dest;
10016
10017 r = gen_rtx_PLUS (Pmode, src, offset);
10018 r = gen_rtx_SET (VOIDmode, dest, r);
10019 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10020 RTX_FRAME_RELATED_P (insn) = 1;
10021 }
10022 else if (style < 0)
10023 {
10024 RTX_FRAME_RELATED_P (insn) = 1;
10025 if (add_frame_related_expr)
10026 {
10027 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10028 r = gen_rtx_SET (VOIDmode, dest, r);
10029 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10030 }
10031 }
10032
10033 if (dest == stack_pointer_rtx)
10034 {
10035 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10036 bool valid = m->fs.sp_valid;
10037
10038 if (src == hard_frame_pointer_rtx)
10039 {
10040 valid = m->fs.fp_valid;
10041 ooffset = m->fs.fp_offset;
10042 }
10043 else if (src == crtl->drap_reg)
10044 {
10045 valid = m->fs.drap_valid;
10046 ooffset = 0;
10047 }
10048 else
10049 {
10050 /* Else there are two possibilities: SP itself, which we set
10051 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10052 taken care of by hand along the eh_return path. */
10053 gcc_checking_assert (src == stack_pointer_rtx
10054 || offset == const0_rtx);
10055 }
10056
10057 m->fs.sp_offset = ooffset - INTVAL (offset);
10058 m->fs.sp_valid = valid;
10059 }
10060 }
10061
10062 /* Find an available register to be used as the dynamic realign argument
10063 pointer register. Such a register will be written in the prologue and
10064 used at the beginning of the body, so it must not be
10065 1. parameter passing register.
10066 2. GOT pointer.
10067 We reuse static-chain register if it is available. Otherwise, we
10068 use DI for i386 and R13 for x86-64. We chose R13 since it has
10069 shorter encoding.
10070
10071 Return: the regno of chosen register. */
10072
10073 static unsigned int
10074 find_drap_reg (void)
10075 {
10076 tree decl = cfun->decl;
10077
10078 if (TARGET_64BIT)
10079 {
10080 /* Use R13 for nested functions or functions that need a static chain.
10081 Since a function with a tail call may use any caller-saved
10082 register in the epilogue, DRAP must not use a caller-saved
10083 register in that case. */
10084 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10085 return R13_REG;
10086
10087 return R10_REG;
10088 }
10089 else
10090 {
10091 /* Use DI for nested functions or functions that need a static chain.
10092 Since a function with a tail call may use any caller-saved
10093 register in the epilogue, DRAP must not use a caller-saved
10094 register in that case. */
10095 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10096 return DI_REG;
10097
10098 /* Reuse static chain register if it isn't used for parameter
10099 passing. */
10100 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10101 {
10102 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10103 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10104 return CX_REG;
10105 }
10106 return DI_REG;
10107 }
10108 }
10109
10110 /* Return minimum incoming stack alignment. */
10111
10112 static unsigned int
10113 ix86_minimum_incoming_stack_boundary (bool sibcall)
10114 {
10115 unsigned int incoming_stack_boundary;
10116
10117 /* Prefer the one specified at command line. */
10118 if (ix86_user_incoming_stack_boundary)
10119 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10120 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10121 when -mstackrealign is used, this is not a sibcall check, and the
10122 estimated stack alignment is 128 bits. */
10123 else if (!sibcall
10124 && !TARGET_64BIT
10125 && ix86_force_align_arg_pointer
10126 && crtl->stack_alignment_estimated == 128)
10127 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10128 else
10129 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10130
10131 /* Incoming stack alignment can be changed on individual functions
10132 via force_align_arg_pointer attribute. We use the smallest
10133 incoming stack boundary. */
10134 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10135 && lookup_attribute (ix86_force_align_arg_pointer_string,
10136 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10137 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10138
10139 /* The incoming stack frame has to be aligned at least at
10140 parm_stack_boundary. */
10141 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10142 incoming_stack_boundary = crtl->parm_stack_boundary;
10143
10144 /* The stack at the entry of main is aligned by the runtime. We use the
10145 smallest incoming stack boundary. */
10146 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10147 && DECL_NAME (current_function_decl)
10148 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10149 && DECL_FILE_SCOPE_P (current_function_decl))
10150 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10151
10152 return incoming_stack_boundary;
10153 }
10154
10155 /* Update incoming stack boundary and estimated stack alignment. */
10156
10157 static void
10158 ix86_update_stack_boundary (void)
10159 {
10160 ix86_incoming_stack_boundary
10161 = ix86_minimum_incoming_stack_boundary (false);
10162
10163 /* An x86-64 varargs function needs 16-byte stack alignment for the
10164 register save area. */
10165 if (TARGET_64BIT
10166 && cfun->stdarg
10167 && crtl->stack_alignment_estimated < 128)
10168 crtl->stack_alignment_estimated = 128;
10169 }
10170
10171 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10172 needed or an rtx for DRAP otherwise. */
10173
10174 static rtx
10175 ix86_get_drap_rtx (void)
10176 {
10177 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10178 crtl->need_drap = true;
10179
10180 if (stack_realign_drap)
10181 {
10182 /* Assign DRAP to vDRAP and return vDRAP. */
10183 unsigned int regno = find_drap_reg ();
10184 rtx drap_vreg;
10185 rtx arg_ptr;
10186 rtx seq, insn;
10187
10188 arg_ptr = gen_rtx_REG (Pmode, regno);
10189 crtl->drap_reg = arg_ptr;
10190
10191 start_sequence ();
10192 drap_vreg = copy_to_reg (arg_ptr);
10193 seq = get_insns ();
10194 end_sequence ();
10195
10196 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10197 if (!optimize)
10198 {
10199 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10200 RTX_FRAME_RELATED_P (insn) = 1;
10201 }
10202 return drap_vreg;
10203 }
10204 else
10205 return NULL;
10206 }
10207
10208 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10209
10210 static rtx
10211 ix86_internal_arg_pointer (void)
10212 {
10213 return virtual_incoming_args_rtx;
10214 }
10215
10216 struct scratch_reg {
10217 rtx reg;
10218 bool saved;
10219 };
10220
10221 /* Return a short-lived scratch register for use on function entry.
10222 In 32-bit mode, it is valid only after the registers are saved
10223 in the prologue. This register must be released by means of
10224 release_scratch_register_on_entry once it is dead. */
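/* Typical usage, as in the stack-probing routines below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg ...
     release_scratch_register_on_entry (&sr);  */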
10225
10226 static void
10227 get_scratch_register_on_entry (struct scratch_reg *sr)
10228 {
10229 int regno;
10230
10231 sr->saved = false;
10232
10233 if (TARGET_64BIT)
10234 {
10235 /* We always use R11 in 64-bit mode. */
10236 regno = R11_REG;
10237 }
10238 else
10239 {
10240 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10241 bool fastcall_p
10242 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10243 bool thiscall_p
10244 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10245 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10246 int regparm = ix86_function_regparm (fntype, decl);
10247 int drap_regno
10248 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10249
10250 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10251 for the static chain register. */
10252 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10253 && drap_regno != AX_REG)
10254 regno = AX_REG;
10255 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10256 for the static chain register. */
10257 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10258 regno = AX_REG;
10259 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10260 regno = DX_REG;
10261 /* ecx is the static chain register. */
10262 else if (regparm < 3 && !fastcall_p && !thiscall_p
10263 && !static_chain_p
10264 && drap_regno != CX_REG)
10265 regno = CX_REG;
10266 else if (ix86_save_reg (BX_REG, true))
10267 regno = BX_REG;
10268 /* esi is the static chain register. */
10269 else if (!(regparm == 3 && static_chain_p)
10270 && ix86_save_reg (SI_REG, true))
10271 regno = SI_REG;
10272 else if (ix86_save_reg (DI_REG, true))
10273 regno = DI_REG;
10274 else
10275 {
10276 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10277 sr->saved = true;
10278 }
10279 }
10280
10281 sr->reg = gen_rtx_REG (Pmode, regno);
10282 if (sr->saved)
10283 {
10284 rtx insn = emit_insn (gen_push (sr->reg));
10285 RTX_FRAME_RELATED_P (insn) = 1;
10286 }
10287 }
10288
10289 /* Release a scratch register obtained from the preceding function. */
10290
10291 static void
10292 release_scratch_register_on_entry (struct scratch_reg *sr)
10293 {
10294 if (sr->saved)
10295 {
10296 struct machine_function *m = cfun->machine;
10297 rtx x, insn = emit_insn (gen_pop (sr->reg));
10298
10299 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10300 RTX_FRAME_RELATED_P (insn) = 1;
10301 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10302 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10303 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10304 m->fs.sp_offset -= UNITS_PER_WORD;
10305 }
10306 }
10307
10308 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10309
10310 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
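/* A worked example of the unrolled path below, assuming PROBE_INTERVAL is
   4096 bytes and the dope is 32 bytes (4 words on x86-64), for SIZE = 10000:

	sub	$8224, %rsp	; 2 * 4096 + 32
	<probe at %rsp>
	sub	$4096, %rsp
	<probe at %rsp>
	sub	$1808, %rsp	; 10000 + 4096 - 12288
	<probe at %rsp>
	add	$4128, %rsp	; give back the extra interval + dope

   i.e. a net adjustment of exactly 10000 bytes.  */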
10311
10312 static void
10313 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10314 {
10315 /* We skip the probe for the first interval + a small dope of 4 words and
10316 probe that many bytes past the specified size to maintain a protection
10317 area at the bottom of the stack. */
10318 const int dope = 4 * UNITS_PER_WORD;
10319 rtx size_rtx = GEN_INT (size), last;
10320
10321 /* See if we have a constant small number of probes to generate. If so,
10322 that's the easy case. The run-time loop is made up of 11 insns in the
10323 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10324 for n # of intervals. */
10325 if (size <= 5 * PROBE_INTERVAL)
10326 {
10327 HOST_WIDE_INT i, adjust;
10328 bool first_probe = true;
10329
10330 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10331 values of N from 1 until it exceeds SIZE. If only one probe is
10332 needed, this will not generate any code. Then adjust and probe
10333 to PROBE_INTERVAL + SIZE. */
10334 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10335 {
10336 if (first_probe)
10337 {
10338 adjust = 2 * PROBE_INTERVAL + dope;
10339 first_probe = false;
10340 }
10341 else
10342 adjust = PROBE_INTERVAL;
10343
10344 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10345 plus_constant (Pmode, stack_pointer_rtx,
10346 -adjust)));
10347 emit_stack_probe (stack_pointer_rtx);
10348 }
10349
10350 if (first_probe)
10351 adjust = size + PROBE_INTERVAL + dope;
10352 else
10353 adjust = size + PROBE_INTERVAL - i;
10354
10355 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10356 plus_constant (Pmode, stack_pointer_rtx,
10357 -adjust)));
10358 emit_stack_probe (stack_pointer_rtx);
10359
10360 /* Adjust back to account for the additional first interval. */
10361 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10362 plus_constant (Pmode, stack_pointer_rtx,
10363 PROBE_INTERVAL + dope)));
10364 }
10365
10366 /* Otherwise, do the same as above, but in a loop. Note that we must be
10367 extra careful with variables wrapping around because we might be at
10368 the very top (or the very bottom) of the address space and we have
10369 to be able to handle this case properly; in particular, we use an
10370 equality test for the loop condition. */
10371 else
10372 {
10373 HOST_WIDE_INT rounded_size;
10374 struct scratch_reg sr;
10375
10376 get_scratch_register_on_entry (&sr);
10377
10378
10379 /* Step 1: round SIZE to the previous multiple of the interval. */
10380
10381 rounded_size = size & -PROBE_INTERVAL;
10382
10383
10384 /* Step 2: compute initial and final value of the loop counter. */
10385
10386 /* SP = SP_0 + PROBE_INTERVAL. */
10387 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10388 plus_constant (Pmode, stack_pointer_rtx,
10389 - (PROBE_INTERVAL + dope))));
10390
10391 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10392 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10393 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10394 gen_rtx_PLUS (Pmode, sr.reg,
10395 stack_pointer_rtx)));
10396
10397
10398 /* Step 3: the loop
10399
10400 while (SP != LAST_ADDR)
10401 {
10402 SP = SP + PROBE_INTERVAL
10403 probe at SP
10404 }
10405
10406 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10407 values of N from 1 until it is equal to ROUNDED_SIZE. */
10408
10409 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10410
10411
10412 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10413 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10414
10415 if (size != rounded_size)
10416 {
10417 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10418 plus_constant (Pmode, stack_pointer_rtx,
10419 rounded_size - size)));
10420 emit_stack_probe (stack_pointer_rtx);
10421 }
10422
10423 /* Adjust back to account for the additional first interval. */
10424 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10425 plus_constant (Pmode, stack_pointer_rtx,
10426 PROBE_INTERVAL + dope)));
10427
10428 release_scratch_register_on_entry (&sr);
10429 }
10430
10431 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10432
10433 /* Even if the stack pointer isn't the CFA register, we need to correctly
10434 describe the adjustments made to it, in particular differentiate the
10435 frame-related ones from the frame-unrelated ones. */
10436 if (size > 0)
10437 {
10438 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10439 XVECEXP (expr, 0, 0)
10440 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10441 plus_constant (Pmode, stack_pointer_rtx, -size));
10442 XVECEXP (expr, 0, 1)
10443 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10444 plus_constant (Pmode, stack_pointer_rtx,
10445 PROBE_INTERVAL + dope + size));
10446 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10447 RTX_FRAME_RELATED_P (last) = 1;
10448
10449 cfun->machine->fs.sp_offset += size;
10450 }
10451
10452 /* Make sure nothing is scheduled before we are done. */
10453 emit_insn (gen_blockage ());
10454 }
10455
10456 /* Adjust the stack pointer up to REG while probing it. */
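/* The loop emitted below is roughly (AT&T syntax, SP standing for
   %esp or %rsp and REG for the limit register):

	.LPSRLn:	cmp	REG, SP
			je	.LPSREn
			sub	$PROBE_INTERVAL, SP
			or	$0, (SP)
			jmp	.LPSRLn
	.LPSREn:  */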
10457
10458 const char *
10459 output_adjust_stack_and_probe (rtx reg)
10460 {
10461 static int labelno = 0;
10462 char loop_lab[32], end_lab[32];
10463 rtx xops[2];
10464
10465 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10466 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10467
10468 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10469
10470 /* Jump to END_LAB if SP == LAST_ADDR. */
10471 xops[0] = stack_pointer_rtx;
10472 xops[1] = reg;
10473 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10474 fputs ("\tje\t", asm_out_file);
10475 assemble_name_raw (asm_out_file, end_lab);
10476 fputc ('\n', asm_out_file);
10477
10478 /* SP = SP + PROBE_INTERVAL. */
10479 xops[1] = GEN_INT (PROBE_INTERVAL);
10480 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10481
10482 /* Probe at SP. */
10483 xops[1] = const0_rtx;
10484 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10485
10486 fprintf (asm_out_file, "\tjmp\t");
10487 assemble_name_raw (asm_out_file, loop_lab);
10488 fputc ('\n', asm_out_file);
10489
10490 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10491
10492 return "";
10493 }
10494
10495 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10496 inclusive. These are offsets from the current stack pointer. */
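/* For example, the prologue call below of the form
   ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size) probes the words
   at SP - STACK_CHECK_PROTECT - N * PROBE_INTERVAL for N = 1, 2, ... while
   N * PROBE_INTERVAL < size, and finally at SP - STACK_CHECK_PROTECT - size.  */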
10497
10498 static void
10499 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10500 {
10501 /* See if we have a constant small number of probes to generate. If so,
10502 that's the easy case. The run-time loop is made up of 7 insns in the
10503 generic case while the compile-time loop is made up of n insns for n #
10504 of intervals. */
10505 if (size <= 7 * PROBE_INTERVAL)
10506 {
10507 HOST_WIDE_INT i;
10508
10509 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10510 it exceeds SIZE. If only one probe is needed, this will not
10511 generate any code. Then probe at FIRST + SIZE. */
10512 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10513 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10514 -(first + i)));
10515
10516 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10517 -(first + size)));
10518 }
10519
10520 /* Otherwise, do the same as above, but in a loop. Note that we must be
10521 extra careful with variables wrapping around because we might be at
10522 the very top (or the very bottom) of the address space and we have
10523 to be able to handle this case properly; in particular, we use an
10524 equality test for the loop condition. */
10525 else
10526 {
10527 HOST_WIDE_INT rounded_size, last;
10528 struct scratch_reg sr;
10529
10530 get_scratch_register_on_entry (&sr);
10531
10532
10533 /* Step 1: round SIZE to the previous multiple of the interval. */
10534
10535 rounded_size = size & -PROBE_INTERVAL;
10536
10537
10538 /* Step 2: compute initial and final value of the loop counter. */
10539
10540 /* TEST_OFFSET = FIRST. */
10541 emit_move_insn (sr.reg, GEN_INT (-first));
10542
10543 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10544 last = first + rounded_size;
10545
10546
10547 /* Step 3: the loop
10548
10549 while (TEST_ADDR != LAST_ADDR)
10550 {
10551 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10552 probe at TEST_ADDR
10553 }
10554
10555 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10556 until it is equal to ROUNDED_SIZE. */
10557
10558 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10559
10560
10561 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10562 that SIZE is equal to ROUNDED_SIZE. */
10563
10564 if (size != rounded_size)
10565 emit_stack_probe (plus_constant (Pmode,
10566 gen_rtx_PLUS (Pmode,
10567 stack_pointer_rtx,
10568 sr.reg),
10569 rounded_size - size));
10570
10571 release_scratch_register_on_entry (&sr);
10572 }
10573
10574 /* Make sure nothing is scheduled before we are done. */
10575 emit_insn (gen_blockage ());
10576 }
10577
10578 /* Probe a range of stack addresses from REG to END, inclusive. These are
10579 offsets from the current stack pointer. */
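/* The loop emitted below is roughly (AT&T syntax, SP standing for
   %esp or %rsp):

	.LPSRLn:	cmp	END, REG
			je	.LPSREn
			sub	$PROBE_INTERVAL, REG
			or	$0, (SP,REG)
			jmp	.LPSRLn
	.LPSREn:  */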
10580
10581 const char *
10582 output_probe_stack_range (rtx reg, rtx end)
10583 {
10584 static int labelno = 0;
10585 char loop_lab[32], end_lab[32];
10586 rtx xops[3];
10587
10588 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10589 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10590
10591 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10592
10593 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10594 xops[0] = reg;
10595 xops[1] = end;
10596 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10597 fputs ("\tje\t", asm_out_file);
10598 assemble_name_raw (asm_out_file, end_lab);
10599 fputc ('\n', asm_out_file);
10600
10601 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10602 xops[1] = GEN_INT (PROBE_INTERVAL);
10603 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10604
10605 /* Probe at TEST_ADDR. */
10606 xops[0] = stack_pointer_rtx;
10607 xops[1] = reg;
10608 xops[2] = const0_rtx;
10609 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10610
10611 fprintf (asm_out_file, "\tjmp\t");
10612 assemble_name_raw (asm_out_file, loop_lab);
10613 fputc ('\n', asm_out_file);
10614
10615 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10616
10617 return "";
10618 }
10619
10620 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10621 to be generated in correct form. */
10622 static void
10623 ix86_finalize_stack_realign_flags (void)
10624 {
10625 /* Check if stack realignment is really needed after reload, and
10626 store the result in cfun. */
10627 unsigned int incoming_stack_boundary
10628 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10629 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10630 unsigned int stack_realign = (incoming_stack_boundary
10631 < (crtl->is_leaf
10632 ? crtl->max_used_stack_slot_alignment
10633 : crtl->stack_alignment_needed));
10634
10635 if (crtl->stack_realign_finalized)
10636 {
10637 /* After stack_realign_needed is finalized, we can no longer
10638 change it. */
10639 gcc_assert (crtl->stack_realign_needed == stack_realign);
10640 return;
10641 }
10642
10643 /* If the only reason for frame_pointer_needed is that we conservatively
10644 assumed stack realignment might be needed, but in the end nothing that
10645 needed the stack alignment had been spilled, clear frame_pointer_needed
10646 and say we don't need stack realignment. */
10647 if (stack_realign
10648 && frame_pointer_needed
10649 && crtl->is_leaf
10650 && flag_omit_frame_pointer
10651 && crtl->sp_is_unchanging
10652 && !ix86_current_function_calls_tls_descriptor
10653 && !crtl->accesses_prior_frames
10654 && !cfun->calls_alloca
10655 && !crtl->calls_eh_return
10656 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10657 && !ix86_frame_pointer_required ()
10658 && get_frame_size () == 0
10659 && ix86_nsaved_sseregs () == 0
10660 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10661 {
10662 HARD_REG_SET set_up_by_prologue, prologue_used;
10663 basic_block bb;
10664
10665 CLEAR_HARD_REG_SET (prologue_used);
10666 CLEAR_HARD_REG_SET (set_up_by_prologue);
10667 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10668 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10669 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10670 HARD_FRAME_POINTER_REGNUM);
10671 FOR_EACH_BB_FN (bb, cfun)
10672 {
10673 rtx insn;
10674 FOR_BB_INSNS (bb, insn)
10675 if (NONDEBUG_INSN_P (insn)
10676 && requires_stack_frame_p (insn, prologue_used,
10677 set_up_by_prologue))
10678 {
10679 crtl->stack_realign_needed = stack_realign;
10680 crtl->stack_realign_finalized = true;
10681 return;
10682 }
10683 }
10684
10685 /* If drap has been set, but it actually isn't live at the start
10686 of the function, there is no reason to set it up. */
10687 if (crtl->drap_reg)
10688 {
10689 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10690 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10691 {
10692 crtl->drap_reg = NULL_RTX;
10693 crtl->need_drap = false;
10694 }
10695 }
10696 else
10697 cfun->machine->no_drap_save_restore = true;
10698
10699 frame_pointer_needed = false;
10700 stack_realign = false;
10701 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10702 crtl->stack_alignment_needed = incoming_stack_boundary;
10703 crtl->stack_alignment_estimated = incoming_stack_boundary;
10704 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10705 crtl->preferred_stack_boundary = incoming_stack_boundary;
10706 df_finish_pass (true);
10707 df_scan_alloc (NULL);
10708 df_scan_blocks ();
10709 df_compute_regs_ever_live (true);
10710 df_analyze ();
10711 }
10712
10713 crtl->stack_realign_needed = stack_realign;
10714 crtl->stack_realign_finalized = true;
10715 }
10716
10717 /* Expand the prologue into a bunch of separate insns. */
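/* Roughly, the steps below are: finalize the stack realignment decision
   and compute the frame layout; handle the ms_hook_prologue and
   static-chain-on-stack special entries; set up DRAP and realign the
   stack when stack_realign_drap; push the frame pointer and the integer
   registers; realign for stack_realign_fp; allocate (and, with
   -fstack-check, probe) the rest of the frame; save any remaining
   integer and SSE registers with moves; and finally set up the PIC
   register if it is used.  */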
10718
10719 void
10720 ix86_expand_prologue (void)
10721 {
10722 struct machine_function *m = cfun->machine;
10723 rtx insn, t;
10724 bool pic_reg_used;
10725 struct ix86_frame frame;
10726 HOST_WIDE_INT allocate;
10727 bool int_registers_saved;
10728 bool sse_registers_saved;
10729
10730 ix86_finalize_stack_realign_flags ();
10731
10732 /* DRAP should not coexist with stack_realign_fp. */
10733 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10734
10735 memset (&m->fs, 0, sizeof (m->fs));
10736
10737 /* Initialize CFA state for before the prologue. */
10738 m->fs.cfa_reg = stack_pointer_rtx;
10739 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10740
10741 /* Track SP offset to the CFA. We continue tracking this after we've
10742 swapped the CFA register away from SP. In the case of re-alignment
10743 this is fudged; we're interested in offsets within the local frame. */
10744 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10745 m->fs.sp_valid = true;
10746
10747 ix86_compute_frame_layout (&frame);
10748
10749 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10750 {
10751 /* We should have already generated an error for any use of
10752 ms_hook on a nested function. */
10753 gcc_checking_assert (!ix86_static_chain_on_stack);
10754
10755 /* Check if profiling is active and we shall use the profiling-before-
10756 prologue variant. If so, issue a sorry. */
10757 if (crtl->profile && flag_fentry != 0)
10758 sorry ("ms_hook_prologue attribute isn%'t compatible "
10759 "with -mfentry for 32-bit");
10760
10761 /* In ix86_asm_output_function_label we emitted:
10762 8b ff movl.s %edi,%edi
10763 55 push %ebp
10764 8b ec movl.s %esp,%ebp
10765
10766 This matches the hookable function prologue in Win32 API
10767 functions in Microsoft Windows XP Service Pack 2 and newer.
10768 Wine uses this to enable Windows apps to hook the Win32 API
10769 functions provided by Wine.
10770
10771 What that means is that we've already set up the frame pointer. */
10772
10773 if (frame_pointer_needed
10774 && !(crtl->drap_reg && crtl->stack_realign_needed))
10775 {
10776 rtx push, mov;
10777
10778 /* We've decided to use the frame pointer already set up.
10779 Describe this to the unwinder by pretending that both
10780 push and mov insns happen right here.
10781
10782 Putting the unwind info here at the end of the ms_hook
10783 is done so that we can make absolutely certain we get
10784 the required byte sequence at the start of the function,
10785 rather than relying on an assembler that can produce
10786 the exact encoding required.
10787
10788 However it does mean (in the unpatched case) that we have
10789 a 1 insn window where the asynchronous unwind info is
10790 incorrect. However, if we placed the unwind info at
10791 its correct location we would have incorrect unwind info
10792 in the patched case. Which is probably all moot since
10793 I don't expect Wine generates dwarf2 unwind info for the
10794 system libraries that use this feature. */
10795
10796 insn = emit_insn (gen_blockage ());
10797
10798 push = gen_push (hard_frame_pointer_rtx);
10799 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10800 stack_pointer_rtx);
10801 RTX_FRAME_RELATED_P (push) = 1;
10802 RTX_FRAME_RELATED_P (mov) = 1;
10803
10804 RTX_FRAME_RELATED_P (insn) = 1;
10805 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10806 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10807
10808 /* Note that gen_push incremented m->fs.cfa_offset, even
10809 though we didn't emit the push insn here. */
10810 m->fs.cfa_reg = hard_frame_pointer_rtx;
10811 m->fs.fp_offset = m->fs.cfa_offset;
10812 m->fs.fp_valid = true;
10813 }
10814 else
10815 {
10816 /* The frame pointer is not needed so pop %ebp again.
10817 This leaves us with a pristine state. */
10818 emit_insn (gen_pop (hard_frame_pointer_rtx));
10819 }
10820 }
10821
10822 /* The first insn of a function that accepts its static chain on the
10823 stack is to push the register that would be filled in by a direct
10824 call. This insn will be skipped by the trampoline. */
10825 else if (ix86_static_chain_on_stack)
10826 {
10827 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10828 emit_insn (gen_blockage ());
10829
10830 /* We don't want to interpret this push insn as a register save,
10831 only as a stack adjustment. The real copy of the register as
10832 a save will be done later, if needed. */
10833 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10834 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10835 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10836 RTX_FRAME_RELATED_P (insn) = 1;
10837 }
10838
10839 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10840 DRAP is needed and stack realignment is really needed after reload. */
10841 if (stack_realign_drap)
10842 {
10843 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10844
10845 /* Only need to push parameter pointer reg if it is caller saved. */
10846 if (!call_used_regs[REGNO (crtl->drap_reg)])
10847 {
10848 /* Push arg pointer reg */
10849 insn = emit_insn (gen_push (crtl->drap_reg));
10850 RTX_FRAME_RELATED_P (insn) = 1;
10851 }
10852
10853 /* Grab the argument pointer. */
10854 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10855 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10856 RTX_FRAME_RELATED_P (insn) = 1;
10857 m->fs.cfa_reg = crtl->drap_reg;
10858 m->fs.cfa_offset = 0;
10859
10860 /* Align the stack. */
10861 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10862 stack_pointer_rtx,
10863 GEN_INT (-align_bytes)));
10864 RTX_FRAME_RELATED_P (insn) = 1;
10865
10866 /* Replicate the return address on the stack so that the return
10867 address can be reached via the (argp - 1) slot. This is needed
10868 to implement macro RETURN_ADDR_RTX and intrinsic function
10869 expand_builtin_return_addr etc. */
10870 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10871 t = gen_frame_mem (word_mode, t);
10872 insn = emit_insn (gen_push (t));
10873 RTX_FRAME_RELATED_P (insn) = 1;
10874
10875 /* For the purposes of frame and register save area addressing,
10876 we've started over with a new frame. */
10877 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10878 m->fs.realigned = true;
10879 }
10880
10881 int_registers_saved = (frame.nregs == 0);
10882 sse_registers_saved = (frame.nsseregs == 0);
10883
10884 if (frame_pointer_needed && !m->fs.fp_valid)
10885 {
10886 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10887 slower on all targets. Also sdb doesn't like it. */
10888 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10889 RTX_FRAME_RELATED_P (insn) = 1;
10890
10891 /* Push registers now, before setting the frame pointer
10892 on SEH target. */
10893 if (!int_registers_saved
10894 && TARGET_SEH
10895 && !frame.save_regs_using_mov)
10896 {
10897 ix86_emit_save_regs ();
10898 int_registers_saved = true;
10899 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10900 }
10901
10902 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10903 {
10904 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10905 RTX_FRAME_RELATED_P (insn) = 1;
10906
10907 if (m->fs.cfa_reg == stack_pointer_rtx)
10908 m->fs.cfa_reg = hard_frame_pointer_rtx;
10909 m->fs.fp_offset = m->fs.sp_offset;
10910 m->fs.fp_valid = true;
10911 }
10912 }
10913
10914 if (!int_registers_saved)
10915 {
10916 /* If saving registers via PUSH, do so now. */
10917 if (!frame.save_regs_using_mov)
10918 {
10919 ix86_emit_save_regs ();
10920 int_registers_saved = true;
10921 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10922 }
10923
10924 /* When using red zone we may start register saving before allocating
10925 the stack frame saving one cycle of the prologue. However, avoid
10926 doing this if we have to probe the stack; at least on x86_64 the
10927 stack probe can turn into a call that clobbers a red zone location. */
10928 else if (ix86_using_red_zone ()
10929 && (! TARGET_STACK_PROBE
10930 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10931 {
10932 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10933 int_registers_saved = true;
10934 }
10935 }
10936
10937 if (stack_realign_fp)
10938 {
10939 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10940 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10941
10942 /* The computation of the size of the re-aligned stack frame means
10943 that we must allocate the size of the register save area before
10944 performing the actual alignment. Otherwise we cannot guarantee
10945 that there's enough storage above the realignment point. */
10946 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10947 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10948 GEN_INT (m->fs.sp_offset
10949 - frame.sse_reg_save_offset),
10950 -1, false);
10951
10952 /* Align the stack. */
10953 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10954 stack_pointer_rtx,
10955 GEN_INT (-align_bytes)));
10956
10957 /* For the purposes of register save area addressing, the stack
10958 pointer is no longer valid. As for the value of sp_offset,
10959 see ix86_compute_frame_layout, which we need to match in order
10960 to pass verification of stack_pointer_offset at the end. */
10961 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10962 m->fs.sp_valid = false;
10963 }
10964
10965 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10966
10967 if (flag_stack_usage_info)
10968 {
10969 /* We start to count from ARG_POINTER. */
10970 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10971
10972 /* If it was realigned, take into account the fake frame. */
10973 if (stack_realign_drap)
10974 {
10975 if (ix86_static_chain_on_stack)
10976 stack_size += UNITS_PER_WORD;
10977
10978 if (!call_used_regs[REGNO (crtl->drap_reg)])
10979 stack_size += UNITS_PER_WORD;
10980
10981 /* This over-estimates by 1 minimal-stack-alignment-unit but
10982 mitigates that by counting in the new return address slot. */
10983 current_function_dynamic_stack_size
10984 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10985 }
10986
10987 current_function_static_stack_size = stack_size;
10988 }
10989
10990 /* On SEH target with very large frame size, allocate an area to save
10991 SSE registers (as the very large allocation won't be described). */
10992 if (TARGET_SEH
10993 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10994 && !sse_registers_saved)
10995 {
10996 HOST_WIDE_INT sse_size =
10997 frame.sse_reg_save_offset - frame.reg_save_offset;
10998
10999 gcc_assert (int_registers_saved);
11000
11001 /* No need to do stack checking as the area will be immediately
11002 written. */
11003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11004 GEN_INT (-sse_size), -1,
11005 m->fs.cfa_reg == stack_pointer_rtx);
11006 allocate -= sse_size;
11007 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11008 sse_registers_saved = true;
11009 }
11010
11011 /* The stack has already been decremented by the instruction calling us
11012 so probe if the size is non-negative to preserve the protection area. */
11013 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11014 {
11015 /* We expect the registers to be saved when probes are used. */
11016 gcc_assert (int_registers_saved);
11017
11018 if (STACK_CHECK_MOVING_SP)
11019 {
11020 if (!(crtl->is_leaf && !cfun->calls_alloca
11021 && allocate <= PROBE_INTERVAL))
11022 {
11023 ix86_adjust_stack_and_probe (allocate);
11024 allocate = 0;
11025 }
11026 }
11027 else
11028 {
11029 HOST_WIDE_INT size = allocate;
11030
11031 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11032 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11033
11034 if (TARGET_STACK_PROBE)
11035 {
11036 if (crtl->is_leaf && !cfun->calls_alloca)
11037 {
11038 if (size > PROBE_INTERVAL)
11039 ix86_emit_probe_stack_range (0, size);
11040 }
11041 else
11042 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11043 }
11044 else
11045 {
11046 if (crtl->is_leaf && !cfun->calls_alloca)
11047 {
11048 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11049 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11050 size - STACK_CHECK_PROTECT);
11051 }
11052 else
11053 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11054 }
11055 }
11056 }
11057
11058 if (allocate == 0)
11059 ;
11060 else if (!ix86_target_stack_probe ()
11061 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11062 {
11063 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11064 GEN_INT (-allocate), -1,
11065 m->fs.cfa_reg == stack_pointer_rtx);
11066 }
11067 else
11068 {
11069 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11070 rtx r10 = NULL;
11071 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11072 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11073 bool eax_live = ix86_eax_live_at_start_p ();
11074 bool r10_live = false;
11075
11076 if (TARGET_64BIT)
11077 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11078
11079 if (eax_live)
11080 {
11081 insn = emit_insn (gen_push (eax));
11082 allocate -= UNITS_PER_WORD;
11083 /* Note that SEH directives need to continue tracking the stack
11084 pointer even after the frame pointer has been set up. */
11085 if (sp_is_cfa_reg || TARGET_SEH)
11086 {
11087 if (sp_is_cfa_reg)
11088 m->fs.cfa_offset += UNITS_PER_WORD;
11089 RTX_FRAME_RELATED_P (insn) = 1;
11090 }
11091 }
11092
11093 if (r10_live)
11094 {
11095 r10 = gen_rtx_REG (Pmode, R10_REG);
11096 insn = emit_insn (gen_push (r10));
11097 allocate -= UNITS_PER_WORD;
11098 if (sp_is_cfa_reg || TARGET_SEH)
11099 {
11100 if (sp_is_cfa_reg)
11101 m->fs.cfa_offset += UNITS_PER_WORD;
11102 RTX_FRAME_RELATED_P (insn) = 1;
11103 }
11104 }
11105
11106 emit_move_insn (eax, GEN_INT (allocate));
11107 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11108
11109 /* Use the fact that AX still contains ALLOCATE. */
11110 adjust_stack_insn = (Pmode == DImode
11111 ? gen_pro_epilogue_adjust_stack_di_sub
11112 : gen_pro_epilogue_adjust_stack_si_sub);
11113
11114 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11115 stack_pointer_rtx, eax));
11116
11117 if (sp_is_cfa_reg || TARGET_SEH)
11118 {
11119 if (sp_is_cfa_reg)
11120 m->fs.cfa_offset += allocate;
11121 RTX_FRAME_RELATED_P (insn) = 1;
11122 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11123 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11124 plus_constant (Pmode, stack_pointer_rtx,
11125 -allocate)));
11126 }
11127 m->fs.sp_offset += allocate;
11128
11129 /* Use stack_pointer_rtx for relative addressing so that code
11130 works for realigned stack, too. */
11131 if (r10_live && eax_live)
11132 {
11133 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11134 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11135 gen_frame_mem (word_mode, t));
11136 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11137 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11138 gen_frame_mem (word_mode, t));
11139 }
11140 else if (eax_live || r10_live)
11141 {
11142 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11143 emit_move_insn (gen_rtx_REG (word_mode,
11144 (eax_live ? AX_REG : R10_REG)),
11145 gen_frame_mem (word_mode, t));
11146 }
11147 }
11148 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11149
11150 /* If we haven't already set up the frame pointer, do so now. */
11151 if (frame_pointer_needed && !m->fs.fp_valid)
11152 {
11153 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11154 GEN_INT (frame.stack_pointer_offset
11155 - frame.hard_frame_pointer_offset));
11156 insn = emit_insn (insn);
11157 RTX_FRAME_RELATED_P (insn) = 1;
11158 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11159
11160 if (m->fs.cfa_reg == stack_pointer_rtx)
11161 m->fs.cfa_reg = hard_frame_pointer_rtx;
11162 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11163 m->fs.fp_valid = true;
11164 }
11165
11166 if (!int_registers_saved)
11167 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11168 if (!sse_registers_saved)
11169 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11170
11171 pic_reg_used = false;
11172 /* We don't use pic-register for pe-coff target. */
11173 if (pic_offset_table_rtx
11174 && !TARGET_PECOFF
11175 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11176 || crtl->profile))
11177 {
11178 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11179
11180 if (alt_pic_reg_used != INVALID_REGNUM)
11181 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11182
11183 pic_reg_used = true;
11184 }
11185
11186 if (pic_reg_used)
11187 {
11188 if (TARGET_64BIT)
11189 {
11190 if (ix86_cmodel == CM_LARGE_PIC)
11191 {
11192 rtx label, tmp_reg;
11193
11194 gcc_assert (Pmode == DImode);
11195 label = gen_label_rtx ();
11196 emit_label (label);
11197 LABEL_PRESERVE_P (label) = 1;
11198 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11199 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11200 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11201 label));
11202 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11203 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11204 pic_offset_table_rtx, tmp_reg));
11205 }
11206 else
11207 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11208 }
11209 else
11210 {
11211 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11212 RTX_FRAME_RELATED_P (insn) = 1;
11213 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11214 }
11215 }
11216
11217 /* In the pic_reg_used case, make sure that the got load isn't deleted
11218 when mcount needs it. Blockage to avoid call movement across mcount
11219 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11220 note. */
11221 if (crtl->profile && !flag_fentry && pic_reg_used)
11222 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11223
11224 if (crtl->drap_reg && !crtl->stack_realign_needed)
11225 {
11226 /* vDRAP is set up, but after reload it turns out stack realignment
11227 isn't necessary; here we emit prologue code to set up DRAP
11228 without the stack realignment adjustment. */
11229 t = choose_baseaddr (0);
11230 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11231 }
11232
11233 /* Prevent instructions from being scheduled into register save push
11234 sequence when access to the redzone area is done through frame pointer.
11235 The offset between the frame pointer and the stack pointer is calculated
11236 relative to the value of the stack pointer at the end of the function
11237 prologue, and moving instructions that access redzone area via frame
11238 pointer inside push sequence violates this assumption. */
11239 if (frame_pointer_needed && frame.red_zone_size)
11240 emit_insn (gen_memory_blockage ());
11241
11242 /* Emit cld instruction if stringops are used in the function. */
11243 if (TARGET_CLD && ix86_current_function_needs_cld)
11244 emit_insn (gen_cld ());
11245
11246 /* SEH requires that the prologue end within 256 bytes of the start of
11247 the function. Prevent instruction schedules that would extend that.
11248 Further, prevent alloca modifications to the stack pointer from being
11249 combined with prologue modifications. */
11250 if (TARGET_SEH)
11251 emit_insn (gen_prologue_use (stack_pointer_rtx));
11252 }
11253
11254 /* Emit code to restore REG using a POP insn. */
11255
11256 static void
11257 ix86_emit_restore_reg_using_pop (rtx reg)
11258 {
11259 struct machine_function *m = cfun->machine;
11260 rtx insn = emit_insn (gen_pop (reg));
11261
11262 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11263 m->fs.sp_offset -= UNITS_PER_WORD;
11264
11265 if (m->fs.cfa_reg == crtl->drap_reg
11266 && REGNO (reg) == REGNO (crtl->drap_reg))
11267 {
11268 /* Previously we'd represented the CFA as an expression
11269 like *(%ebp - 8). We've just popped that value from
11270 the stack, which means we need to reset the CFA to
11271 the drap register. This will remain until we restore
11272 the stack pointer. */
11273 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11274 RTX_FRAME_RELATED_P (insn) = 1;
11275
11276 /* This means that the DRAP register is valid for addressing too. */
11277 m->fs.drap_valid = true;
11278 return;
11279 }
11280
11281 if (m->fs.cfa_reg == stack_pointer_rtx)
11282 {
11283 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11284 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11285 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11286 RTX_FRAME_RELATED_P (insn) = 1;
11287
11288 m->fs.cfa_offset -= UNITS_PER_WORD;
11289 }
11290
11291 /* When the frame pointer is the CFA, and we pop it, we are
11292 swapping back to the stack pointer as the CFA. This happens
11293 for stack frames that don't allocate other data, so we assume
11294 the stack pointer is now pointing at the return address, i.e.
11295 the function entry state, which makes the offset be 1 word. */
11296 if (reg == hard_frame_pointer_rtx)
11297 {
11298 m->fs.fp_valid = false;
11299 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11300 {
11301 m->fs.cfa_reg = stack_pointer_rtx;
11302 m->fs.cfa_offset -= UNITS_PER_WORD;
11303
11304 add_reg_note (insn, REG_CFA_DEF_CFA,
11305 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11306 GEN_INT (m->fs.cfa_offset)));
11307 RTX_FRAME_RELATED_P (insn) = 1;
11308 }
11309 }
11310 }
11311
11312 /* Emit code to restore saved registers using POP insns. */
11313
11314 static void
11315 ix86_emit_restore_regs_using_pop (void)
11316 {
11317 unsigned int regno;
11318
11319 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11320 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11321 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11322 }
11323
11324 /* Emit code and notes for the LEAVE instruction. */
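/* LEAVE is equivalent to "mov %ebp, %esp; pop %ebp" (with the obvious
   64-bit register names in 64-bit mode), so afterwards the stack pointer
   is valid again and sits one word above the slot where the frame pointer
   was saved, which is what the bookkeeping below records.  */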
11325
11326 static void
11327 ix86_emit_leave (void)
11328 {
11329 struct machine_function *m = cfun->machine;
11330 rtx insn = emit_insn (ix86_gen_leave ());
11331
11332 ix86_add_queued_cfa_restore_notes (insn);
11333
11334 gcc_assert (m->fs.fp_valid);
11335 m->fs.sp_valid = true;
11336 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11337 m->fs.fp_valid = false;
11338
11339 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11340 {
11341 m->fs.cfa_reg = stack_pointer_rtx;
11342 m->fs.cfa_offset = m->fs.sp_offset;
11343
11344 add_reg_note (insn, REG_CFA_DEF_CFA,
11345 plus_constant (Pmode, stack_pointer_rtx,
11346 m->fs.sp_offset));
11347 RTX_FRAME_RELATED_P (insn) = 1;
11348 }
11349 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11350 m->fs.fp_offset);
11351 }
11352
11353 /* Emit code to restore saved registers using MOV insns.
11354 First register is restored from CFA - CFA_OFFSET. */
11355 static void
11356 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11357 bool maybe_eh_return)
11358 {
11359 struct machine_function *m = cfun->machine;
11360 unsigned int regno;
11361
11362 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11363 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11364 {
11365 rtx reg = gen_rtx_REG (word_mode, regno);
11366 rtx insn, mem;
11367
11368 mem = choose_baseaddr (cfa_offset);
11369 mem = gen_frame_mem (word_mode, mem);
11370 insn = emit_move_insn (reg, mem);
11371
11372 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11373 {
11374 /* Previously we'd represented the CFA as an expression
11375 like *(%ebp - 8). We've just popped that value from
11376 the stack, which means we need to reset the CFA to
11377 the drap register. This will remain until we restore
11378 the stack pointer. */
11379 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11380 RTX_FRAME_RELATED_P (insn) = 1;
11381
11382 /* This means that the DRAP register is valid for addressing. */
11383 m->fs.drap_valid = true;
11384 }
11385 else
11386 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11387
11388 cfa_offset -= UNITS_PER_WORD;
11389 }
11390 }
11391
11392 /* Emit code to restore saved registers using MOV insns.
11393 First register is restored from CFA - CFA_OFFSET. */
11394 static void
11395 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11396 bool maybe_eh_return)
11397 {
11398 unsigned int regno;
11399
11400 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11401 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11402 {
11403 rtx reg = gen_rtx_REG (V4SFmode, regno);
11404 rtx mem;
11405
11406 mem = choose_baseaddr (cfa_offset);
11407 mem = gen_rtx_MEM (V4SFmode, mem);
11408 set_mem_align (mem, 128);
11409 emit_move_insn (reg, mem);
11410
11411 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11412
11413 cfa_offset -= 16;
11414 }
11415 }
11416
11417 /* Restore function stack, frame, and registers. */
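/* STYLE selects the epilogue variant: 0 for sibcall epilogues (no return
   instruction is emitted), 2 for eh_return epilogues (registers are
   restored with moves and EH_RETURN_STACKADJ_RTX is added to the stack
   pointer), and other values for a normal return.  */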
11418
11419 void
11420 ix86_expand_epilogue (int style)
11421 {
11422 struct machine_function *m = cfun->machine;
11423 struct machine_frame_state frame_state_save = m->fs;
11424 struct ix86_frame frame;
11425 bool restore_regs_via_mov;
11426 bool using_drap;
11427
11428 ix86_finalize_stack_realign_flags ();
11429 ix86_compute_frame_layout (&frame);
11430
11431 m->fs.sp_valid = (!frame_pointer_needed
11432 || (crtl->sp_is_unchanging
11433 && !stack_realign_fp));
11434 gcc_assert (!m->fs.sp_valid
11435 || m->fs.sp_offset == frame.stack_pointer_offset);
11436
11437 /* The FP must be valid if the frame pointer is present. */
11438 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11439 gcc_assert (!m->fs.fp_valid
11440 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11441
11442 /* We must have *some* valid pointer to the stack frame. */
11443 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11444
11445 /* The DRAP is never valid at this point. */
11446 gcc_assert (!m->fs.drap_valid);
11447
11448 /* See the comment about red zone and frame
11449 pointer usage in ix86_expand_prologue. */
11450 if (frame_pointer_needed && frame.red_zone_size)
11451 emit_insn (gen_memory_blockage ());
11452
11453 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11454 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11455
11456 /* Determine the CFA offset of the end of the red-zone. */
11457 m->fs.red_zone_offset = 0;
11458 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11459 {
11460 /* The red-zone begins below the return address. */
11461 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11462
11463 /* When the register save area is in the aligned portion of
11464 the stack, determine the maximum runtime displacement that
11465 matches up with the aligned frame. */
11466 if (stack_realign_drap)
11467 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11468 + UNITS_PER_WORD);
11469 }
11470
11471 /* Special care must be taken for the normal return case of a function
11472 using eh_return: the eax and edx registers are marked as saved, but
11473 not restored along this path. Adjust the save location to match. */
11474 if (crtl->calls_eh_return && style != 2)
11475 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11476
11477 /* EH_RETURN requires the use of moves to function properly. */
11478 if (crtl->calls_eh_return)
11479 restore_regs_via_mov = true;
11480 /* SEH requires the use of pops to identify the epilogue. */
11481 else if (TARGET_SEH)
11482 restore_regs_via_mov = false;
11483 /* If we're only restoring one register and sp is not valid, then
11484 use a move instruction to restore the register, since it's
11485 less work than reloading sp and popping the register. */
11486 else if (!m->fs.sp_valid && frame.nregs <= 1)
11487 restore_regs_via_mov = true;
11488 else if (TARGET_EPILOGUE_USING_MOVE
11489 && cfun->machine->use_fast_prologue_epilogue
11490 && (frame.nregs > 1
11491 || m->fs.sp_offset != frame.reg_save_offset))
11492 restore_regs_via_mov = true;
11493 else if (frame_pointer_needed
11494 && !frame.nregs
11495 && m->fs.sp_offset != frame.reg_save_offset)
11496 restore_regs_via_mov = true;
11497 else if (frame_pointer_needed
11498 && TARGET_USE_LEAVE
11499 && cfun->machine->use_fast_prologue_epilogue
11500 && frame.nregs == 1)
11501 restore_regs_via_mov = true;
11502 else
11503 restore_regs_via_mov = false;
11504
11505 if (restore_regs_via_mov || frame.nsseregs)
11506 {
11507 /* Ensure that the entire register save area is addressable via
11508 the stack pointer, if we will restore via sp. */
11509 if (TARGET_64BIT
11510 && m->fs.sp_offset > 0x7fffffff
11511 && !(m->fs.fp_valid || m->fs.drap_valid)
11512 && (frame.nsseregs + frame.nregs) != 0)
11513 {
11514 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11515 GEN_INT (m->fs.sp_offset
11516 - frame.sse_reg_save_offset),
11517 style,
11518 m->fs.cfa_reg == stack_pointer_rtx);
11519 }
11520 }
11521
11522 /* If there are any SSE registers to restore, then we have to do it
11523 via moves, since there's obviously no pop for SSE regs. */
11524 if (frame.nsseregs)
11525 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11526 style == 2);
11527
11528 if (restore_regs_via_mov)
11529 {
11530 rtx t;
11531
11532 if (frame.nregs)
11533 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11534
11535 /* eh_return epilogues need %ecx added to the stack pointer. */
11536 if (style == 2)
11537 {
11538 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11539
11540 /* Stack align doesn't work with eh_return. */
11541 gcc_assert (!stack_realign_drap);
11542 /* Neither do regparm nested functions. */
11543 gcc_assert (!ix86_static_chain_on_stack);
11544
11545 if (frame_pointer_needed)
11546 {
11547 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11548 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11549 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11550
11551 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11552 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11553
11554 /* Note that we use SA as a temporary CFA, as the return
11555 address is at the proper place relative to it. We
11556 pretend this happens at the FP restore insn because
11557 prior to this insn the FP would be stored at the wrong
11558 offset relative to SA, and after this insn we have no
11559 other reasonable register to use for the CFA. We don't
11560 bother resetting the CFA to the SP for the duration of
11561 the return insn. */
11562 add_reg_note (insn, REG_CFA_DEF_CFA,
11563 plus_constant (Pmode, sa, UNITS_PER_WORD));
11564 ix86_add_queued_cfa_restore_notes (insn);
11565 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11566 RTX_FRAME_RELATED_P (insn) = 1;
11567
11568 m->fs.cfa_reg = sa;
11569 m->fs.cfa_offset = UNITS_PER_WORD;
11570 m->fs.fp_valid = false;
11571
11572 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11573 const0_rtx, style, false);
11574 }
11575 else
11576 {
11577 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11578 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11579 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11580 ix86_add_queued_cfa_restore_notes (insn);
11581
11582 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11583 if (m->fs.cfa_offset != UNITS_PER_WORD)
11584 {
11585 m->fs.cfa_offset = UNITS_PER_WORD;
11586 add_reg_note (insn, REG_CFA_DEF_CFA,
11587 plus_constant (Pmode, stack_pointer_rtx,
11588 UNITS_PER_WORD));
11589 RTX_FRAME_RELATED_P (insn) = 1;
11590 }
11591 }
11592 m->fs.sp_offset = UNITS_PER_WORD;
11593 m->fs.sp_valid = true;
11594 }
11595 }
11596 else
11597 {
11598 /* SEH requires that the function end with (1) a stack adjustment
11599 if necessary, (2) a sequence of pops, and (3) a return or
11600 jump instruction. Prevent insns from the function body from
11601 being scheduled into this sequence. */
11602 if (TARGET_SEH)
11603 {
11604 /* Prevent a catch region from being adjacent to the standard
11605 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11606 several other flags that would be interesting to test are
11607 set up yet. */
11608 if (flag_non_call_exceptions)
11609 emit_insn (gen_nops (const1_rtx));
11610 else
11611 emit_insn (gen_blockage ());
11612 }
11613
11614 /* First step is to deallocate the stack frame so that we can
11615 pop the registers. Also do it on SEH target for very large
11616 frame as the emitted instructions aren't allowed by the ABI in
11617 epilogues. */
11618 if (!m->fs.sp_valid
11619 || (TARGET_SEH
11620 && (m->fs.sp_offset - frame.reg_save_offset
11621 >= SEH_MAX_FRAME_SIZE)))
11622 {
11623 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11624 GEN_INT (m->fs.fp_offset
11625 - frame.reg_save_offset),
11626 style, false);
11627 }
11628 else if (m->fs.sp_offset != frame.reg_save_offset)
11629 {
11630 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11631 GEN_INT (m->fs.sp_offset
11632 - frame.reg_save_offset),
11633 style,
11634 m->fs.cfa_reg == stack_pointer_rtx);
11635 }
11636
11637 ix86_emit_restore_regs_using_pop ();
11638 }
11639
11640 /* If we used a frame pointer and haven't already got rid of it,
11641 then do so now. */
11642 if (m->fs.fp_valid)
11643 {
11644 /* If the stack pointer is valid and pointing at the frame
11645 pointer store address, then we only need a pop. */
11646 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11647 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11648 /* Leave results in shorter dependency chains on CPUs that are
11649 able to grok it fast. */
11650 else if (TARGET_USE_LEAVE
11651 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11652 || !cfun->machine->use_fast_prologue_epilogue)
11653 ix86_emit_leave ();
11654 else
11655 {
11656 pro_epilogue_adjust_stack (stack_pointer_rtx,
11657 hard_frame_pointer_rtx,
11658 const0_rtx, style, !using_drap);
11659 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11660 }
11661 }
11662
11663 if (using_drap)
11664 {
11665 int param_ptr_offset = UNITS_PER_WORD;
11666 rtx insn;
11667
11668 gcc_assert (stack_realign_drap);
11669
11670 if (ix86_static_chain_on_stack)
11671 param_ptr_offset += UNITS_PER_WORD;
11672 if (!call_used_regs[REGNO (crtl->drap_reg)])
11673 param_ptr_offset += UNITS_PER_WORD;
11674
11675 insn = emit_insn (gen_rtx_SET
11676 (VOIDmode, stack_pointer_rtx,
11677 gen_rtx_PLUS (Pmode,
11678 crtl->drap_reg,
11679 GEN_INT (-param_ptr_offset))));
11680 m->fs.cfa_reg = stack_pointer_rtx;
11681 m->fs.cfa_offset = param_ptr_offset;
11682 m->fs.sp_offset = param_ptr_offset;
11683 m->fs.realigned = false;
11684
11685 add_reg_note (insn, REG_CFA_DEF_CFA,
11686 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11687 GEN_INT (param_ptr_offset)));
11688 RTX_FRAME_RELATED_P (insn) = 1;
11689
11690 if (!call_used_regs[REGNO (crtl->drap_reg)])
11691 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11692 }
11693
11694 /* At this point the stack pointer must be valid, and we must have
11695 restored all of the registers. We may not have deallocated the
11696 entire stack frame. We've delayed this until now because it may
11697 be possible to merge the local stack deallocation with the
11698 deallocation forced by ix86_static_chain_on_stack. */
11699 gcc_assert (m->fs.sp_valid);
11700 gcc_assert (!m->fs.fp_valid);
11701 gcc_assert (!m->fs.realigned);
11702 if (m->fs.sp_offset != UNITS_PER_WORD)
11703 {
11704 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11705 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11706 style, true);
11707 }
11708 else
11709 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11710
11711 /* Sibcall epilogues don't want a return instruction. */
11712 if (style == 0)
11713 {
11714 m->fs = frame_state_save;
11715 return;
11716 }
11717
11718 if (crtl->args.pops_args && crtl->args.size)
11719 {
11720 rtx popc = GEN_INT (crtl->args.pops_args);
11721
11722 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11723 address, do an explicit add, and jump indirectly to the caller.  */
11724
11725 if (crtl->args.pops_args >= 65536)
11726 {
11727 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11728 rtx insn;
11729
11730 /* There is no "pascal" calling convention in any 64bit ABI. */
11731 gcc_assert (!TARGET_64BIT);
11732
11733 insn = emit_insn (gen_pop (ecx));
11734 m->fs.cfa_offset -= UNITS_PER_WORD;
11735 m->fs.sp_offset -= UNITS_PER_WORD;
11736
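/* Record for the unwinder that the pop moved the CFA by a word and
that the return address is now live in %ecx.  */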
11737 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11738 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11739 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11740 add_reg_note (insn, REG_CFA_REGISTER,
11741 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11742 RTX_FRAME_RELATED_P (insn) = 1;
11743
11744 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11745 popc, -1, true);
11746 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11747 }
11748 else
11749 emit_jump_insn (gen_simple_return_pop_internal (popc));
11750 }
11751 else
11752 emit_jump_insn (gen_simple_return_internal ());
11753
11754 /* Restore the state back to the state from the prologue,
11755 so that it's correct for the next epilogue. */
11756 m->fs = frame_state_save;
11757 }
11758
11759 /* Reset from the function's potential modifications. */
11760
11761 static void
11762 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11763 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11764 {
11765 if (pic_offset_table_rtx)
11766 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11767 #if TARGET_MACHO
11768 /* Mach-O doesn't support labels at the end of objects, so if
11769 it looks like we might want one, insert a NOP. */
11770 {
11771 rtx insn = get_last_insn ();
11772 rtx deleted_debug_label = NULL_RTX;
11773 while (insn
11774 && NOTE_P (insn)
11775 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11776 {
11777 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert a nop;
11778 instead set their CODE_LABEL_NUMBER to -1, otherwise there
11779 would be code generation differences
11780 between -g and -g0.  */
11781 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11782 deleted_debug_label = insn;
11783 insn = PREV_INSN (insn);
11784 }
11785 if (insn
11786 && (LABEL_P (insn)
11787 || (NOTE_P (insn)
11788 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11789 fputs ("\tnop\n", file);
11790 else if (deleted_debug_label)
11791 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11792 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11793 CODE_LABEL_NUMBER (insn) = -1;
11794 }
11795 #endif
11796
11797 }
11798
11799 /* Return a scratch register to use in the split stack prologue. The
11800 split stack prologue is used for -fsplit-stack. It is the first
11801 instructions in the function, even before the regular prologue.
11802 The scratch register can be any caller-saved register which is not
11803 used for parameters or for the static chain. */
11804
11805 static unsigned int
11806 split_stack_prologue_scratch_regno (void)
11807 {
11808 if (TARGET_64BIT)
11809 return R11_REG;
11810 else
11811 {
11812 bool is_fastcall, is_thiscall;
11813 int regparm;
11814
11815 is_fastcall = (lookup_attribute ("fastcall",
11816 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11817 != NULL);
11818 is_thiscall = (lookup_attribute ("thiscall",
11819 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11820 != NULL);
11821 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11822
11823 if (is_fastcall)
11824 {
11825 if (DECL_STATIC_CHAIN (cfun->decl))
11826 {
11827 sorry ("-fsplit-stack does not support fastcall with "
11828 "nested function");
11829 return INVALID_REGNUM;
11830 }
11831 return AX_REG;
11832 }
11833 else if (is_thiscall)
11834 {
11835 if (!DECL_STATIC_CHAIN (cfun->decl))
11836 return DX_REG;
11837 return AX_REG;
11838 }
11839 else if (regparm < 3)
11840 {
11841 if (!DECL_STATIC_CHAIN (cfun->decl))
11842 return CX_REG;
11843 else
11844 {
11845 if (regparm >= 2)
11846 {
11847 sorry ("-fsplit-stack does not support 2 register "
11848 "parameters for a nested function");
11849 return INVALID_REGNUM;
11850 }
11851 return DX_REG;
11852 }
11853 }
11854 else
11855 {
11856 /* FIXME: We could make this work by pushing a register
11857 around the addition and comparison. */
11858 sorry ("-fsplit-stack does not support 3 register parameters");
11859 return INVALID_REGNUM;
11860 }
11861 }
11862 }
11863
11864 /* A SYMBOL_REF for the function which allocates new stack space for
11865 -fsplit-stack.  */
11866
11867 static GTY(()) rtx split_stack_fn;
11868
11869 /* A SYMBOL_REF for the more-stack function to use with the large
11870 model.  */
11871
11872 static GTY(()) rtx split_stack_fn_large;
11873
11874 /* Handle -fsplit-stack. These are the first instructions in the
11875 function, even before the regular prologue. */
11876
11877 void
11878 ix86_expand_split_stack_prologue (void)
11879 {
11880 struct ix86_frame frame;
11881 HOST_WIDE_INT allocate;
11882 unsigned HOST_WIDE_INT args_size;
11883 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11884 rtx scratch_reg = NULL_RTX;
11885 rtx varargs_label = NULL_RTX;
11886 rtx fn;
11887
11888 gcc_assert (flag_split_stack && reload_completed);
11889
11890 ix86_finalize_stack_realign_flags ();
11891 ix86_compute_frame_layout (&frame);
11892 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11893
11894 /* This is the label we will branch to if we have enough stack
11895 space. We expect the basic block reordering pass to reverse this
11896 branch if optimizing, so that we branch in the unlikely case. */
11897 label = gen_label_rtx ();
11898
11899 /* We need to compare the stack pointer minus the frame size with
11900 the stack boundary in the TCB. The stack boundary always gives
11901 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11902 can compare directly. Otherwise we need to do an addition. */
11903
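/* The UNSPEC_STACK_CHECK constant expands to a thread-local reference
(typically a segment-relative slot in the TCB) that holds the split-stack
boundary; wrapping it in a MEM lets us load the limit directly.  */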
11904 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11905 UNSPEC_STACK_CHECK);
11906 limit = gen_rtx_CONST (Pmode, limit);
11907 limit = gen_rtx_MEM (Pmode, limit);
11908 if (allocate < SPLIT_STACK_AVAILABLE)
11909 current = stack_pointer_rtx;
11910 else
11911 {
11912 unsigned int scratch_regno;
11913 rtx offset;
11914
11915 /* We need a scratch register to hold the stack pointer minus
11916 the required frame size. Since this is the very start of the
11917 function, the scratch register can be any caller-saved
11918 register which is not used for parameters. */
11919 offset = GEN_INT (- allocate);
11920 scratch_regno = split_stack_prologue_scratch_regno ();
11921 if (scratch_regno == INVALID_REGNUM)
11922 return;
11923 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11924 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11925 {
11926 /* We don't use ix86_gen_add3 in this case because it will
11927 want to split to lea, but when not optimizing the insn
11928 will not be split after this point. */
11929 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11930 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11931 offset)));
11932 }
11933 else
11934 {
11935 emit_move_insn (scratch_reg, offset);
11936 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11937 stack_pointer_rtx));
11938 }
11939 current = scratch_reg;
11940 }
11941
11942 ix86_expand_branch (GEU, current, limit, label);
11943 jump_insn = get_last_insn ();
11944 JUMP_LABEL (jump_insn) = label;
11945
11946 /* Mark the jump as very likely to be taken. */
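/* REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 corresponds to a 99%
probability that the branch is taken.  */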
11947 add_int_reg_note (jump_insn, REG_BR_PROB,
11948 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11949
11950 if (split_stack_fn == NULL_RTX)
11951 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11952 fn = split_stack_fn;
11953
11954 /* Get more stack space. We pass in the desired stack space and the
11955 size of the arguments to copy to the new stack. In 32-bit mode
11956 we push the parameters; __morestack will return on a new stack
11957 anyhow. In 64-bit mode we pass the parameters in r10 and
11958 r11. */
11959 allocate_rtx = GEN_INT (allocate);
11960 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11961 call_fusage = NULL_RTX;
11962 if (TARGET_64BIT)
11963 {
11964 rtx reg10, reg11;
11965
11966 reg10 = gen_rtx_REG (Pmode, R10_REG);
11967 reg11 = gen_rtx_REG (Pmode, R11_REG);
11968
11969 /* If this function uses a static chain, it will be in %r10.
11970 Preserve it across the call to __morestack. */
11971 if (DECL_STATIC_CHAIN (cfun->decl))
11972 {
11973 rtx rax;
11974
11975 rax = gen_rtx_REG (word_mode, AX_REG);
11976 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11977 use_reg (&call_fusage, rax);
11978 }
11979
11980 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11981 && !TARGET_PECOFF)
11982 {
11983 HOST_WIDE_INT argval;
11984
11985 gcc_assert (Pmode == DImode);
11986 /* When using the large model we need to load the address
11987 into a register, and we've run out of registers. So we
11988 switch to a different calling convention, and we call a
11989 different function: __morestack_large. We pass the
11990 argument size in the upper 32 bits of r10 and pass the
11991 frame size in the lower 32 bits. */
11992 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11993 gcc_assert ((args_size & 0xffffffff) == args_size);
11994
11995 if (split_stack_fn_large == NULL_RTX)
11996 split_stack_fn_large =
11997 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11998
11999 if (ix86_cmodel == CM_LARGE_PIC)
12000 {
12001 rtx label, x;
12002
12003 label = gen_label_rtx ();
12004 emit_label (label);
12005 LABEL_PRESERVE_P (label) = 1;
12006 emit_insn (gen_set_rip_rex64 (reg10, label));
12007 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12008 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12009 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12010 UNSPEC_GOT);
12011 x = gen_rtx_CONST (Pmode, x);
12012 emit_move_insn (reg11, x);
12013 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12014 x = gen_const_mem (Pmode, x);
12015 emit_move_insn (reg11, x);
12016 }
12017 else
12018 emit_move_insn (reg11, split_stack_fn_large);
12019
12020 fn = reg11;
12021
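/* Pack the argument size into the upper 32 bits and the frame size into
the lower 32 bits of %r10, as described above.  The shift is done in two
16-bit steps, presumably to sidestep problems with a literal shift count
of 32.  */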
12022 argval = ((args_size << 16) << 16) + allocate;
12023 emit_move_insn (reg10, GEN_INT (argval));
12024 }
12025 else
12026 {
12027 emit_move_insn (reg10, allocate_rtx);
12028 emit_move_insn (reg11, GEN_INT (args_size));
12029 use_reg (&call_fusage, reg11);
12030 }
12031
12032 use_reg (&call_fusage, reg10);
12033 }
12034 else
12035 {
12036 emit_insn (gen_push (GEN_INT (args_size)));
12037 emit_insn (gen_push (allocate_rtx));
12038 }
12039 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12040 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12041 NULL_RTX, false);
12042 add_function_usage_to (call_insn, call_fusage);
12043
12044 /* In order to make call/return prediction work right, we now need
12045 to execute a return instruction. See
12046 libgcc/config/i386/morestack.S for the details on how this works.
12047
12048 For flow purposes gcc must not see this as a return
12049 instruction--we need control flow to continue at the subsequent
12050 label. Therefore, we use an unspec. */
12051 gcc_assert (crtl->args.pops_args < 65536);
12052 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12053
12054 /* If we are in 64-bit mode and this function uses a static chain,
12055 we saved %r10 in %rax before calling __morestack.  */
12056 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12057 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12058 gen_rtx_REG (word_mode, AX_REG));
12059
12060 /* If this function calls va_start, we need to store a pointer to
12061 the arguments on the old stack, because they may not have all
12062 been copied to the new stack.  At this point the old stack can be
12063 found at the frame pointer value used by __morestack, because
12064 __morestack has set that up before calling back to us. Here we
12065 store that pointer in a scratch register, and in
12066 ix86_expand_prologue we store the scratch register in a stack
12067 slot. */
12068 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12069 {
12070 unsigned int scratch_regno;
12071 rtx frame_reg;
12072 int words;
12073
12074 scratch_regno = split_stack_prologue_scratch_regno ();
12075 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12076 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12077
12078 /* 64-bit:
12079 fp -> old fp value
12080 return address within this function
12081 return address of caller of this function
12082 stack arguments
12083 So we add three words to get to the stack arguments.
12084
12085 32-bit:
12086 fp -> old fp value
12087 return address within this function
12088 first argument to __morestack
12089 second argument to __morestack
12090 return address of caller of this function
12091 stack arguments
12092 So we add five words to get to the stack arguments.
12093 */
12094 words = TARGET_64BIT ? 3 : 5;
12095 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12096 gen_rtx_PLUS (Pmode, frame_reg,
12097 GEN_INT (words * UNITS_PER_WORD))));
12098
12099 varargs_label = gen_label_rtx ();
12100 emit_jump_insn (gen_jump (varargs_label));
12101 JUMP_LABEL (get_last_insn ()) = varargs_label;
12102
12103 emit_barrier ();
12104 }
12105
12106 emit_label (label);
12107 LABEL_NUSES (label) = 1;
12108
12109 /* If this function calls va_start, we now have to set the scratch
12110 register for the case where we do not call __morestack. In this
12111 case we need to set it based on the stack pointer. */
12112 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12113 {
12114 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12115 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12116 GEN_INT (UNITS_PER_WORD))));
12117
12118 emit_label (varargs_label);
12119 LABEL_NUSES (varargs_label) = 1;
12120 }
12121 }
12122
12123 /* We may have to tell the dataflow pass that the split stack prologue
12124 is initializing a scratch register. */
12125
12126 static void
12127 ix86_live_on_entry (bitmap regs)
12128 {
12129 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12130 {
12131 gcc_assert (flag_split_stack);
12132 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12133 }
12134 }
12135 \f
12136 /* Extract the parts of an RTL expression that is a valid memory address
12137 for an instruction.  Return 0 if the structure of the address is
12138 grossly off.  Return -1 if the address contains ASHIFT, so it is not
12139 strictly valid but is still used for computing the length of an lea instruction.  */
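/* For illustration: a canonical address such as
(plus (plus (mult (reg %ebx) (const_int 4)) (reg %eax)) (const_int 8))
decomposes into index %ebx, scale 4, base %eax and displacement 8,
i.e. the memory operand 8(%eax,%ebx,4).  */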
12140
12141 int
12142 ix86_decompose_address (rtx addr, struct ix86_address *out)
12143 {
12144 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12145 rtx base_reg, index_reg;
12146 HOST_WIDE_INT scale = 1;
12147 rtx scale_rtx = NULL_RTX;
12148 rtx tmp;
12149 int retval = 1;
12150 enum ix86_address_seg seg = SEG_DEFAULT;
12151
12152 /* Allow zero-extended SImode addresses;
12153 they will be emitted with the addr32 prefix.  */
12154 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12155 {
12156 if (GET_CODE (addr) == ZERO_EXTEND
12157 && GET_MODE (XEXP (addr, 0)) == SImode)
12158 {
12159 addr = XEXP (addr, 0);
12160 if (CONST_INT_P (addr))
12161 return 0;
12162 }
12163 else if (GET_CODE (addr) == AND
12164 && const_32bit_mask (XEXP (addr, 1), DImode))
12165 {
12166 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12167 if (addr == NULL_RTX)
12168 return 0;
12169
12170 if (CONST_INT_P (addr))
12171 return 0;
12172 }
12173 }
12174
12175 /* Allow SImode subregs of DImode addresses;
12176 they will be emitted with the addr32 prefix.  */
12177 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12178 {
12179 if (GET_CODE (addr) == SUBREG
12180 && GET_MODE (SUBREG_REG (addr)) == DImode)
12181 {
12182 addr = SUBREG_REG (addr);
12183 if (CONST_INT_P (addr))
12184 return 0;
12185 }
12186 }
12187
12188 if (REG_P (addr))
12189 base = addr;
12190 else if (GET_CODE (addr) == SUBREG)
12191 {
12192 if (REG_P (SUBREG_REG (addr)))
12193 base = addr;
12194 else
12195 return 0;
12196 }
12197 else if (GET_CODE (addr) == PLUS)
12198 {
12199 rtx addends[4], op;
12200 int n = 0, i;
12201
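/* Flatten the nested PLUS chain into at most four addends and then
classify each one below as index*scale, base, index or displacement;
anything deeper, or with duplicate roles, is not a valid address.  */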
12202 op = addr;
12203 do
12204 {
12205 if (n >= 4)
12206 return 0;
12207 addends[n++] = XEXP (op, 1);
12208 op = XEXP (op, 0);
12209 }
12210 while (GET_CODE (op) == PLUS);
12211 if (n >= 4)
12212 return 0;
12213 addends[n] = op;
12214
12215 for (i = n; i >= 0; --i)
12216 {
12217 op = addends[i];
12218 switch (GET_CODE (op))
12219 {
12220 case MULT:
12221 if (index)
12222 return 0;
12223 index = XEXP (op, 0);
12224 scale_rtx = XEXP (op, 1);
12225 break;
12226
12227 case ASHIFT:
12228 if (index)
12229 return 0;
12230 index = XEXP (op, 0);
12231 tmp = XEXP (op, 1);
12232 if (!CONST_INT_P (tmp))
12233 return 0;
12234 scale = INTVAL (tmp);
12235 if ((unsigned HOST_WIDE_INT) scale > 3)
12236 return 0;
12237 scale = 1 << scale;
12238 break;
12239
12240 case ZERO_EXTEND:
12241 op = XEXP (op, 0);
12242 if (GET_CODE (op) != UNSPEC)
12243 return 0;
12244 /* FALLTHRU */
12245
12246 case UNSPEC:
12247 if (XINT (op, 1) == UNSPEC_TP
12248 && TARGET_TLS_DIRECT_SEG_REFS
12249 && seg == SEG_DEFAULT)
12250 seg = DEFAULT_TLS_SEG_REG;
12251 else
12252 return 0;
12253 break;
12254
12255 case SUBREG:
12256 if (!REG_P (SUBREG_REG (op)))
12257 return 0;
12258 /* FALLTHRU */
12259
12260 case REG:
12261 if (!base)
12262 base = op;
12263 else if (!index)
12264 index = op;
12265 else
12266 return 0;
12267 break;
12268
12269 case CONST:
12270 case CONST_INT:
12271 case SYMBOL_REF:
12272 case LABEL_REF:
12273 if (disp)
12274 return 0;
12275 disp = op;
12276 break;
12277
12278 default:
12279 return 0;
12280 }
12281 }
12282 }
12283 else if (GET_CODE (addr) == MULT)
12284 {
12285 index = XEXP (addr, 0); /* index*scale */
12286 scale_rtx = XEXP (addr, 1);
12287 }
12288 else if (GET_CODE (addr) == ASHIFT)
12289 {
12290 /* We're called for lea too, which implements ashift on occasion. */
12291 index = XEXP (addr, 0);
12292 tmp = XEXP (addr, 1);
12293 if (!CONST_INT_P (tmp))
12294 return 0;
12295 scale = INTVAL (tmp);
12296 if ((unsigned HOST_WIDE_INT) scale > 3)
12297 return 0;
12298 scale = 1 << scale;
12299 retval = -1;
12300 }
12301 else
12302 disp = addr; /* displacement */
12303
12304 if (index)
12305 {
12306 if (REG_P (index))
12307 ;
12308 else if (GET_CODE (index) == SUBREG
12309 && REG_P (SUBREG_REG (index)))
12310 ;
12311 else
12312 return 0;
12313 }
12314
12315 /* Extract the integral value of scale. */
12316 if (scale_rtx)
12317 {
12318 if (!CONST_INT_P (scale_rtx))
12319 return 0;
12320 scale = INTVAL (scale_rtx);
12321 }
12322
12323 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12324 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12325
12326 /* Avoid useless 0 displacement. */
12327 if (disp == const0_rtx && (base || index))
12328 disp = NULL_RTX;
12329
12330 /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
12331 if (base_reg && index_reg && scale == 1
12332 && (index_reg == arg_pointer_rtx
12333 || index_reg == frame_pointer_rtx
12334 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12335 {
12336 rtx tmp;
12337 tmp = base, base = index, index = tmp;
12338 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12339 }
12340
12341 /* Special case: %ebp cannot be encoded as a base without a displacement.
12342 Similarly %r13. */
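/* (In the ModR/M byte, mod=00 with a base encoding of 101b is reused to
mean disp32, so these registers can only serve as a base together with an
explicit displacement; %r13 shares the same low encoding bits under REX.)  */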
12343 if (!disp
12344 && base_reg
12345 && (base_reg == hard_frame_pointer_rtx
12346 || base_reg == frame_pointer_rtx
12347 || base_reg == arg_pointer_rtx
12348 || (REG_P (base_reg)
12349 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12350 || REGNO (base_reg) == R13_REG))))
12351 disp = const0_rtx;
12352
12353 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12354 Avoid this by transforming it to [%esi+0].
12355 Reload calls address legitimization without cfun defined, so we need
12356 to test cfun for being non-NULL.  */
12357 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12358 && base_reg && !index_reg && !disp
12359 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12360 disp = const0_rtx;
12361
12362 /* Special case: encode reg+reg instead of reg*2. */
12363 if (!base && index && scale == 2)
12364 base = index, base_reg = index_reg, scale = 1;
12365
12366 /* Special case: scaling cannot be encoded without base or displacement. */
12367 if (!base && !disp && index && scale != 1)
12368 disp = const0_rtx;
12369
12370 out->base = base;
12371 out->index = index;
12372 out->disp = disp;
12373 out->scale = scale;
12374 out->seg = seg;
12375
12376 return retval;
12377 }
12378 \f
12379 /* Return the cost of the memory address x.
12380 For i386, it is better to use a complex address than let gcc copy
12381 the address into a reg and make a new pseudo.  But not if the address
12382 requires two regs - that would mean more pseudos with longer
12383 lifetimes.  */
12384 static int
12385 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12386 addr_space_t as ATTRIBUTE_UNUSED,
12387 bool speed ATTRIBUTE_UNUSED)
12388 {
12389 struct ix86_address parts;
12390 int cost = 1;
12391 int ok = ix86_decompose_address (x, &parts);
12392
12393 gcc_assert (ok);
12394
12395 if (parts.base && GET_CODE (parts.base) == SUBREG)
12396 parts.base = SUBREG_REG (parts.base);
12397 if (parts.index && GET_CODE (parts.index) == SUBREG)
12398 parts.index = SUBREG_REG (parts.index);
12399
12400 /* Attempt to minimize number of registers in the address. */
12401 if ((parts.base
12402 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12403 || (parts.index
12404 && (!REG_P (parts.index)
12405 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12406 cost++;
12407
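/* If the base and the index are two distinct pseudos, the address ties
up a second register as well; count it.  */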
12408 if (parts.base
12409 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12410 && parts.index
12411 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12412 && parts.base != parts.index)
12413 cost++;
12414
12415 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12416 since its predecode logic can't detect the length of such instructions
12417 and they degenerate to vector decoding.  Increase the cost of such
12418 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
12419 to split such addresses or even to refuse them altogether.
12420
12421 The following addressing modes are affected:
12422 [base+scale*index]
12423 [scale*index+disp]
12424 [base+index]
12425
12426 The first and last cases may be avoidable by explicitly coding the zero
12427 displacement in the memory address, but I don't have an AMD-K6 machine
12428 handy to check this theory.  */
12429
12430 if (TARGET_K6
12431 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12432 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12433 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12434 cost += 10;
12435
12436 return cost;
12437 }
12438 \f
12439 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12440 this is used to form addresses to local data when -fPIC is in
12441 use.  */
12442
12443 static bool
12444 darwin_local_data_pic (rtx disp)
12445 {
12446 return (GET_CODE (disp) == UNSPEC
12447 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12448 }
12449
12450 /* Determine if a given RTX is a valid constant. We already know this
12451 satisfies CONSTANT_P. */
12452
12453 static bool
12454 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12455 {
12456 switch (GET_CODE (x))
12457 {
12458 case CONST:
12459 x = XEXP (x, 0);
12460
12461 if (GET_CODE (x) == PLUS)
12462 {
12463 if (!CONST_INT_P (XEXP (x, 1)))
12464 return false;
12465 x = XEXP (x, 0);
12466 }
12467
12468 if (TARGET_MACHO && darwin_local_data_pic (x))
12469 return true;
12470
12471 /* Only some unspecs are valid as "constants". */
12472 if (GET_CODE (x) == UNSPEC)
12473 switch (XINT (x, 1))
12474 {
12475 case UNSPEC_GOT:
12476 case UNSPEC_GOTOFF:
12477 case UNSPEC_PLTOFF:
12478 return TARGET_64BIT;
12479 case UNSPEC_TPOFF:
12480 case UNSPEC_NTPOFF:
12481 x = XVECEXP (x, 0, 0);
12482 return (GET_CODE (x) == SYMBOL_REF
12483 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12484 case UNSPEC_DTPOFF:
12485 x = XVECEXP (x, 0, 0);
12486 return (GET_CODE (x) == SYMBOL_REF
12487 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12488 default:
12489 return false;
12490 }
12491
12492 /* We must have drilled down to a symbol. */
12493 if (GET_CODE (x) == LABEL_REF)
12494 return true;
12495 if (GET_CODE (x) != SYMBOL_REF)
12496 return false;
12497 /* FALLTHRU */
12498
12499 case SYMBOL_REF:
12500 /* TLS symbols are never valid. */
12501 if (SYMBOL_REF_TLS_MODEL (x))
12502 return false;
12503
12504 /* DLLIMPORT symbols are never valid. */
12505 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12506 && SYMBOL_REF_DLLIMPORT_P (x))
12507 return false;
12508
12509 #if TARGET_MACHO
12510 /* mdynamic-no-pic */
12511 if (MACHO_DYNAMIC_NO_PIC_P)
12512 return machopic_symbol_defined_p (x);
12513 #endif
12514 break;
12515
12516 case CONST_DOUBLE:
12517 if (GET_MODE (x) == TImode
12518 && x != CONST0_RTX (TImode)
12519 && !TARGET_64BIT)
12520 return false;
12521 break;
12522
12523 case CONST_VECTOR:
12524 if (!standard_sse_constant_p (x))
12525 return false;
12526
12527 default:
12528 break;
12529 }
12530
12531 /* Otherwise we handle everything else in the move patterns. */
12532 return true;
12533 }
12534
12535 /* Determine if it's legal to put X into the constant pool. This
12536 is not possible for the address of thread-local symbols, which
12537 is checked above. */
12538
12539 static bool
12540 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12541 {
12542 /* We can always put integral constants and vectors in memory. */
12543 switch (GET_CODE (x))
12544 {
12545 case CONST_INT:
12546 case CONST_DOUBLE:
12547 case CONST_VECTOR:
12548 return false;
12549
12550 default:
12551 break;
12552 }
12553 return !ix86_legitimate_constant_p (mode, x);
12554 }
12555
12556 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
12557 otherwise zero.  */
12558
12559 static bool
12560 is_imported_p (rtx x)
12561 {
12562 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12563 || GET_CODE (x) != SYMBOL_REF)
12564 return false;
12565
12566 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12567 }
12568
12569
12570 /* Nonzero if the constant value X is a legitimate general operand
12571 when generating PIC code. It is given that flag_pic is on and
12572 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12573
12574 bool
12575 legitimate_pic_operand_p (rtx x)
12576 {
12577 rtx inner;
12578
12579 switch (GET_CODE (x))
12580 {
12581 case CONST:
12582 inner = XEXP (x, 0);
12583 if (GET_CODE (inner) == PLUS
12584 && CONST_INT_P (XEXP (inner, 1)))
12585 inner = XEXP (inner, 0);
12586
12587 /* Only some unspecs are valid as "constants". */
12588 if (GET_CODE (inner) == UNSPEC)
12589 switch (XINT (inner, 1))
12590 {
12591 case UNSPEC_GOT:
12592 case UNSPEC_GOTOFF:
12593 case UNSPEC_PLTOFF:
12594 return TARGET_64BIT;
12595 case UNSPEC_TPOFF:
12596 x = XVECEXP (inner, 0, 0);
12597 return (GET_CODE (x) == SYMBOL_REF
12598 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12599 case UNSPEC_MACHOPIC_OFFSET:
12600 return legitimate_pic_address_disp_p (x);
12601 default:
12602 return false;
12603 }
12604 /* FALLTHRU */
12605
12606 case SYMBOL_REF:
12607 case LABEL_REF:
12608 return legitimate_pic_address_disp_p (x);
12609
12610 default:
12611 return true;
12612 }
12613 }
12614
12615 /* Determine if a given CONST RTX is a valid memory displacement
12616 in PIC mode. */
12617
12618 bool
12619 legitimate_pic_address_disp_p (rtx disp)
12620 {
12621 bool saw_plus;
12622
12623 /* In 64bit mode we can allow direct addresses of symbols and labels
12624 when they are not dynamic symbols. */
12625 if (TARGET_64BIT)
12626 {
12627 rtx op0 = disp, op1;
12628
12629 switch (GET_CODE (disp))
12630 {
12631 case LABEL_REF:
12632 return true;
12633
12634 case CONST:
12635 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12636 break;
12637 op0 = XEXP (XEXP (disp, 0), 0);
12638 op1 = XEXP (XEXP (disp, 0), 1);
12639 if (!CONST_INT_P (op1)
12640 || INTVAL (op1) >= 16*1024*1024
12641 || INTVAL (op1) < -16*1024*1024)
12642 break;
12643 if (GET_CODE (op0) == LABEL_REF)
12644 return true;
12645 if (GET_CODE (op0) == CONST
12646 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12647 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12648 return true;
12649 if (GET_CODE (op0) == UNSPEC
12650 && XINT (op0, 1) == UNSPEC_PCREL)
12651 return true;
12652 if (GET_CODE (op0) != SYMBOL_REF)
12653 break;
12654 /* FALLTHRU */
12655
12656 case SYMBOL_REF:
12657 /* TLS references should always be enclosed in UNSPEC.
12658 The dllimported symbol always needs to be resolved.  */
12659 if (SYMBOL_REF_TLS_MODEL (op0)
12660 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12661 return false;
12662
12663 if (TARGET_PECOFF)
12664 {
12665 if (is_imported_p (op0))
12666 return true;
12667
12668 if (SYMBOL_REF_FAR_ADDR_P (op0)
12669 || !SYMBOL_REF_LOCAL_P (op0))
12670 break;
12671
12672 /* Function symbols need to be resolved only for
12673 the large model.
12674 For the small model we don't need to resolve anything
12675 here.  */
12676 if ((ix86_cmodel != CM_LARGE_PIC
12677 && SYMBOL_REF_FUNCTION_P (op0))
12678 || ix86_cmodel == CM_SMALL_PIC)
12679 return true;
12680 /* Non-external symbols don't need to be resolved for
12681 the large and medium models.  */
12682 if ((ix86_cmodel == CM_LARGE_PIC
12683 || ix86_cmodel == CM_MEDIUM_PIC)
12684 && !SYMBOL_REF_EXTERNAL_P (op0))
12685 return true;
12686 }
12687 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12688 && SYMBOL_REF_LOCAL_P (op0)
12689 && ix86_cmodel != CM_LARGE_PIC)
12690 return true;
12691 break;
12692
12693 default:
12694 break;
12695 }
12696 }
12697 if (GET_CODE (disp) != CONST)
12698 return false;
12699 disp = XEXP (disp, 0);
12700
12701 if (TARGET_64BIT)
12702 {
12703 /* It is unsafe to allow PLUS expressions here; this limits the allowed
12704 distance of GOT tables.  We should not need these anyway.  */
12705 if (GET_CODE (disp) != UNSPEC
12706 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12707 && XINT (disp, 1) != UNSPEC_GOTOFF
12708 && XINT (disp, 1) != UNSPEC_PCREL
12709 && XINT (disp, 1) != UNSPEC_PLTOFF))
12710 return false;
12711
12712 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12713 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12714 return false;
12715 return true;
12716 }
12717
12718 saw_plus = false;
12719 if (GET_CODE (disp) == PLUS)
12720 {
12721 if (!CONST_INT_P (XEXP (disp, 1)))
12722 return false;
12723 disp = XEXP (disp, 0);
12724 saw_plus = true;
12725 }
12726
12727 if (TARGET_MACHO && darwin_local_data_pic (disp))
12728 return true;
12729
12730 if (GET_CODE (disp) != UNSPEC)
12731 return false;
12732
12733 switch (XINT (disp, 1))
12734 {
12735 case UNSPEC_GOT:
12736 if (saw_plus)
12737 return false;
12738 /* We need to check for both symbols and labels because VxWorks loads
12739 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12740 details. */
12741 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12742 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12743 case UNSPEC_GOTOFF:
12744 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12745 While the ABI also specifies a 32bit relocation, we don't produce
12746 it in the small PIC model at all.  */
12747 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12748 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12749 && !TARGET_64BIT)
12750 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12751 return false;
12752 case UNSPEC_GOTTPOFF:
12753 case UNSPEC_GOTNTPOFF:
12754 case UNSPEC_INDNTPOFF:
12755 if (saw_plus)
12756 return false;
12757 disp = XVECEXP (disp, 0, 0);
12758 return (GET_CODE (disp) == SYMBOL_REF
12759 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12760 case UNSPEC_NTPOFF:
12761 disp = XVECEXP (disp, 0, 0);
12762 return (GET_CODE (disp) == SYMBOL_REF
12763 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12764 case UNSPEC_DTPOFF:
12765 disp = XVECEXP (disp, 0, 0);
12766 return (GET_CODE (disp) == SYMBOL_REF
12767 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12768 }
12769
12770 return false;
12771 }
12772
12773 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns true if
12774 (part of) the address was pushed for reload, in which case the
12775 calling macro should goto WIN, and false if no replacement is
12776 called for.  */
12777
12778 bool
12779 ix86_legitimize_reload_address (rtx x,
12780 enum machine_mode mode ATTRIBUTE_UNUSED,
12781 int opnum, int type,
12782 int ind_levels ATTRIBUTE_UNUSED)
12783 {
12784 /* Reload can generate:
12785
12786 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12787 (reg:DI 97))
12788 (reg:DI 2 cx))
12789
12790 This RTX is rejected by ix86_legitimate_address_p due to
12791 the non-strictness of base register 97.  Following this rejection,
12792 reload pushes all three components into separate registers,
12793 creating an invalid memory address RTX.
12794
12795 The following code reloads only the invalid part of the
12796 memory address RTX.  */
12797
12798 if (GET_CODE (x) == PLUS
12799 && REG_P (XEXP (x, 1))
12800 && GET_CODE (XEXP (x, 0)) == PLUS
12801 && REG_P (XEXP (XEXP (x, 0), 1)))
12802 {
12803 rtx base, index;
12804 bool something_reloaded = false;
12805
12806 base = XEXP (XEXP (x, 0), 1);
12807 if (!REG_OK_FOR_BASE_STRICT_P (base))
12808 {
12809 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12810 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12811 opnum, (enum reload_type) type);
12812 something_reloaded = true;
12813 }
12814
12815 index = XEXP (x, 1);
12816 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12817 {
12818 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12819 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12820 opnum, (enum reload_type) type);
12821 something_reloaded = true;
12822 }
12823
12824 gcc_assert (something_reloaded);
12825 return true;
12826 }
12827
12828 return false;
12829 }
12830
12831 /* Determine if OP is a suitable RTX for an address register.
12832 Return the naked register if a register or a register subreg is
12833 found, otherwise return NULL_RTX.  */
12834
12835 static rtx
12836 ix86_validate_address_register (rtx op)
12837 {
12838 enum machine_mode mode = GET_MODE (op);
12839
12840 /* Only SImode or DImode registers can form the address. */
12841 if (mode != SImode && mode != DImode)
12842 return NULL_RTX;
12843
12844 if (REG_P (op))
12845 return op;
12846 else if (GET_CODE (op) == SUBREG)
12847 {
12848 rtx reg = SUBREG_REG (op);
12849
12850 if (!REG_P (reg))
12851 return NULL_RTX;
12852
12853 mode = GET_MODE (reg);
12854
12855 /* Don't allow SUBREGs that span more than a word. It can
12856 lead to spill failures when the register is one word out
12857 of a two word structure. */
12858 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12859 return NULL_RTX;
12860
12861 /* Allow only SUBREGs of non-eliminable hard registers. */
12862 if (register_no_elim_operand (reg, mode))
12863 return reg;
12864 }
12865
12866 /* Op is not a register. */
12867 return NULL_RTX;
12868 }
12869
12870 /* Recognizes RTL expressions that are valid memory addresses for an
12871 instruction. The MODE argument is the machine mode for the MEM
12872 expression that wants to use this address.
12873
12874 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12875 convert common non-canonical forms to canonical form so that they will
12876 be recognized. */
12877
12878 static bool
12879 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12880 rtx addr, bool strict)
12881 {
12882 struct ix86_address parts;
12883 rtx base, index, disp;
12884 HOST_WIDE_INT scale;
12885 enum ix86_address_seg seg;
12886
12887 if (ix86_decompose_address (addr, &parts) <= 0)
12888 /* Decomposition failed. */
12889 return false;
12890
12891 base = parts.base;
12892 index = parts.index;
12893 disp = parts.disp;
12894 scale = parts.scale;
12895 seg = parts.seg;
12896
12897 /* Validate base register. */
12898 if (base)
12899 {
12900 rtx reg = ix86_validate_address_register (base);
12901
12902 if (reg == NULL_RTX)
12903 return false;
12904
12905 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12906 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12907 /* Base is not valid. */
12908 return false;
12909 }
12910
12911 /* Validate index register. */
12912 if (index)
12913 {
12914 rtx reg = ix86_validate_address_register (index);
12915
12916 if (reg == NULL_RTX)
12917 return false;
12918
12919 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12920 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12921 /* Index is not valid. */
12922 return false;
12923 }
12924
12925 /* Index and base should have the same mode. */
12926 if (base && index
12927 && GET_MODE (base) != GET_MODE (index))
12928 return false;
12929
12930 /* Address override works only on the (%reg) part of %fs:(%reg). */
12931 if (seg != SEG_DEFAULT
12932 && ((base && GET_MODE (base) != word_mode)
12933 || (index && GET_MODE (index) != word_mode)))
12934 return false;
12935
12936 /* Validate scale factor. */
12937 if (scale != 1)
12938 {
12939 if (!index)
12940 /* Scale without index. */
12941 return false;
12942
12943 if (scale != 2 && scale != 4 && scale != 8)
12944 /* Scale is not a valid multiplier. */
12945 return false;
12946 }
12947
12948 /* Validate displacement. */
12949 if (disp)
12950 {
12951 if (GET_CODE (disp) == CONST
12952 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12953 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12954 switch (XINT (XEXP (disp, 0), 1))
12955 {
12956 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
12957 used.  While the ABI also specifies 32bit relocations, we don't produce
12958 them at all and use IP-relative addressing instead.  */
12959 case UNSPEC_GOT:
12960 case UNSPEC_GOTOFF:
12961 gcc_assert (flag_pic);
12962 if (!TARGET_64BIT)
12963 goto is_legitimate_pic;
12964
12965 /* 64bit address unspec. */
12966 return false;
12967
12968 case UNSPEC_GOTPCREL:
12969 case UNSPEC_PCREL:
12970 gcc_assert (flag_pic);
12971 goto is_legitimate_pic;
12972
12973 case UNSPEC_GOTTPOFF:
12974 case UNSPEC_GOTNTPOFF:
12975 case UNSPEC_INDNTPOFF:
12976 case UNSPEC_NTPOFF:
12977 case UNSPEC_DTPOFF:
12978 break;
12979
12980 case UNSPEC_STACK_CHECK:
12981 gcc_assert (flag_split_stack);
12982 break;
12983
12984 default:
12985 /* Invalid address unspec. */
12986 return false;
12987 }
12988
12989 else if (SYMBOLIC_CONST (disp)
12990 && (flag_pic
12991 || (TARGET_MACHO
12992 #if TARGET_MACHO
12993 && MACHOPIC_INDIRECT
12994 && !machopic_operand_p (disp)
12995 #endif
12996 )))
12997 {
12998
12999 is_legitimate_pic:
13000 if (TARGET_64BIT && (index || base))
13001 {
13002 /* foo@dtpoff(%rX) is ok. */
13003 if (GET_CODE (disp) != CONST
13004 || GET_CODE (XEXP (disp, 0)) != PLUS
13005 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13006 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13007 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13008 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13009 /* Non-constant pic memory reference. */
13010 return false;
13011 }
13012 else if ((!TARGET_MACHO || flag_pic)
13013 && ! legitimate_pic_address_disp_p (disp))
13014 /* Displacement is an invalid pic construct. */
13015 return false;
13016 #if TARGET_MACHO
13017 else if (MACHO_DYNAMIC_NO_PIC_P
13018 && !ix86_legitimate_constant_p (Pmode, disp))
13019 /* The displacement must be referenced via a non_lazy_pointer.  */
13020 return false;
13021 #endif
13022
13023 /* This code used to verify that a symbolic pic displacement
13024 includes the pic_offset_table_rtx register.
13025
13026 While this is a good idea, unfortunately these constructs may
13027 be created by the "adds using lea" optimization for incorrect
13028 code like:
13029
13030 int a;
13031 int foo(int i)
13032 {
13033 return *(&a+i);
13034 }
13035
13036 This code is nonsensical, but results in addressing the
13037 GOT table with a pic_offset_table_rtx base.  We can't
13038 just refuse it easily, since it gets matched by the
13039 "addsi3" pattern, which later gets split to an lea in
13040 case the output register differs from the input.  While
13041 this could be handled by a separate addsi pattern for this
13042 case that never results in an lea, disabling this test seems
13043 to be the easier and correct fix for the crash.  */
13044 }
13045 else if (GET_CODE (disp) != LABEL_REF
13046 && !CONST_INT_P (disp)
13047 && (GET_CODE (disp) != CONST
13048 || !ix86_legitimate_constant_p (Pmode, disp))
13049 && (GET_CODE (disp) != SYMBOL_REF
13050 || !ix86_legitimate_constant_p (Pmode, disp)))
13051 /* Displacement is not constant. */
13052 return false;
13053 else if (TARGET_64BIT
13054 && !x86_64_immediate_operand (disp, VOIDmode))
13055 /* Displacement is out of range. */
13056 return false;
13057 /* In x32 mode, constant addresses are sign extended to 64bit, so
13058 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13059 else if (TARGET_X32 && !(index || base)
13060 && CONST_INT_P (disp)
13061 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13062 return false;
13063 }
13064
13065 /* Everything looks valid. */
13066 return true;
13067 }
13068
13069 /* Determine if a given RTX is a valid constant address. */
13070
13071 bool
13072 constant_address_p (rtx x)
13073 {
13074 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13075 }
13076 \f
13077 /* Return a unique alias set for the GOT. */
13078
13079 static alias_set_type
13080 ix86_GOT_alias_set (void)
13081 {
13082 static alias_set_type set = -1;
13083 if (set == -1)
13084 set = new_alias_set ();
13085 return set;
13086 }
13087
13088 /* Return a legitimate reference for ORIG (an address) using the
13089 register REG. If REG is 0, a new pseudo is generated.
13090
13091 There are two types of references that must be handled:
13092
13093 1. Global data references must load the address from the GOT, via
13094 the PIC reg. An insn is emitted to do this load, and the reg is
13095 returned.
13096
13097 2. Static data references, constant pool addresses, and code labels
13098 compute the address as an offset from the GOT, whose base is in
13099 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13100 differentiate them from global data objects. The returned
13101 address is the PIC reg + an unspec constant.
13102
13103 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13104 reg also appears in the address. */
13105
13106 static rtx
13107 legitimize_pic_address (rtx orig, rtx reg)
13108 {
13109 rtx addr = orig;
13110 rtx new_rtx = orig;
13111
13112 #if TARGET_MACHO
13113 if (TARGET_MACHO && !TARGET_64BIT)
13114 {
13115 if (reg == 0)
13116 reg = gen_reg_rtx (Pmode);
13117 /* Use the generic Mach-O PIC machinery. */
13118 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13119 }
13120 #endif
13121
13122 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13123 {
13124 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13125 if (tmp)
13126 return tmp;
13127 }
13128
13129 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13130 new_rtx = addr;
13131 else if (TARGET_64BIT && !TARGET_PECOFF
13132 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13133 {
13134 rtx tmpreg;
13135 /* This symbol may be referenced via a displacement from the PIC
13136 base address (@GOTOFF). */
13137
13138 if (reload_in_progress)
13139 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13140 if (GET_CODE (addr) == CONST)
13141 addr = XEXP (addr, 0);
13142 if (GET_CODE (addr) == PLUS)
13143 {
13144 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13145 UNSPEC_GOTOFF);
13146 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13147 }
13148 else
13149 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13150 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13151 if (!reg)
13152 tmpreg = gen_reg_rtx (Pmode);
13153 else
13154 tmpreg = reg;
13155 emit_move_insn (tmpreg, new_rtx);
13156
13157 if (reg != 0)
13158 {
13159 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13160 tmpreg, 1, OPTAB_DIRECT);
13161 new_rtx = reg;
13162 }
13163 else
13164 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13165 }
13166 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13167 {
13168 /* This symbol may be referenced via a displacement from the PIC
13169 base address (@GOTOFF). */
13170
13171 if (reload_in_progress)
13172 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13173 if (GET_CODE (addr) == CONST)
13174 addr = XEXP (addr, 0);
13175 if (GET_CODE (addr) == PLUS)
13176 {
13177 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13178 UNSPEC_GOTOFF);
13179 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13180 }
13181 else
13182 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13183 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13184 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13185
13186 if (reg != 0)
13187 {
13188 emit_move_insn (reg, new_rtx);
13189 new_rtx = reg;
13190 }
13191 }
13192 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13193 /* We can't use @GOTOFF for text labels on VxWorks;
13194 see gotoff_operand. */
13195 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13196 {
13197 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13198 if (tmp)
13199 return tmp;
13200
13201 /* For x64 PE-COFF there is no GOT table, so we use the address
13202 directly.  */
13203 if (TARGET_64BIT && TARGET_PECOFF)
13204 {
13205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13206 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13207
13208 if (reg == 0)
13209 reg = gen_reg_rtx (Pmode);
13210 emit_move_insn (reg, new_rtx);
13211 new_rtx = reg;
13212 }
13213 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13214 {
13215 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13216 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13217 new_rtx = gen_const_mem (Pmode, new_rtx);
13218 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13219
13220 if (reg == 0)
13221 reg = gen_reg_rtx (Pmode);
13222 /* Use gen_movsi directly, otherwise the address is loaded
13223 into a register for CSE.  We don't want to CSE these addresses;
13224 instead we CSE addresses from the GOT table, so skip this.  */
13225 emit_insn (gen_movsi (reg, new_rtx));
13226 new_rtx = reg;
13227 }
13228 else
13229 {
13230 /* This symbol must be referenced via a load from the
13231 Global Offset Table (@GOT). */
13232
13233 if (reload_in_progress)
13234 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13235 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13236 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13237 if (TARGET_64BIT)
13238 new_rtx = force_reg (Pmode, new_rtx);
13239 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13240 new_rtx = gen_const_mem (Pmode, new_rtx);
13241 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13242
13243 if (reg == 0)
13244 reg = gen_reg_rtx (Pmode);
13245 emit_move_insn (reg, new_rtx);
13246 new_rtx = reg;
13247 }
13248 }
13249 else
13250 {
13251 if (CONST_INT_P (addr)
13252 && !x86_64_immediate_operand (addr, VOIDmode))
13253 {
13254 if (reg)
13255 {
13256 emit_move_insn (reg, addr);
13257 new_rtx = reg;
13258 }
13259 else
13260 new_rtx = force_reg (Pmode, addr);
13261 }
13262 else if (GET_CODE (addr) == CONST)
13263 {
13264 addr = XEXP (addr, 0);
13265
13266 /* We must match stuff we generate before. Assume the only
13267 unspecs that can get here are ours. Not that we could do
13268 anything with them anyway.... */
13269 if (GET_CODE (addr) == UNSPEC
13270 || (GET_CODE (addr) == PLUS
13271 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13272 return orig;
13273 gcc_assert (GET_CODE (addr) == PLUS);
13274 }
13275 if (GET_CODE (addr) == PLUS)
13276 {
13277 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13278
13279 /* Check first to see if this is a constant offset from a @GOTOFF
13280 symbol reference. */
13281 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13282 && CONST_INT_P (op1))
13283 {
13284 if (!TARGET_64BIT)
13285 {
13286 if (reload_in_progress)
13287 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13288 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13289 UNSPEC_GOTOFF);
13290 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13291 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13292 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13293
13294 if (reg != 0)
13295 {
13296 emit_move_insn (reg, new_rtx);
13297 new_rtx = reg;
13298 }
13299 }
13300 else
13301 {
13302 if (INTVAL (op1) < -16*1024*1024
13303 || INTVAL (op1) >= 16*1024*1024)
13304 {
13305 if (!x86_64_immediate_operand (op1, Pmode))
13306 op1 = force_reg (Pmode, op1);
13307 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13308 }
13309 }
13310 }
13311 else
13312 {
13313 rtx base = legitimize_pic_address (op0, reg);
13314 enum machine_mode mode = GET_MODE (base);
13315 new_rtx
13316 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13317
13318 if (CONST_INT_P (new_rtx))
13319 {
13320 if (INTVAL (new_rtx) < -16*1024*1024
13321 || INTVAL (new_rtx) >= 16*1024*1024)
13322 {
13323 if (!x86_64_immediate_operand (new_rtx, mode))
13324 new_rtx = force_reg (mode, new_rtx);
13325 new_rtx
13326 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13327 }
13328 else
13329 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13330 }
13331 else
13332 {
13333 if (GET_CODE (new_rtx) == PLUS
13334 && CONSTANT_P (XEXP (new_rtx, 1)))
13335 {
13336 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13337 new_rtx = XEXP (new_rtx, 1);
13338 }
13339 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13340 }
13341 }
13342 }
13343 }
13344 return new_rtx;
13345 }
13346 \f
13347 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13348
13349 static rtx
13350 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13351 {
13352 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13353
13354 if (GET_MODE (tp) != tp_mode)
13355 {
13356 gcc_assert (GET_MODE (tp) == SImode);
13357 gcc_assert (tp_mode == DImode);
13358
13359 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13360 }
13361
13362 if (to_reg)
13363 tp = copy_to_mode_reg (tp_mode, tp);
13364
13365 return tp;
13366 }
13367
13368 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13369
13370 static GTY(()) rtx ix86_tls_symbol;
13371
13372 static rtx
13373 ix86_tls_get_addr (void)
13374 {
13375 if (!ix86_tls_symbol)
13376 {
13377 const char *sym
13378 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13379 ? "___tls_get_addr" : "__tls_get_addr");
13380
13381 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13382 }
13383
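/* In the large PIC model a direct call may not be able to reach
__tls_get_addr, so compute its address as the GOT base plus an @PLTOFF
offset instead.  */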
13384 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13385 {
13386 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13387 UNSPEC_PLTOFF);
13388 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13389 gen_rtx_CONST (Pmode, unspec));
13390 }
13391
13392 return ix86_tls_symbol;
13393 }
13394
13395 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13396
13397 static GTY(()) rtx ix86_tls_module_base_symbol;
13398
13399 rtx
13400 ix86_tls_module_base (void)
13401 {
13402 if (!ix86_tls_module_base_symbol)
13403 {
13404 ix86_tls_module_base_symbol
13405 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13406
13407 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13408 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13409 }
13410
13411 return ix86_tls_module_base_symbol;
13412 }
13413
13414 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13415 false if we expect this to be used for a memory address and true if
13416 we expect to load the address into a register. */
13417
13418 static rtx
13419 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13420 {
13421 rtx dest, base, off;
13422 rtx pic = NULL_RTX, tp = NULL_RTX;
13423 enum machine_mode tp_mode = Pmode;
13424 int type;
13425
13426 /* Fall back to the global dynamic model if the toolchain cannot support
13427 local dynamic.  */
13428 if (TARGET_SUN_TLS && !TARGET_64BIT
13429 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13430 && model == TLS_MODEL_LOCAL_DYNAMIC)
13431 model = TLS_MODEL_GLOBAL_DYNAMIC;
13432
13433 switch (model)
13434 {
13435 case TLS_MODEL_GLOBAL_DYNAMIC:
13436 dest = gen_reg_rtx (Pmode);
13437
13438 if (!TARGET_64BIT)
13439 {
13440 if (flag_pic && !TARGET_PECOFF)
13441 pic = pic_offset_table_rtx;
13442 else
13443 {
13444 pic = gen_reg_rtx (Pmode);
13445 emit_insn (gen_set_got (pic));
13446 }
13447 }
13448
13449 if (TARGET_GNU2_TLS)
13450 {
13451 if (TARGET_64BIT)
13452 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13453 else
13454 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13455
13456 tp = get_thread_pointer (Pmode, true);
13457 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13458
13459 if (GET_MODE (x) != Pmode)
13460 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13461
13462 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13463 }
13464 else
13465 {
13466 rtx caddr = ix86_tls_get_addr ();
13467
13468 if (TARGET_64BIT)
13469 {
13470 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13471 rtx insns;
13472
13473 start_sequence ();
13474 emit_call_insn
13475 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13476 insns = get_insns ();
13477 end_sequence ();
13478
13479 if (GET_MODE (x) != Pmode)
13480 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13481
13482 RTL_CONST_CALL_P (insns) = 1;
13483 emit_libcall_block (insns, dest, rax, x);
13484 }
13485 else
13486 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13487 }
13488 break;
13489
13490 case TLS_MODEL_LOCAL_DYNAMIC:
13491 base = gen_reg_rtx (Pmode);
13492
13493 if (!TARGET_64BIT)
13494 {
13495 if (flag_pic)
13496 pic = pic_offset_table_rtx;
13497 else
13498 {
13499 pic = gen_reg_rtx (Pmode);
13500 emit_insn (gen_set_got (pic));
13501 }
13502 }
13503
13504 if (TARGET_GNU2_TLS)
13505 {
13506 rtx tmp = ix86_tls_module_base ();
13507
13508 if (TARGET_64BIT)
13509 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13510 else
13511 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13512
13513 tp = get_thread_pointer (Pmode, true);
13514 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13515 gen_rtx_MINUS (Pmode, tmp, tp));
13516 }
13517 else
13518 {
13519 rtx caddr = ix86_tls_get_addr ();
13520
13521 if (TARGET_64BIT)
13522 {
13523 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13524 rtx insns, eqv;
13525
13526 start_sequence ();
13527 emit_call_insn
13528 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13529 insns = get_insns ();
13530 end_sequence ();
13531
13532 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13533 share the LD_BASE result with other LD model accesses. */
13534 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13535 UNSPEC_TLS_LD_BASE);
13536
13537 RTL_CONST_CALL_P (insns) = 1;
13538 emit_libcall_block (insns, base, rax, eqv);
13539 }
13540 else
13541 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13542 }
13543
13544 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13545 off = gen_rtx_CONST (Pmode, off);
13546
13547 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13548
13549 if (TARGET_GNU2_TLS)
13550 {
13551 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13552
13553 if (GET_MODE (x) != Pmode)
13554 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13555
13556 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13557 }
13558 break;
13559
13560 case TLS_MODEL_INITIAL_EXEC:
13561 if (TARGET_64BIT)
13562 {
13563 if (TARGET_SUN_TLS && !TARGET_X32)
13564 {
13565 /* The Sun linker took the AMD64 TLS spec literally
13566 and can only handle %rax as the destination of the
13567 initial-exec code sequence. */
13568
13569 dest = gen_reg_rtx (DImode);
13570 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13571 return dest;
13572 }
13573
13574 /* Generate DImode references to avoid %fs:(%reg32)
13575 problems and the linker IE->LE relaxation bug. */
13576 tp_mode = DImode;
13577 pic = NULL;
13578 type = UNSPEC_GOTNTPOFF;
13579 }
13580 else if (flag_pic)
13581 {
13582 if (reload_in_progress)
13583 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13584 pic = pic_offset_table_rtx;
13585 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13586 }
13587 else if (!TARGET_ANY_GNU_TLS)
13588 {
13589 pic = gen_reg_rtx (Pmode);
13590 emit_insn (gen_set_got (pic));
13591 type = UNSPEC_GOTTPOFF;
13592 }
13593 else
13594 {
13595 pic = NULL;
13596 type = UNSPEC_INDNTPOFF;
13597 }
13598
13599 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13600 off = gen_rtx_CONST (tp_mode, off);
13601 if (pic)
13602 off = gen_rtx_PLUS (tp_mode, pic, off);
13603 off = gen_const_mem (tp_mode, off);
13604 set_mem_alias_set (off, ix86_GOT_alias_set ());
13605
13606 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13607 {
13608 base = get_thread_pointer (tp_mode,
13609 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13610 off = force_reg (tp_mode, off);
13611 return gen_rtx_PLUS (tp_mode, base, off);
13612 }
13613 else
13614 {
13615 base = get_thread_pointer (Pmode, true);
13616 dest = gen_reg_rtx (Pmode);
13617 emit_insn (ix86_gen_sub3 (dest, base, off));
13618 }
13619 break;
13620
13621 case TLS_MODEL_LOCAL_EXEC:
13622 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13623 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13624 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13625 off = gen_rtx_CONST (Pmode, off);
13626
13627 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13628 {
13629 base = get_thread_pointer (Pmode,
13630 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13631 return gen_rtx_PLUS (Pmode, base, off);
13632 }
13633 else
13634 {
13635 base = get_thread_pointer (Pmode, true);
13636 dest = gen_reg_rtx (Pmode);
13637 emit_insn (ix86_gen_sub3 (dest, base, off));
13638 }
13639 break;
13640
13641 default:
13642 gcc_unreachable ();
13643 }
13644
13645 return dest;
13646 }
13647
13648 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13649 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13650 unique refptr-DECL symbol corresponding to symbol DECL. */
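/* For illustration, assuming a MinGW-style target: a reference to a
   dllimport'ed "foo" must go through the import table, so this routine
   manufactures an artificial VAR_DECL whose DECL_RTL is a MEM of the
   symbol "__imp_foo" (or "__imp__foo" when symbols carry a user label
   prefix).  With BEIMPORT false a local ".refptr.foo" stub symbol is
   created instead, used when the symbol must be reached through a
   pointer under the medium/large PIC code models (see
   legitimize_pe_coff_symbol below).  */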
13651
13652 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13653 htab_t dllimport_map;
13654
13655 static tree
13656 get_dllimport_decl (tree decl, bool beimport)
13657 {
13658 struct tree_map *h, in;
13659 void **loc;
13660 const char *name;
13661 const char *prefix;
13662 size_t namelen, prefixlen;
13663 char *imp_name;
13664 tree to;
13665 rtx rtl;
13666
13667 if (!dllimport_map)
13668 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13669
13670 in.hash = htab_hash_pointer (decl);
13671 in.base.from = decl;
13672 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13673 h = (struct tree_map *) *loc;
13674 if (h)
13675 return h->to;
13676
13677 *loc = h = ggc_alloc<tree_map> ();
13678 h->hash = in.hash;
13679 h->base.from = decl;
13680 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13681 VAR_DECL, NULL, ptr_type_node);
13682 DECL_ARTIFICIAL (to) = 1;
13683 DECL_IGNORED_P (to) = 1;
13684 DECL_EXTERNAL (to) = 1;
13685 TREE_READONLY (to) = 1;
13686
13687 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13688 name = targetm.strip_name_encoding (name);
13689 if (beimport)
13690 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13691 ? "*__imp_" : "*__imp__";
13692 else
13693 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13694 namelen = strlen (name);
13695 prefixlen = strlen (prefix);
13696 imp_name = (char *) alloca (namelen + prefixlen + 1);
13697 memcpy (imp_name, prefix, prefixlen);
13698 memcpy (imp_name + prefixlen, name, namelen + 1);
13699
13700 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13701 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13702 SET_SYMBOL_REF_DECL (rtl, to);
13703 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13704 if (!beimport)
13705 {
13706 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13707 #ifdef SUB_TARGET_RECORD_STUB
13708 SUB_TARGET_RECORD_STUB (name);
13709 #endif
13710 }
13711
13712 rtl = gen_const_mem (Pmode, rtl);
13713 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13714
13715 SET_DECL_RTL (to, rtl);
13716 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13717
13718 return to;
13719 }
13720
13721 /* Expand SYMBOL into its corresponding far-address symbol.
13722 WANT_REG is true if we require the result be a register. */
13723
13724 static rtx
13725 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13726 {
13727 tree imp_decl;
13728 rtx x;
13729
13730 gcc_assert (SYMBOL_REF_DECL (symbol));
13731 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13732
13733 x = DECL_RTL (imp_decl);
13734 if (want_reg)
13735 x = force_reg (Pmode, x);
13736 return x;
13737 }
13738
13739 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13740 true if we require the result be a register. */
13741
13742 static rtx
13743 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13744 {
13745 tree imp_decl;
13746 rtx x;
13747
13748 gcc_assert (SYMBOL_REF_DECL (symbol));
13749 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13750
13751 x = DECL_RTL (imp_decl);
13752 if (want_reg)
13753 x = force_reg (Pmode, x);
13754 return x;
13755 }
13756
13757 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13758 is true if we require the result be a register. */
13759
13760 static rtx
13761 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13762 {
13763 if (!TARGET_PECOFF)
13764 return NULL_RTX;
13765
13766 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13767 {
13768 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13769 return legitimize_dllimport_symbol (addr, inreg);
13770 if (GET_CODE (addr) == CONST
13771 && GET_CODE (XEXP (addr, 0)) == PLUS
13772 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13773 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13774 {
13775 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13776 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13777 }
13778 }
13779
13780 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13781 return NULL_RTX;
13782 if (GET_CODE (addr) == SYMBOL_REF
13783 && !is_imported_p (addr)
13784 && SYMBOL_REF_EXTERNAL_P (addr)
13785 && SYMBOL_REF_DECL (addr))
13786 return legitimize_pe_coff_extern_decl (addr, inreg);
13787
13788 if (GET_CODE (addr) == CONST
13789 && GET_CODE (XEXP (addr, 0)) == PLUS
13790 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13791 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13792 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13793 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13794 {
13795 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13796 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13797 }
13798 return NULL_RTX;
13799 }
13800
13801 /* Try machine-dependent ways of modifying an illegitimate address
13802 to be legitimate. If we find one, return the new, valid address.
13803 This macro is used in only one place: `memory_address' in explow.c.
13804
13805 OLDX is the address as it was before break_out_memory_refs was called.
13806 In some cases it is useful to look at this to decide what needs to be done.
13807
13808 It is always safe for this macro to do nothing. It exists to recognize
13809 opportunities to optimize the output.
13810
13811 For the 80386, we handle X+REG by loading X into a register R and
13812 using R+REG. R will go in a general reg and indexing will be used.
13813 However, if REG is a broken-out memory address or multiplication,
13814 nothing needs to be done because REG can certainly go in a general reg.
13815
13816 When -fpic is used, special handling is needed for symbolic references.
13817 See comments by legitimize_pic_address in i386.c for details. */
13818
13819 static rtx
13820 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13821 enum machine_mode mode)
13822 {
13823 int changed = 0;
13824 unsigned log;
13825
13826 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13827 if (log)
13828 return legitimize_tls_address (x, (enum tls_model) log, false);
13829 if (GET_CODE (x) == CONST
13830 && GET_CODE (XEXP (x, 0)) == PLUS
13831 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13832 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13833 {
13834 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13835 (enum tls_model) log, false);
13836 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13837 }
13838
13839 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13840 {
13841 rtx tmp = legitimize_pe_coff_symbol (x, true);
13842 if (tmp)
13843 return tmp;
13844 }
13845
13846 if (flag_pic && SYMBOLIC_CONST (x))
13847 return legitimize_pic_address (x, 0);
13848
13849 #if TARGET_MACHO
13850 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13851 return machopic_indirect_data_reference (x, 0);
13852 #endif
13853
13854 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
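/* An illustrative example: x86 addressing encodes a scaled index only
   for scales 1, 2, 4 and 8, so (ashift (reg) (const_int 2)) is rewritten
   here as (mult (reg) (const_int 4)), which ix86_legitimate_address_p
   can then accept as the index*scale part of a SIB address.  */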
13855 if (GET_CODE (x) == ASHIFT
13856 && CONST_INT_P (XEXP (x, 1))
13857 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13858 {
13859 changed = 1;
13860 log = INTVAL (XEXP (x, 1));
13861 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13862 GEN_INT (1 << log));
13863 }
13864
13865 if (GET_CODE (x) == PLUS)
13866 {
13867 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13868
13869 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13870 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13871 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13872 {
13873 changed = 1;
13874 log = INTVAL (XEXP (XEXP (x, 0), 1));
13875 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13876 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13877 GEN_INT (1 << log));
13878 }
13879
13880 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13881 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13882 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13883 {
13884 changed = 1;
13885 log = INTVAL (XEXP (XEXP (x, 1), 1));
13886 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13887 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13888 GEN_INT (1 << log));
13889 }
13890
13891 /* Put multiply first if it isn't already. */
13892 if (GET_CODE (XEXP (x, 1)) == MULT)
13893 {
13894 rtx tmp = XEXP (x, 0);
13895 XEXP (x, 0) = XEXP (x, 1);
13896 XEXP (x, 1) = tmp;
13897 changed = 1;
13898 }
13899
13900 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13901 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13902 created by virtual register instantiation, register elimination, and
13903 similar optimizations. */
13904 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13905 {
13906 changed = 1;
13907 x = gen_rtx_PLUS (Pmode,
13908 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13909 XEXP (XEXP (x, 1), 0)),
13910 XEXP (XEXP (x, 1), 1));
13911 }
13912
13913 /* Canonicalize
13914 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13915 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13916 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13918 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13919 && CONSTANT_P (XEXP (x, 1)))
13920 {
13921 rtx constant;
13922 rtx other = NULL_RTX;
13923
13924 if (CONST_INT_P (XEXP (x, 1)))
13925 {
13926 constant = XEXP (x, 1);
13927 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13928 }
13929 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13930 {
13931 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13932 other = XEXP (x, 1);
13933 }
13934 else
13935 constant = 0;
13936
13937 if (constant)
13938 {
13939 changed = 1;
13940 x = gen_rtx_PLUS (Pmode,
13941 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13942 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13943 plus_constant (Pmode, other,
13944 INTVAL (constant)));
13945 }
13946 }
13947
13948 if (changed && ix86_legitimate_address_p (mode, x, false))
13949 return x;
13950
13951 if (GET_CODE (XEXP (x, 0)) == MULT)
13952 {
13953 changed = 1;
13954 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13955 }
13956
13957 if (GET_CODE (XEXP (x, 1)) == MULT)
13958 {
13959 changed = 1;
13960 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13961 }
13962
13963 if (changed
13964 && REG_P (XEXP (x, 1))
13965 && REG_P (XEXP (x, 0)))
13966 return x;
13967
13968 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13969 {
13970 changed = 1;
13971 x = legitimize_pic_address (x, 0);
13972 }
13973
13974 if (changed && ix86_legitimate_address_p (mode, x, false))
13975 return x;
13976
13977 if (REG_P (XEXP (x, 0)))
13978 {
13979 rtx temp = gen_reg_rtx (Pmode);
13980 rtx val = force_operand (XEXP (x, 1), temp);
13981 if (val != temp)
13982 {
13983 val = convert_to_mode (Pmode, val, 1);
13984 emit_move_insn (temp, val);
13985 }
13986
13987 XEXP (x, 1) = temp;
13988 return x;
13989 }
13990
13991 else if (REG_P (XEXP (x, 1)))
13992 {
13993 rtx temp = gen_reg_rtx (Pmode);
13994 rtx val = force_operand (XEXP (x, 0), temp);
13995 if (val != temp)
13996 {
13997 val = convert_to_mode (Pmode, val, 1);
13998 emit_move_insn (temp, val);
13999 }
14000
14001 XEXP (x, 0) = temp;
14002 return x;
14003 }
14004 }
14005
14006 return x;
14007 }
14008 \f
14009 /* Print an integer constant expression in assembler syntax. Addition
14010 and subtraction are the only arithmetic that may appear in these
14011 expressions. FILE is the stdio stream to write to, X is the rtx, and
14012 CODE is the operand print code from the output string. */
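/* For example (AT&T syntax), the UNSPEC wrappers handled below end up
   printed as relocation suffixes on the symbol: "foo@GOT", "foo@GOTOFF",
   "foo@PLT", "foo@GOTPCREL(%rip)", and the TLS forms "foo@tpoff",
   "foo@ntpoff", "foo@dtpoff", "foo@gottpoff" and "foo@indntpoff".  */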
14013
14014 static void
14015 output_pic_addr_const (FILE *file, rtx x, int code)
14016 {
14017 char buf[256];
14018
14019 switch (GET_CODE (x))
14020 {
14021 case PC:
14022 gcc_assert (flag_pic);
14023 putc ('.', file);
14024 break;
14025
14026 case SYMBOL_REF:
14027 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14028 output_addr_const (file, x);
14029 else
14030 {
14031 const char *name = XSTR (x, 0);
14032
14033 /* Mark the decl as referenced so that cgraph will
14034 output the function. */
14035 if (SYMBOL_REF_DECL (x))
14036 mark_decl_referenced (SYMBOL_REF_DECL (x));
14037
14038 #if TARGET_MACHO
14039 if (MACHOPIC_INDIRECT
14040 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14041 name = machopic_indirection_name (x, /*stub_p=*/true);
14042 #endif
14043 assemble_name (file, name);
14044 }
14045 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14046 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14047 fputs ("@PLT", file);
14048 break;
14049
14050 case LABEL_REF:
14051 x = XEXP (x, 0);
14052 /* FALLTHRU */
14053 case CODE_LABEL:
14054 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14055 assemble_name (asm_out_file, buf);
14056 break;
14057
14058 case CONST_INT:
14059 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14060 break;
14061
14062 case CONST:
14063 /* This used to output parentheses around the expression,
14064 but that does not work on the 386 (either ATT or BSD assembler). */
14065 output_pic_addr_const (file, XEXP (x, 0), code);
14066 break;
14067
14068 case CONST_DOUBLE:
14069 if (GET_MODE (x) == VOIDmode)
14070 {
14071 /* We can use %d if the number is <32 bits and positive. */
14072 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14073 fprintf (file, "0x%lx%08lx",
14074 (unsigned long) CONST_DOUBLE_HIGH (x),
14075 (unsigned long) CONST_DOUBLE_LOW (x));
14076 else
14077 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14078 }
14079 else
14080 /* We can't handle floating point constants;
14081 TARGET_PRINT_OPERAND must handle them. */
14082 output_operand_lossage ("floating constant misused");
14083 break;
14084
14085 case PLUS:
14086 /* Some assemblers need integer constants to appear first. */
14087 if (CONST_INT_P (XEXP (x, 0)))
14088 {
14089 output_pic_addr_const (file, XEXP (x, 0), code);
14090 putc ('+', file);
14091 output_pic_addr_const (file, XEXP (x, 1), code);
14092 }
14093 else
14094 {
14095 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14096 output_pic_addr_const (file, XEXP (x, 1), code);
14097 putc ('+', file);
14098 output_pic_addr_const (file, XEXP (x, 0), code);
14099 }
14100 break;
14101
14102 case MINUS:
14103 if (!TARGET_MACHO)
14104 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14105 output_pic_addr_const (file, XEXP (x, 0), code);
14106 putc ('-', file);
14107 output_pic_addr_const (file, XEXP (x, 1), code);
14108 if (!TARGET_MACHO)
14109 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14110 break;
14111
14112 case UNSPEC:
14113 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14114 {
14115 bool f = i386_asm_output_addr_const_extra (file, x);
14116 gcc_assert (f);
14117 break;
14118 }
14119
14120 gcc_assert (XVECLEN (x, 0) == 1);
14121 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14122 switch (XINT (x, 1))
14123 {
14124 case UNSPEC_GOT:
14125 fputs ("@GOT", file);
14126 break;
14127 case UNSPEC_GOTOFF:
14128 fputs ("@GOTOFF", file);
14129 break;
14130 case UNSPEC_PLTOFF:
14131 fputs ("@PLTOFF", file);
14132 break;
14133 case UNSPEC_PCREL:
14134 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14135 "(%rip)" : "[rip]", file);
14136 break;
14137 case UNSPEC_GOTPCREL:
14138 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14139 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14140 break;
14141 case UNSPEC_GOTTPOFF:
14142 /* FIXME: This might be @TPOFF in Sun ld too. */
14143 fputs ("@gottpoff", file);
14144 break;
14145 case UNSPEC_TPOFF:
14146 fputs ("@tpoff", file);
14147 break;
14148 case UNSPEC_NTPOFF:
14149 if (TARGET_64BIT)
14150 fputs ("@tpoff", file);
14151 else
14152 fputs ("@ntpoff", file);
14153 break;
14154 case UNSPEC_DTPOFF:
14155 fputs ("@dtpoff", file);
14156 break;
14157 case UNSPEC_GOTNTPOFF:
14158 if (TARGET_64BIT)
14159 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14160 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14161 else
14162 fputs ("@gotntpoff", file);
14163 break;
14164 case UNSPEC_INDNTPOFF:
14165 fputs ("@indntpoff", file);
14166 break;
14167 #if TARGET_MACHO
14168 case UNSPEC_MACHOPIC_OFFSET:
14169 putc ('-', file);
14170 machopic_output_function_base_name (file);
14171 break;
14172 #endif
14173 default:
14174 output_operand_lossage ("invalid UNSPEC as operand");
14175 break;
14176 }
14177 break;
14178
14179 default:
14180 output_operand_lossage ("invalid expression as operand");
14181 }
14182 }
14183
14184 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14185 We need to emit DTP-relative relocations. */
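/* For example, assuming ASM_LONG expands to a .long directive on the
   target, a 4-byte entry typically comes out as
     .long foo@dtpoff
   and an 8-byte entry as
     .long foo@dtpoff, 0
   i.e. the upper half is padded with zero.  */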
14186
14187 static void ATTRIBUTE_UNUSED
14188 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14189 {
14190 fputs (ASM_LONG, file);
14191 output_addr_const (file, x);
14192 fputs ("@dtpoff", file);
14193 switch (size)
14194 {
14195 case 4:
14196 break;
14197 case 8:
14198 fputs (", 0", file);
14199 break;
14200 default:
14201 gcc_unreachable ();
14202 }
14203 }
14204
14205 /* Return true if X is a representation of the PIC register. This copes
14206 with calls from ix86_find_base_term, where the register might have
14207 been replaced by a cselib value. */
14208
14209 static bool
14210 ix86_pic_register_p (rtx x)
14211 {
14212 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14213 return (pic_offset_table_rtx
14214 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14215 else
14216 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14217 }
14218
14219 /* Helper function for ix86_delegitimize_address.
14220 Attempt to delegitimize TLS local-exec accesses. */
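/* For illustration: with TARGET_TLS_DIRECT_SEG_REFS a local-exec access
   looks like %fs:foo@tpoff (%gs: on 32-bit), i.e. a MEM whose address
   decomposes with seg == DEFAULT_TLS_SEG_REG and
   disp == (const (unspec [foo] NTPOFF)).  This routine strips the unspec
   and rebuilds base + index*scale + foo, so the debug output can refer
   to the symbol directly.  */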
14221
14222 static rtx
14223 ix86_delegitimize_tls_address (rtx orig_x)
14224 {
14225 rtx x = orig_x, unspec;
14226 struct ix86_address addr;
14227
14228 if (!TARGET_TLS_DIRECT_SEG_REFS)
14229 return orig_x;
14230 if (MEM_P (x))
14231 x = XEXP (x, 0);
14232 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14233 return orig_x;
14234 if (ix86_decompose_address (x, &addr) == 0
14235 || addr.seg != DEFAULT_TLS_SEG_REG
14236 || addr.disp == NULL_RTX
14237 || GET_CODE (addr.disp) != CONST)
14238 return orig_x;
14239 unspec = XEXP (addr.disp, 0);
14240 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14241 unspec = XEXP (unspec, 0);
14242 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14243 return orig_x;
14244 x = XVECEXP (unspec, 0, 0);
14245 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14246 if (unspec != XEXP (addr.disp, 0))
14247 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14248 if (addr.index)
14249 {
14250 rtx idx = addr.index;
14251 if (addr.scale != 1)
14252 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14253 x = gen_rtx_PLUS (Pmode, idx, x);
14254 }
14255 if (addr.base)
14256 x = gen_rtx_PLUS (Pmode, addr.base, x);
14257 if (MEM_P (orig_x))
14258 x = replace_equiv_address_nv (orig_x, x);
14259 return x;
14260 }
14261
14262 /* In the name of slightly smaller debug output, and to cater to
14263 general assembler lossage, recognize PIC+GOTOFF and turn it back
14264 into a direct symbol reference.
14265
14266 On Darwin, this is necessary to avoid a crash, because Darwin
14267 has a different PIC label for each routine but the DWARF debugging
14268 information is not associated with any particular routine, so it's
14269 necessary to remove references to the PIC label from RTL stored by
14270 the DWARF output code. */
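/* For example, the 32-bit form
     (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into plain "foo", and on 64-bit a memory reference of
   the form (const (unspec [foo] UNSPEC_GOTPCREL)) likewise
   delegitimizes to "foo".  */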
14271
14272 static rtx
14273 ix86_delegitimize_address (rtx x)
14274 {
14275 rtx orig_x = delegitimize_mem_from_attrs (x);
14276 /* addend is NULL or some rtx if x is something+GOTOFF where
14277 something doesn't include the PIC register. */
14278 rtx addend = NULL_RTX;
14279 /* reg_addend is NULL or a multiple of some register. */
14280 rtx reg_addend = NULL_RTX;
14281 /* const_addend is NULL or a const_int. */
14282 rtx const_addend = NULL_RTX;
14283 /* This is the result, or NULL. */
14284 rtx result = NULL_RTX;
14285
14286 x = orig_x;
14287
14288 if (MEM_P (x))
14289 x = XEXP (x, 0);
14290
14291 if (TARGET_64BIT)
14292 {
14293 if (GET_CODE (x) == CONST
14294 && GET_CODE (XEXP (x, 0)) == PLUS
14295 && GET_MODE (XEXP (x, 0)) == Pmode
14296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14297 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14298 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14299 {
14300 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14301 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14302 if (MEM_P (orig_x))
14303 x = replace_equiv_address_nv (orig_x, x);
14304 return x;
14305 }
14306
14307 if (GET_CODE (x) == CONST
14308 && GET_CODE (XEXP (x, 0)) == UNSPEC
14309 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14310 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14311 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14312 {
14313 x = XVECEXP (XEXP (x, 0), 0, 0);
14314 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14315 {
14316 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14317 GET_MODE (x), 0);
14318 if (x == NULL_RTX)
14319 return orig_x;
14320 }
14321 return x;
14322 }
14323
14324 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14325 return ix86_delegitimize_tls_address (orig_x);
14326
14327 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14328 and -mcmodel=medium -fpic. */
14329 }
14330
14331 if (GET_CODE (x) != PLUS
14332 || GET_CODE (XEXP (x, 1)) != CONST)
14333 return ix86_delegitimize_tls_address (orig_x);
14334
14335 if (ix86_pic_register_p (XEXP (x, 0)))
14336 /* %ebx + GOT/GOTOFF */
14337 ;
14338 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14339 {
14340 /* %ebx + %reg * scale + GOT/GOTOFF */
14341 reg_addend = XEXP (x, 0);
14342 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14343 reg_addend = XEXP (reg_addend, 1);
14344 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14345 reg_addend = XEXP (reg_addend, 0);
14346 else
14347 {
14348 reg_addend = NULL_RTX;
14349 addend = XEXP (x, 0);
14350 }
14351 }
14352 else
14353 addend = XEXP (x, 0);
14354
14355 x = XEXP (XEXP (x, 1), 0);
14356 if (GET_CODE (x) == PLUS
14357 && CONST_INT_P (XEXP (x, 1)))
14358 {
14359 const_addend = XEXP (x, 1);
14360 x = XEXP (x, 0);
14361 }
14362
14363 if (GET_CODE (x) == UNSPEC
14364 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14365 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14366 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14367 && !MEM_P (orig_x) && !addend)))
14368 result = XVECEXP (x, 0, 0);
14369
14370 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14371 && !MEM_P (orig_x))
14372 result = XVECEXP (x, 0, 0);
14373
14374 if (! result)
14375 return ix86_delegitimize_tls_address (orig_x);
14376
14377 if (const_addend)
14378 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14379 if (reg_addend)
14380 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14381 if (addend)
14382 {
14383 /* If the rest of original X doesn't involve the PIC register, add
14384 addend and subtract pic_offset_table_rtx. This can happen e.g.
14385 for code like:
14386 leal (%ebx, %ecx, 4), %ecx
14387 ...
14388 movl foo@GOTOFF(%ecx), %edx
14389 in which case we return (%ecx - %ebx) + foo. */
14390 if (pic_offset_table_rtx)
14391 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14392 pic_offset_table_rtx),
14393 result);
14394 else
14395 return orig_x;
14396 }
14397 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14398 {
14399 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14400 if (result == NULL_RTX)
14401 return orig_x;
14402 }
14403 return result;
14404 }
14405
14406 /* If X is a machine specific address (i.e. a symbol or label being
14407 referenced as a displacement from the GOT implemented using an
14408 UNSPEC), then return the base term. Otherwise return X. */
14409
14410 rtx
14411 ix86_find_base_term (rtx x)
14412 {
14413 rtx term;
14414
14415 if (TARGET_64BIT)
14416 {
14417 if (GET_CODE (x) != CONST)
14418 return x;
14419 term = XEXP (x, 0);
14420 if (GET_CODE (term) == PLUS
14421 && (CONST_INT_P (XEXP (term, 1))
14422 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14423 term = XEXP (term, 0);
14424 if (GET_CODE (term) != UNSPEC
14425 || (XINT (term, 1) != UNSPEC_GOTPCREL
14426 && XINT (term, 1) != UNSPEC_PCREL))
14427 return x;
14428
14429 return XVECEXP (term, 0, 0);
14430 }
14431
14432 return ix86_delegitimize_address (x);
14433 }
14434 \f
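/* Print to FILE the condition-code suffix selected by CODE and MODE,
   e.g. "e", "ne", "l", "a", as appended to set/j/cmov mnemonics.
   REVERSE requests the reversed condition; FP selects the alternate
   spellings used for fcmov ("nbe"/"nb") on some assemblers.  */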
14435 static void
14436 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14437 bool fp, FILE *file)
14438 {
14439 const char *suffix;
14440
14441 if (mode == CCFPmode || mode == CCFPUmode)
14442 {
14443 code = ix86_fp_compare_code_to_integer (code);
14444 mode = CCmode;
14445 }
14446 if (reverse)
14447 code = reverse_condition (code);
14448
14449 switch (code)
14450 {
14451 case EQ:
14452 switch (mode)
14453 {
14454 case CCAmode:
14455 suffix = "a";
14456 break;
14457
14458 case CCCmode:
14459 suffix = "c";
14460 break;
14461
14462 case CCOmode:
14463 suffix = "o";
14464 break;
14465
14466 case CCSmode:
14467 suffix = "s";
14468 break;
14469
14470 default:
14471 suffix = "e";
14472 }
14473 break;
14474 case NE:
14475 switch (mode)
14476 {
14477 case CCAmode:
14478 suffix = "na";
14479 break;
14480
14481 case CCCmode:
14482 suffix = "nc";
14483 break;
14484
14485 case CCOmode:
14486 suffix = "no";
14487 break;
14488
14489 case CCSmode:
14490 suffix = "ns";
14491 break;
14492
14493 default:
14494 suffix = "ne";
14495 }
14496 break;
14497 case GT:
14498 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14499 suffix = "g";
14500 break;
14501 case GTU:
14502 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14503 Those same assemblers have the same but opposite lossage on cmov. */
14504 if (mode == CCmode)
14505 suffix = fp ? "nbe" : "a";
14506 else
14507 gcc_unreachable ();
14508 break;
14509 case LT:
14510 switch (mode)
14511 {
14512 case CCNOmode:
14513 case CCGOCmode:
14514 suffix = "s";
14515 break;
14516
14517 case CCmode:
14518 case CCGCmode:
14519 suffix = "l";
14520 break;
14521
14522 default:
14523 gcc_unreachable ();
14524 }
14525 break;
14526 case LTU:
14527 if (mode == CCmode)
14528 suffix = "b";
14529 else if (mode == CCCmode)
14530 suffix = "c";
14531 else
14532 gcc_unreachable ();
14533 break;
14534 case GE:
14535 switch (mode)
14536 {
14537 case CCNOmode:
14538 case CCGOCmode:
14539 suffix = "ns";
14540 break;
14541
14542 case CCmode:
14543 case CCGCmode:
14544 suffix = "ge";
14545 break;
14546
14547 default:
14548 gcc_unreachable ();
14549 }
14550 break;
14551 case GEU:
14552 if (mode == CCmode)
14553 suffix = fp ? "nb" : "ae";
14554 else if (mode == CCCmode)
14555 suffix = "nc";
14556 else
14557 gcc_unreachable ();
14558 break;
14559 case LE:
14560 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14561 suffix = "le";
14562 break;
14563 case LEU:
14564 if (mode == CCmode)
14565 suffix = "be";
14566 else
14567 gcc_unreachable ();
14568 break;
14569 case UNORDERED:
14570 suffix = fp ? "u" : "p";
14571 break;
14572 case ORDERED:
14573 suffix = fp ? "nu" : "np";
14574 break;
14575 default:
14576 gcc_unreachable ();
14577 }
14578 fputs (suffix, file);
14579 }
14580
14581 /* Print the name of register X to FILE based on its machine mode and number.
14582 If CODE is 'w', pretend the mode is HImode.
14583 If CODE is 'b', pretend the mode is QImode.
14584 If CODE is 'k', pretend the mode is SImode.
14585 If CODE is 'q', pretend the mode is DImode.
14586 If CODE is 'x', pretend the mode is V4SFmode.
14587 If CODE is 't', pretend the mode is V8SFmode.
14588 If CODE is 'g', pretend the mode is V16SFmode.
14589 If CODE is 'h', pretend the reg is the 'high' byte register.
14590 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14591 If CODE is 'd', duplicate the operand for an AVX instruction.
14592 */
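/* For example, for the AX register: code 'b' prints "al", 'h' prints
   "ah", 'w' prints "ax", 'k' prints "eax" and 'q' prints "rax"; for an
   SSE register, 't' prints the "ymm" name and 'g' the "zmm" name
   (each with a leading '%' in AT&T syntax).  */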
14593
14594 void
14595 print_reg (rtx x, int code, FILE *file)
14596 {
14597 const char *reg;
14598 unsigned int regno;
14599 bool duplicated = code == 'd' && TARGET_AVX;
14600
14601 if (ASSEMBLER_DIALECT == ASM_ATT)
14602 putc ('%', file);
14603
14604 if (x == pc_rtx)
14605 {
14606 gcc_assert (TARGET_64BIT);
14607 fputs ("rip", file);
14608 return;
14609 }
14610
14611 regno = true_regnum (x);
14612 gcc_assert (regno != ARG_POINTER_REGNUM
14613 && regno != FRAME_POINTER_REGNUM
14614 && regno != FLAGS_REG
14615 && regno != FPSR_REG
14616 && regno != FPCR_REG);
14617
14618 if (code == 'w' || MMX_REG_P (x))
14619 code = 2;
14620 else if (code == 'b')
14621 code = 1;
14622 else if (code == 'k')
14623 code = 4;
14624 else if (code == 'q')
14625 code = 8;
14626 else if (code == 'y')
14627 code = 3;
14628 else if (code == 'h')
14629 code = 0;
14630 else if (code == 'x')
14631 code = 16;
14632 else if (code == 't')
14633 code = 32;
14634 else if (code == 'g')
14635 code = 64;
14636 else
14637 code = GET_MODE_SIZE (GET_MODE (x));
14638
14639 /* Irritatingly, AMD extended registers use a different naming convention
14640 from the normal registers: "r%d[bwd]". */
14641 if (REX_INT_REGNO_P (regno))
14642 {
14643 gcc_assert (TARGET_64BIT);
14644 putc ('r', file);
14645 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14646 switch (code)
14647 {
14648 case 0:
14649 error ("extended registers have no high halves");
14650 break;
14651 case 1:
14652 putc ('b', file);
14653 break;
14654 case 2:
14655 putc ('w', file);
14656 break;
14657 case 4:
14658 putc ('d', file);
14659 break;
14660 case 8:
14661 /* no suffix */
14662 break;
14663 default:
14664 error ("unsupported operand size for extended register");
14665 break;
14666 }
14667 return;
14668 }
14669
14670 reg = NULL;
14671 switch (code)
14672 {
14673 case 3:
14674 if (STACK_TOP_P (x))
14675 {
14676 reg = "st(0)";
14677 break;
14678 }
14679 /* FALLTHRU */
14680 case 8:
14681 case 4:
14682 case 12:
14683 if (! ANY_FP_REG_P (x))
14684 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14685 /* FALLTHRU */
14686 case 16:
14687 case 2:
14688 normal:
14689 reg = hi_reg_name[regno];
14690 break;
14691 case 1:
14692 if (regno >= ARRAY_SIZE (qi_reg_name))
14693 goto normal;
14694 reg = qi_reg_name[regno];
14695 break;
14696 case 0:
14697 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14698 goto normal;
14699 reg = qi_high_reg_name[regno];
14700 break;
14701 case 32:
14702 if (SSE_REG_P (x))
14703 {
14704 gcc_assert (!duplicated);
14705 putc ('y', file);
14706 fputs (hi_reg_name[regno] + 1, file);
14707 return;
14708 }
14709 case 64:
14710 if (SSE_REG_P (x))
14711 {
14712 gcc_assert (!duplicated);
14713 putc ('z', file);
14714 fputs (hi_reg_name[REGNO (x)] + 1, file);
14715 return;
14716 }
14717 break;
14718 default:
14719 gcc_unreachable ();
14720 }
14721
14722 fputs (reg, file);
14723 if (duplicated)
14724 {
14725 if (ASSEMBLER_DIALECT == ASM_ATT)
14726 fprintf (file, ", %%%s", reg);
14727 else
14728 fprintf (file, ", %s", reg);
14729 }
14730 }
14731
14732 /* Locate some local-dynamic symbol still in use by this function
14733 so that we can print its name in some tls_local_dynamic_base
14734 pattern. */
14735
14736 static int
14737 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14738 {
14739 rtx x = *px;
14740
14741 if (GET_CODE (x) == SYMBOL_REF
14742 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14743 {
14744 cfun->machine->some_ld_name = XSTR (x, 0);
14745 return 1;
14746 }
14747
14748 return 0;
14749 }
14750
14751 static const char *
14752 get_some_local_dynamic_name (void)
14753 {
14754 rtx insn;
14755
14756 if (cfun->machine->some_ld_name)
14757 return cfun->machine->some_ld_name;
14758
14759 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14760 if (NONDEBUG_INSN_P (insn)
14761 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14762 return cfun->machine->some_ld_name;
14763
14764 return NULL;
14765 }
14766
14767 /* Meaning of CODE:
14768 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14769 C -- print opcode suffix for set/cmov insn.
14770 c -- like C, but print reversed condition
14771 F,f -- likewise, but for floating-point.
14772 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14773 otherwise nothing
14774 R -- print embedded rounding and sae.
14775 r -- print only sae.
14776 z -- print the opcode suffix for the size of the current operand.
14777 Z -- likewise, with special suffixes for x87 instructions.
14778 * -- print a star (in certain assembler syntax)
14779 A -- print an absolute memory reference.
14780 E -- print address with DImode register names if TARGET_64BIT.
14781 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14782 s -- print a shift double count, followed by the assembler's argument
14783 delimiter.
14784 b -- print the QImode name of the register for the indicated operand.
14785 %b0 would print %al if operands[0] is reg 0.
14786 w -- likewise, print the HImode name of the register.
14787 k -- likewise, print the SImode name of the register.
14788 q -- likewise, print the DImode name of the register.
14789 x -- likewise, print the V4SFmode name of the register.
14790 t -- likewise, print the V8SFmode name of the register.
14791 g -- likewise, print the V16SFmode name of the register.
14792 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14793 y -- print "st(0)" instead of "st" as a register.
14794 d -- print duplicated register operand for AVX instruction.
14795 D -- print condition for SSE cmp instruction.
14796 P -- if PIC, print an @PLT suffix.
14797 p -- print raw symbol name.
14798 X -- don't print any sort of PIC '@' suffix for a symbol.
14799 & -- print some in-use local-dynamic symbol name.
14800 H -- print a memory address offset by 8; used for sse high-parts
14801 Y -- print condition for XOP pcom* instruction.
14802 + -- print a branch hint as 'cs' or 'ds' prefix
14803 ; -- print a semicolon (after prefixes due to a bug in older gas).
14804 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14805 @ -- print the segment register of the thread base pointer load
14806 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14807 */
14808
14809 void
14810 ix86_print_operand (FILE *file, rtx x, int code)
14811 {
14812 if (code)
14813 {
14814 switch (code)
14815 {
14816 case 'A':
14817 switch (ASSEMBLER_DIALECT)
14818 {
14819 case ASM_ATT:
14820 putc ('*', file);
14821 break;
14822
14823 case ASM_INTEL:
14824 /* Intel syntax. For absolute addresses, registers should not
14825 be surrounded by brackets. */
14826 if (!REG_P (x))
14827 {
14828 putc ('[', file);
14829 ix86_print_operand (file, x, 0);
14830 putc (']', file);
14831 return;
14832 }
14833 break;
14834
14835 default:
14836 gcc_unreachable ();
14837 }
14838
14839 ix86_print_operand (file, x, 0);
14840 return;
14841
14842 case 'E':
14843 /* Wrap address in an UNSPEC to declare special handling. */
14844 if (TARGET_64BIT)
14845 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14846
14847 output_address (x);
14848 return;
14849
14850 case 'L':
14851 if (ASSEMBLER_DIALECT == ASM_ATT)
14852 putc ('l', file);
14853 return;
14854
14855 case 'W':
14856 if (ASSEMBLER_DIALECT == ASM_ATT)
14857 putc ('w', file);
14858 return;
14859
14860 case 'B':
14861 if (ASSEMBLER_DIALECT == ASM_ATT)
14862 putc ('b', file);
14863 return;
14864
14865 case 'Q':
14866 if (ASSEMBLER_DIALECT == ASM_ATT)
14867 putc ('l', file);
14868 return;
14869
14870 case 'S':
14871 if (ASSEMBLER_DIALECT == ASM_ATT)
14872 putc ('s', file);
14873 return;
14874
14875 case 'T':
14876 if (ASSEMBLER_DIALECT == ASM_ATT)
14877 putc ('t', file);
14878 return;
14879
14880 case 'O':
14881 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14882 if (ASSEMBLER_DIALECT != ASM_ATT)
14883 return;
14884
14885 switch (GET_MODE_SIZE (GET_MODE (x)))
14886 {
14887 case 2:
14888 putc ('w', file);
14889 break;
14890
14891 case 4:
14892 putc ('l', file);
14893 break;
14894
14895 case 8:
14896 putc ('q', file);
14897 break;
14898
14899 default:
14900 output_operand_lossage
14901 ("invalid operand size for operand code 'O'");
14902 return;
14903 }
14904
14905 putc ('.', file);
14906 #endif
14907 return;
14908
14909 case 'z':
14910 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14911 {
14912 /* Opcodes don't get size suffixes when using Intel syntax. */
14913 if (ASSEMBLER_DIALECT == ASM_INTEL)
14914 return;
14915
14916 switch (GET_MODE_SIZE (GET_MODE (x)))
14917 {
14918 case 1:
14919 putc ('b', file);
14920 return;
14921
14922 case 2:
14923 putc ('w', file);
14924 return;
14925
14926 case 4:
14927 putc ('l', file);
14928 return;
14929
14930 case 8:
14931 putc ('q', file);
14932 return;
14933
14934 default:
14935 output_operand_lossage
14936 ("invalid operand size for operand code 'z'");
14937 return;
14938 }
14939 }
14940
14941 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14942 warning
14943 (0, "non-integer operand used with operand code 'z'");
14944 /* FALLTHRU */
14945
14946 case 'Z':
14947 /* 387 opcodes don't get size suffixes when using Intel syntax. */
14948 if (ASSEMBLER_DIALECT == ASM_INTEL)
14949 return;
14950
14951 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14952 {
14953 switch (GET_MODE_SIZE (GET_MODE (x)))
14954 {
14955 case 2:
14956 #ifdef HAVE_AS_IX86_FILDS
14957 putc ('s', file);
14958 #endif
14959 return;
14960
14961 case 4:
14962 putc ('l', file);
14963 return;
14964
14965 case 8:
14966 #ifdef HAVE_AS_IX86_FILDQ
14967 putc ('q', file);
14968 #else
14969 fputs ("ll", file);
14970 #endif
14971 return;
14972
14973 default:
14974 break;
14975 }
14976 }
14977 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14978 {
14979 /* 387 opcodes don't get size suffixes
14980 if the operands are registers. */
14981 if (STACK_REG_P (x))
14982 return;
14983
14984 switch (GET_MODE_SIZE (GET_MODE (x)))
14985 {
14986 case 4:
14987 putc ('s', file);
14988 return;
14989
14990 case 8:
14991 putc ('l', file);
14992 return;
14993
14994 case 12:
14995 case 16:
14996 putc ('t', file);
14997 return;
14998
14999 default:
15000 break;
15001 }
15002 }
15003 else
15004 {
15005 output_operand_lossage
15006 ("invalid operand type used with operand code 'Z'");
15007 return;
15008 }
15009
15010 output_operand_lossage
15011 ("invalid operand size for operand code 'Z'");
15012 return;
15013
15014 case 'd':
15015 case 'b':
15016 case 'w':
15017 case 'k':
15018 case 'q':
15019 case 'h':
15020 case 't':
15021 case 'g':
15022 case 'y':
15023 case 'x':
15024 case 'X':
15025 case 'P':
15026 case 'p':
15027 break;
15028
15029 case 's':
15030 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15031 {
15032 ix86_print_operand (file, x, 0);
15033 fputs (", ", file);
15034 }
15035 return;
15036
15037 case 'Y':
15038 switch (GET_CODE (x))
15039 {
15040 case NE:
15041 fputs ("neq", file);
15042 break;
15043 case EQ:
15044 fputs ("eq", file);
15045 break;
15046 case GE:
15047 case GEU:
15048 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15049 break;
15050 case GT:
15051 case GTU:
15052 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15053 break;
15054 case LE:
15055 case LEU:
15056 fputs ("le", file);
15057 break;
15058 case LT:
15059 case LTU:
15060 fputs ("lt", file);
15061 break;
15062 case UNORDERED:
15063 fputs ("unord", file);
15064 break;
15065 case ORDERED:
15066 fputs ("ord", file);
15067 break;
15068 case UNEQ:
15069 fputs ("ueq", file);
15070 break;
15071 case UNGE:
15072 fputs ("nlt", file);
15073 break;
15074 case UNGT:
15075 fputs ("nle", file);
15076 break;
15077 case UNLE:
15078 fputs ("ule", file);
15079 break;
15080 case UNLT:
15081 fputs ("ult", file);
15082 break;
15083 case LTGT:
15084 fputs ("une", file);
15085 break;
15086 default:
15087 output_operand_lossage ("operand is not a condition code, "
15088 "invalid operand code 'Y'");
15089 return;
15090 }
15091 return;
15092
15093 case 'D':
15094 /* Little bit of braindamage here. The SSE compare instructions
15095 use completely different names for the comparisons than the
15096 fp conditional moves do. */
15097 switch (GET_CODE (x))
15098 {
15099 case UNEQ:
15100 if (TARGET_AVX)
15101 {
15102 fputs ("eq_us", file);
15103 break;
15104 }
15105 case EQ:
15106 fputs ("eq", file);
15107 break;
15108 case UNLT:
15109 if (TARGET_AVX)
15110 {
15111 fputs ("nge", file);
15112 break;
15113 }
15114 case LT:
15115 fputs ("lt", file);
15116 break;
15117 case UNLE:
15118 if (TARGET_AVX)
15119 {
15120 fputs ("ngt", file);
15121 break;
15122 }
15123 case LE:
15124 fputs ("le", file);
15125 break;
15126 case UNORDERED:
15127 fputs ("unord", file);
15128 break;
15129 case LTGT:
15130 if (TARGET_AVX)
15131 {
15132 fputs ("neq_oq", file);
15133 break;
15134 }
15135 case NE:
15136 fputs ("neq", file);
15137 break;
15138 case GE:
15139 if (TARGET_AVX)
15140 {
15141 fputs ("ge", file);
15142 break;
15143 }
15144 case UNGE:
15145 fputs ("nlt", file);
15146 break;
15147 case GT:
15148 if (TARGET_AVX)
15149 {
15150 fputs ("gt", file);
15151 break;
15152 }
15153 case UNGT:
15154 fputs ("nle", file);
15155 break;
15156 case ORDERED:
15157 fputs ("ord", file);
15158 break;
15159 default:
15160 output_operand_lossage ("operand is not a condition code, "
15161 "invalid operand code 'D'");
15162 return;
15163 }
15164 return;
15165
15166 case 'F':
15167 case 'f':
15168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15169 if (ASSEMBLER_DIALECT == ASM_ATT)
15170 putc ('.', file);
15171 #endif
15172
15173 case 'C':
15174 case 'c':
15175 if (!COMPARISON_P (x))
15176 {
15177 output_operand_lossage ("operand is not a condition code, "
15178 "invalid operand code '%c'", code);
15179 return;
15180 }
15181 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15182 code == 'c' || code == 'f',
15183 code == 'F' || code == 'f',
15184 file);
15185 return;
15186
15187 case 'H':
15188 if (!offsettable_memref_p (x))
15189 {
15190 output_operand_lossage ("operand is not an offsettable memory "
15191 "reference, invalid operand code 'H'");
15192 return;
15193 }
15194 /* It doesn't actually matter what mode we use here, as we're
15195 only going to use this for printing. */
15196 x = adjust_address_nv (x, DImode, 8);
15197 /* Output 'qword ptr' for intel assembler dialect. */
15198 if (ASSEMBLER_DIALECT == ASM_INTEL)
15199 code = 'q';
15200 break;
15201
15202 case 'K':
15203 gcc_assert (CONST_INT_P (x));
15204
15205 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15206 #ifdef HAVE_AS_IX86_HLE
15207 fputs ("xacquire ", file);
15208 #else
15209 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15210 #endif
15211 else if (INTVAL (x) & IX86_HLE_RELEASE)
15212 #ifdef HAVE_AS_IX86_HLE
15213 fputs ("xrelease ", file);
15214 #else
15215 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15216 #endif
15217 /* We do not want to print the value of the operand. */
15218 return;
15219
15220 case 'N':
15221 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15222 fputs ("{z}", file);
15223 return;
15224
15225 case 'r':
15226 gcc_assert (CONST_INT_P (x));
15227 gcc_assert (INTVAL (x) == ROUND_SAE);
15228
15229 if (ASSEMBLER_DIALECT == ASM_INTEL)
15230 fputs (", ", file);
15231
15232 fputs ("{sae}", file);
15233
15234 if (ASSEMBLER_DIALECT == ASM_ATT)
15235 fputs (", ", file);
15236
15237 return;
15238
15239 case 'R':
15240 gcc_assert (CONST_INT_P (x));
15241
15242 if (ASSEMBLER_DIALECT == ASM_INTEL)
15243 fputs (", ", file);
15244
15245 switch (INTVAL (x))
15246 {
15247 case ROUND_NEAREST_INT | ROUND_SAE:
15248 fputs ("{rn-sae}", file);
15249 break;
15250 case ROUND_NEG_INF | ROUND_SAE:
15251 fputs ("{rd-sae}", file);
15252 break;
15253 case ROUND_POS_INF | ROUND_SAE:
15254 fputs ("{ru-sae}", file);
15255 break;
15256 case ROUND_ZERO | ROUND_SAE:
15257 fputs ("{rz-sae}", file);
15258 break;
15259 default:
15260 gcc_unreachable ();
15261 }
15262
15263 if (ASSEMBLER_DIALECT == ASM_ATT)
15264 fputs (", ", file);
15265
15266 return;
15267
15268 case '*':
15269 if (ASSEMBLER_DIALECT == ASM_ATT)
15270 putc ('*', file);
15271 return;
15272
15273 case '&':
15274 {
15275 const char *name = get_some_local_dynamic_name ();
15276 if (name == NULL)
15277 output_operand_lossage ("'%%&' used without any "
15278 "local dynamic TLS references");
15279 else
15280 assemble_name (file, name);
15281 return;
15282 }
15283
15284 case '+':
15285 {
15286 rtx x;
15287
15288 if (!optimize
15289 || optimize_function_for_size_p (cfun)
15290 || !TARGET_BRANCH_PREDICTION_HINTS)
15291 return;
15292
15293 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15294 if (x)
15295 {
15296 int pred_val = XINT (x, 0);
15297
15298 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15299 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15300 {
15301 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15302 bool cputaken
15303 = final_forward_branch_p (current_output_insn) == 0;
15304
15305 /* Emit hints only in the case the default branch prediction
15306 heuristics would fail. */
15307 if (taken != cputaken)
15308 {
15309 /* We use 3e (DS) prefix for taken branches and
15310 2e (CS) prefix for not taken branches. */
15311 if (taken)
15312 fputs ("ds ; ", file);
15313 else
15314 fputs ("cs ; ", file);
15315 }
15316 }
15317 }
15318 return;
15319 }
15320
15321 case ';':
15322 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15323 putc (';', file);
15324 #endif
15325 return;
15326
15327 case '@':
15328 if (ASSEMBLER_DIALECT == ASM_ATT)
15329 putc ('%', file);
15330
15331 /* The kernel uses a different segment register for performance
15332 reasons; that way a system call does not have to trash the
15333 userspace segment register, which would be expensive. */
15334 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15335 fputs ("fs", file);
15336 else
15337 fputs ("gs", file);
15338 return;
15339
15340 case '~':
15341 putc (TARGET_AVX2 ? 'i' : 'f', file);
15342 return;
15343
15344 case '^':
15345 if (TARGET_64BIT && Pmode != word_mode)
15346 fputs ("addr32 ", file);
15347 return;
15348
15349 default:
15350 output_operand_lossage ("invalid operand code '%c'", code);
15351 }
15352 }
15353
15354 if (REG_P (x))
15355 print_reg (x, code, file);
15356
15357 else if (MEM_P (x))
15358 {
15359 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15360 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15361 && GET_MODE (x) != BLKmode)
15362 {
15363 const char * size;
15364 switch (GET_MODE_SIZE (GET_MODE (x)))
15365 {
15366 case 1: size = "BYTE"; break;
15367 case 2: size = "WORD"; break;
15368 case 4: size = "DWORD"; break;
15369 case 8: size = "QWORD"; break;
15370 case 12: size = "TBYTE"; break;
15371 case 16:
15372 if (GET_MODE (x) == XFmode)
15373 size = "TBYTE";
15374 else
15375 size = "XMMWORD";
15376 break;
15377 case 32: size = "YMMWORD"; break;
15378 case 64: size = "ZMMWORD"; break;
15379 default:
15380 gcc_unreachable ();
15381 }
15382
15383 /* Check for explicit size override (codes 'b', 'w', 'k',
15384 'q' and 'x') */
15385 if (code == 'b')
15386 size = "BYTE";
15387 else if (code == 'w')
15388 size = "WORD";
15389 else if (code == 'k')
15390 size = "DWORD";
15391 else if (code == 'q')
15392 size = "QWORD";
15393 else if (code == 'x')
15394 size = "XMMWORD";
15395
15396 fputs (size, file);
15397 fputs (" PTR ", file);
15398 }
15399
15400 x = XEXP (x, 0);
15401 /* Avoid (%rip) for call operands. */
15402 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15403 && !CONST_INT_P (x))
15404 output_addr_const (file, x);
15405 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15406 output_operand_lossage ("invalid constraints for operand");
15407 else
15408 output_address (x);
15409 }
15410
15411 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15412 {
15413 REAL_VALUE_TYPE r;
15414 long l;
15415
15416 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15417 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15418
15419 if (ASSEMBLER_DIALECT == ASM_ATT)
15420 putc ('$', file);
15421 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15422 if (code == 'q')
15423 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15424 (unsigned long long) (int) l);
15425 else
15426 fprintf (file, "0x%08x", (unsigned int) l);
15427 }
15428
15429 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15430 {
15431 REAL_VALUE_TYPE r;
15432 long l[2];
15433
15434 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15435 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15436
15437 if (ASSEMBLER_DIALECT == ASM_ATT)
15438 putc ('$', file);
15439 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15440 }
15441
15442 /* These float cases don't actually occur as immediate operands. */
15443 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15444 {
15445 char dstr[30];
15446
15447 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15448 fputs (dstr, file);
15449 }
15450
15451 else
15452 {
15453 /* We have patterns that allow zero sets of memory, for instance.
15454 In 64-bit mode, we should probably support all 8-byte vectors,
15455 since we can in fact encode that into an immediate. */
15456 if (GET_CODE (x) == CONST_VECTOR)
15457 {
15458 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15459 x = const0_rtx;
15460 }
15461
15462 if (code != 'P' && code != 'p')
15463 {
15464 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15465 {
15466 if (ASSEMBLER_DIALECT == ASM_ATT)
15467 putc ('$', file);
15468 }
15469 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15470 || GET_CODE (x) == LABEL_REF)
15471 {
15472 if (ASSEMBLER_DIALECT == ASM_ATT)
15473 putc ('$', file);
15474 else
15475 fputs ("OFFSET FLAT:", file);
15476 }
15477 }
15478 if (CONST_INT_P (x))
15479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15480 else if (flag_pic || MACHOPIC_INDIRECT)
15481 output_pic_addr_const (file, x, code);
15482 else
15483 output_addr_const (file, x);
15484 }
15485 }
15486
15487 static bool
15488 ix86_print_operand_punct_valid_p (unsigned char code)
15489 {
15490 return (code == '@' || code == '*' || code == '+' || code == '&'
15491 || code == ';' || code == '~' || code == '^');
15492 }
15493 \f
15494 /* Print a memory operand whose address is ADDR. */
15495
15496 static void
15497 ix86_print_operand_address (FILE *file, rtx addr)
15498 {
15499 struct ix86_address parts;
15500 rtx base, index, disp;
15501 int scale;
15502 int ok;
15503 bool vsib = false;
15504 int code = 0;
15505
15506 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15507 {
15508 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15509 gcc_assert (parts.index == NULL_RTX);
15510 parts.index = XVECEXP (addr, 0, 1);
15511 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15512 addr = XVECEXP (addr, 0, 0);
15513 vsib = true;
15514 }
15515 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15516 {
15517 gcc_assert (TARGET_64BIT);
15518 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15519 code = 'q';
15520 }
15521 else
15522 ok = ix86_decompose_address (addr, &parts);
15523
15524 gcc_assert (ok);
15525
15526 base = parts.base;
15527 index = parts.index;
15528 disp = parts.disp;
15529 scale = parts.scale;
15530
15531 switch (parts.seg)
15532 {
15533 case SEG_DEFAULT:
15534 break;
15535 case SEG_FS:
15536 case SEG_GS:
15537 if (ASSEMBLER_DIALECT == ASM_ATT)
15538 putc ('%', file);
15539 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15540 break;
15541 default:
15542 gcc_unreachable ();
15543 }
15544
15545 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
15546 if (TARGET_64BIT && !base && !index)
15547 {
15548 rtx symbol = disp;
15549
15550 if (GET_CODE (disp) == CONST
15551 && GET_CODE (XEXP (disp, 0)) == PLUS
15552 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15553 symbol = XEXP (XEXP (disp, 0), 0);
15554
15555 if (GET_CODE (symbol) == LABEL_REF
15556 || (GET_CODE (symbol) == SYMBOL_REF
15557 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15558 base = pc_rtx;
15559 }
15560 if (!base && !index)
15561 {
15562 /* A displacement-only address requires special attention. */
15563
15564 if (CONST_INT_P (disp))
15565 {
15566 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15567 fputs ("ds:", file);
15568 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15569 }
15570 else if (flag_pic)
15571 output_pic_addr_const (file, disp, 0);
15572 else
15573 output_addr_const (file, disp);
15574 }
15575 else
15576 {
15577 /* Print SImode register names to force the addr32 prefix. */
15578 if (SImode_address_operand (addr, VOIDmode))
15579 {
15580 #ifdef ENABLE_CHECKING
15581 gcc_assert (TARGET_64BIT);
15582 switch (GET_CODE (addr))
15583 {
15584 case SUBREG:
15585 gcc_assert (GET_MODE (addr) == SImode);
15586 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15587 break;
15588 case ZERO_EXTEND:
15589 case AND:
15590 gcc_assert (GET_MODE (addr) == DImode);
15591 break;
15592 default:
15593 gcc_unreachable ();
15594 }
15595 #endif
15596 gcc_assert (!code);
15597 code = 'k';
15598 }
15599 else if (code == 0
15600 && TARGET_X32
15601 && disp
15602 && CONST_INT_P (disp)
15603 && INTVAL (disp) < -16*1024*1024)
15604 {
15605 /* X32 runs in 64-bit mode, where displacement, DISP, in
15606 address DISP(%r64), is encoded as 32-bit immediate sign-
15607 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15608 address is %r64 + 0xffffffffbffffd00. When %r64 <
15609 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15610 which is invalid for x32. The correct address is %r64
15611 - 0x40000300 == 0xf7ffdd64. To properly encode
15612 -0x40000300(%r64) for x32, we zero-extend negative
15613 displacement by forcing addr32 prefix which truncates
15614 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15615 zero-extend all negative displacements, including -1(%rsp).
15616 However, for small negative displacements, sign-extension
15617 won't cause overflow. We only zero-extend negative
15618 displacements if they are < -16*1024*1024, which is also used
15619 to check legitimate address displacements for PIC. */
15620 code = 'k';
15621 }
15622
15623 if (ASSEMBLER_DIALECT == ASM_ATT)
15624 {
15625 if (disp)
15626 {
15627 if (flag_pic)
15628 output_pic_addr_const (file, disp, 0);
15629 else if (GET_CODE (disp) == LABEL_REF)
15630 output_asm_label (disp);
15631 else
15632 output_addr_const (file, disp);
15633 }
15634
15635 putc ('(', file);
15636 if (base)
15637 print_reg (base, code, file);
15638 if (index)
15639 {
15640 putc (',', file);
15641 print_reg (index, vsib ? 0 : code, file);
15642 if (scale != 1 || vsib)
15643 fprintf (file, ",%d", scale);
15644 }
15645 putc (')', file);
15646 }
15647 else
15648 {
15649 rtx offset = NULL_RTX;
15650
15651 if (disp)
15652 {
15653 /* Pull out the offset of a symbol; print any symbol itself. */
15654 if (GET_CODE (disp) == CONST
15655 && GET_CODE (XEXP (disp, 0)) == PLUS
15656 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15657 {
15658 offset = XEXP (XEXP (disp, 0), 1);
15659 disp = gen_rtx_CONST (VOIDmode,
15660 XEXP (XEXP (disp, 0), 0));
15661 }
15662
15663 if (flag_pic)
15664 output_pic_addr_const (file, disp, 0);
15665 else if (GET_CODE (disp) == LABEL_REF)
15666 output_asm_label (disp);
15667 else if (CONST_INT_P (disp))
15668 offset = disp;
15669 else
15670 output_addr_const (file, disp);
15671 }
15672
15673 putc ('[', file);
15674 if (base)
15675 {
15676 print_reg (base, code, file);
15677 if (offset)
15678 {
15679 if (INTVAL (offset) >= 0)
15680 putc ('+', file);
15681 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15682 }
15683 }
15684 else if (offset)
15685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15686 else
15687 putc ('0', file);
15688
15689 if (index)
15690 {
15691 putc ('+', file);
15692 print_reg (index, vsib ? 0 : code, file);
15693 if (scale != 1 || vsib)
15694 fprintf (file, "*%d", scale);
15695 }
15696 putc (']', file);
15697 }
15698 }
15699 }
15700
15701 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15702
15703 static bool
15704 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15705 {
15706 rtx op;
15707
15708 if (GET_CODE (x) != UNSPEC)
15709 return false;
15710
15711 op = XVECEXP (x, 0, 0);
15712 switch (XINT (x, 1))
15713 {
15714 case UNSPEC_GOTTPOFF:
15715 output_addr_const (file, op);
15716 /* FIXME: This might be @TPOFF in Sun ld. */
15717 fputs ("@gottpoff", file);
15718 break;
15719 case UNSPEC_TPOFF:
15720 output_addr_const (file, op);
15721 fputs ("@tpoff", file);
15722 break;
15723 case UNSPEC_NTPOFF:
15724 output_addr_const (file, op);
15725 if (TARGET_64BIT)
15726 fputs ("@tpoff", file);
15727 else
15728 fputs ("@ntpoff", file);
15729 break;
15730 case UNSPEC_DTPOFF:
15731 output_addr_const (file, op);
15732 fputs ("@dtpoff", file);
15733 break;
15734 case UNSPEC_GOTNTPOFF:
15735 output_addr_const (file, op);
15736 if (TARGET_64BIT)
15737 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15738 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15739 else
15740 fputs ("@gotntpoff", file);
15741 break;
15742 case UNSPEC_INDNTPOFF:
15743 output_addr_const (file, op);
15744 fputs ("@indntpoff", file);
15745 break;
15746 #if TARGET_MACHO
15747 case UNSPEC_MACHOPIC_OFFSET:
15748 output_addr_const (file, op);
15749 putc ('-', file);
15750 machopic_output_function_base_name (file);
15751 break;
15752 #endif
15753
15754 case UNSPEC_STACK_CHECK:
15755 {
15756 int offset;
15757
15758 gcc_assert (flag_split_stack);
15759
15760 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15761 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15762 #else
15763 gcc_unreachable ();
15764 #endif
15765
15766 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15767 }
15768 break;
15769
15770 default:
15771 return false;
15772 }
15773
15774 return true;
15775 }
15776 \f
15777 /* Split one or more double-mode RTL references into pairs of half-mode
15778 references. The RTL can be REG, offsettable MEM, integer constant, or
15779 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15780 split and "num" is its length. lo_half and hi_half are output arrays
15781 that parallel "operands". */
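/* For example, on this little-endian target a DImode register operand
   is split into two SImode subregs at byte offsets 0 (low half) and 4
   (high half), and a DImode MEM into two SImode memory references at
   the same offsets from its address. */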
15782
15783 void
15784 split_double_mode (enum machine_mode mode, rtx operands[],
15785 int num, rtx lo_half[], rtx hi_half[])
15786 {
15787 enum machine_mode half_mode;
15788 unsigned int byte;
15789
15790 switch (mode)
15791 {
15792 case TImode:
15793 half_mode = DImode;
15794 break;
15795 case DImode:
15796 half_mode = SImode;
15797 break;
15798 default:
15799 gcc_unreachable ();
15800 }
15801
15802 byte = GET_MODE_SIZE (half_mode);
15803
15804 while (num--)
15805 {
15806 rtx op = operands[num];
15807
15808 /* simplify_subreg refuses to split volatile memory addresses,
15809 but we still have to handle them. */
15810 if (MEM_P (op))
15811 {
15812 lo_half[num] = adjust_address (op, half_mode, 0);
15813 hi_half[num] = adjust_address (op, half_mode, byte);
15814 }
15815 else
15816 {
15817 lo_half[num] = simplify_gen_subreg (half_mode, op,
15818 GET_MODE (op) == VOIDmode
15819 ? mode : GET_MODE (op), 0);
15820 hi_half[num] = simplify_gen_subreg (half_mode, op,
15821 GET_MODE (op) == VOIDmode
15822 ? mode : GET_MODE (op), byte);
15823 }
15824 }
15825 }
15826 \f
15827 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15828 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15829 is the expression of the binary operation. The output may either be
15830 emitted here, or returned to the caller, like all output_* functions.
15831
15832 There is no guarantee that the operands are the same mode, as they
15833 might be within FLOAT or FLOAT_EXTEND expressions. */
15834
15835 #ifndef SYSV386_COMPAT
15836 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15837 wants to fix the assemblers because that causes incompatibility
15838 with gcc. No-one wants to fix gcc because that causes
15839 incompatibility with assemblers... You can use the option of
15840 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15841 #define SYSV386_COMPAT 1
15842 #endif
15843
15844 const char *
15845 output_387_binary_op (rtx insn, rtx *operands)
15846 {
15847 static char buf[40];
15848 const char *p;
15849 const char *ssep;
15850 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15851
15852 #ifdef ENABLE_CHECKING
15853 /* Even if we do not want to check the inputs, this documents the input
15854 constraints, which helps in understanding the following code. */
15855 if (STACK_REG_P (operands[0])
15856 && ((REG_P (operands[1])
15857 && REGNO (operands[0]) == REGNO (operands[1])
15858 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15859 || (REG_P (operands[2])
15860 && REGNO (operands[0]) == REGNO (operands[2])
15861 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15862 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15863 ; /* ok */
15864 else
15865 gcc_assert (is_sse);
15866 #endif
15867
15868 switch (GET_CODE (operands[3]))
15869 {
15870 case PLUS:
15871 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15872 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15873 p = "fiadd";
15874 else
15875 p = "fadd";
15876 ssep = "vadd";
15877 break;
15878
15879 case MINUS:
15880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15882 p = "fisub";
15883 else
15884 p = "fsub";
15885 ssep = "vsub";
15886 break;
15887
15888 case MULT:
15889 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15890 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15891 p = "fimul";
15892 else
15893 p = "fmul";
15894 ssep = "vmul";
15895 break;
15896
15897 case DIV:
15898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15900 p = "fidiv";
15901 else
15902 p = "fdiv";
15903 ssep = "vdiv";
15904 break;
15905
15906 default:
15907 gcc_unreachable ();
15908 }
15909
15910 if (is_sse)
15911 {
15912 if (TARGET_AVX)
15913 {
15914 strcpy (buf, ssep);
15915 if (GET_MODE (operands[0]) == SFmode)
15916 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15917 else
15918 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15919 }
15920 else
15921 {
15922 strcpy (buf, ssep + 1);
15923 if (GET_MODE (operands[0]) == SFmode)
15924 strcat (buf, "ss\t{%2, %0|%0, %2}");
15925 else
15926 strcat (buf, "sd\t{%2, %0|%0, %2}");
15927 }
15928 return buf;
15929 }
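/* E.g. a DFmode PLUS yields "vaddsd\t{%2, %1, %0|%0, %1, %2}" with AVX
   and "addsd\t{%2, %0|%0, %2}" without it.  The rest of this function
   handles the x87 register-stack forms. */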
15930 strcpy (buf, p);
15931
15932 switch (GET_CODE (operands[3]))
15933 {
15934 case MULT:
15935 case PLUS:
15936 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15937 {
15938 rtx temp = operands[2];
15939 operands[2] = operands[1];
15940 operands[1] = temp;
15941 }
15942
15943 /* We now know operands[0] == operands[1]. */
15944
15945 if (MEM_P (operands[2]))
15946 {
15947 p = "%Z2\t%2";
15948 break;
15949 }
15950
15951 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15952 {
15953 if (STACK_TOP_P (operands[0]))
15954 /* How is it that we are storing to a dead operand[2]?
15955 Well, presumably operands[1] is dead too. We can't
15956 store the result to st(0) as st(0) gets popped on this
15957 instruction. Instead store to operands[2] (which I
15958 think has to be st(1)). st(1) will be popped later.
15959 gcc <= 2.8.1 didn't have this check and generated
15960 assembly code that the Unixware assembler rejected. */
15961 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15962 else
15963 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15964 break;
15965 }
15966
15967 if (STACK_TOP_P (operands[0]))
15968 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15969 else
15970 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15971 break;
15972
15973 case MINUS:
15974 case DIV:
15975 if (MEM_P (operands[1]))
15976 {
15977 p = "r%Z1\t%1";
15978 break;
15979 }
15980
15981 if (MEM_P (operands[2]))
15982 {
15983 p = "%Z2\t%2";
15984 break;
15985 }
15986
15987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15988 {
15989 #if SYSV386_COMPAT
15990 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15991 derived assemblers, confusingly reverse the direction of
15992 the operation for fsub{r} and fdiv{r} when the
15993 destination register is not st(0). The Intel assembler
15994 doesn't have this brain damage. Read !SYSV386_COMPAT to
15995 figure out what the hardware really does. */
15996 if (STACK_TOP_P (operands[0]))
15997 p = "{p\t%0, %2|rp\t%2, %0}";
15998 else
15999 p = "{rp\t%2, %0|p\t%0, %2}";
16000 #else
16001 if (STACK_TOP_P (operands[0]))
16002 /* As above for fmul/fadd, we can't store to st(0). */
16003 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16004 else
16005 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16006 #endif
16007 break;
16008 }
16009
16010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16011 {
16012 #if SYSV386_COMPAT
16013 if (STACK_TOP_P (operands[0]))
16014 p = "{rp\t%0, %1|p\t%1, %0}";
16015 else
16016 p = "{p\t%1, %0|rp\t%0, %1}";
16017 #else
16018 if (STACK_TOP_P (operands[0]))
16019 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16020 else
16021 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16022 #endif
16023 break;
16024 }
16025
16026 if (STACK_TOP_P (operands[0]))
16027 {
16028 if (STACK_TOP_P (operands[1]))
16029 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16030 else
16031 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16032 break;
16033 }
16034 else if (STACK_TOP_P (operands[1]))
16035 {
16036 #if SYSV386_COMPAT
16037 p = "{\t%1, %0|r\t%0, %1}";
16038 #else
16039 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16040 #endif
16041 }
16042 else
16043 {
16044 #if SYSV386_COMPAT
16045 p = "{r\t%2, %0|\t%0, %2}";
16046 #else
16047 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16048 #endif
16049 }
16050 break;
16051
16052 default:
16053 gcc_unreachable ();
16054 }
16055
16056 strcat (buf, p);
16057 return buf;
16058 }
16059
16060 /* Check if a 256bit AVX register is referenced inside of EXP. */
16061
16062 static int
16063 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16064 {
16065 rtx exp = *pexp;
16066
16067 if (GET_CODE (exp) == SUBREG)
16068 exp = SUBREG_REG (exp);
16069
16070 if (REG_P (exp)
16071 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16072 return 1;
16073
16074 return 0;
16075 }
16076
16077 /* Return needed mode for entity in optimize_mode_switching pass. */
16078
16079 static int
16080 ix86_avx_u128_mode_needed (rtx insn)
16081 {
16082 if (CALL_P (insn))
16083 {
16084 rtx link;
16085
16086 /* Needed mode is set to AVX_U128_CLEAN if there are
16087 no 256bit modes used in function arguments. */
16088 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16089 link;
16090 link = XEXP (link, 1))
16091 {
16092 if (GET_CODE (XEXP (link, 0)) == USE)
16093 {
16094 rtx arg = XEXP (XEXP (link, 0), 0);
16095
16096 if (ix86_check_avx256_register (&arg, NULL))
16097 return AVX_U128_DIRTY;
16098 }
16099 }
16100
16101 return AVX_U128_CLEAN;
16102 }
16103
16104 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16105 changes state only when a 256bit register is written to, but we need
16106 to prevent the compiler from moving the optimal insertion point above
16107 an eventual read from a 256bit register. */
16108 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16109 return AVX_U128_DIRTY;
16110
16111 return AVX_U128_ANY;
16112 }
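/* The ANY/CLEAN/DIRTY values returned above feed the generic mode
   switching (optimize_mode_switching) pass, which uses them to place
   vzeroupper only on paths where the upper halves of the YMM registers
   may be dirty; see ix86_emit_mode_set below. */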
16113
16114 /* Return mode that i387 must be switched into
16115 prior to the execution of insn. */
16116
16117 static int
16118 ix86_i387_mode_needed (int entity, rtx insn)
16119 {
16120 enum attr_i387_cw mode;
16121
16122 /* The mode UNINITIALIZED is used to store the control word after a
16123 function call or ASM pattern. The mode ANY specifies that the
16124 function has no requirements on the control word and makes no
16125 changes in the bits we are interested in. */
16126
16127 if (CALL_P (insn)
16128 || (NONJUMP_INSN_P (insn)
16129 && (asm_noperands (PATTERN (insn)) >= 0
16130 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16131 return I387_CW_UNINITIALIZED;
16132
16133 if (recog_memoized (insn) < 0)
16134 return I387_CW_ANY;
16135
16136 mode = get_attr_i387_cw (insn);
16137
16138 switch (entity)
16139 {
16140 case I387_TRUNC:
16141 if (mode == I387_CW_TRUNC)
16142 return mode;
16143 break;
16144
16145 case I387_FLOOR:
16146 if (mode == I387_CW_FLOOR)
16147 return mode;
16148 break;
16149
16150 case I387_CEIL:
16151 if (mode == I387_CW_CEIL)
16152 return mode;
16153 break;
16154
16155 case I387_MASK_PM:
16156 if (mode == I387_CW_MASK_PM)
16157 return mode;
16158 break;
16159
16160 default:
16161 gcc_unreachable ();
16162 }
16163
16164 return I387_CW_ANY;
16165 }
16166
16167 /* Return mode that entity must be switched into
16168 prior to the execution of insn. */
16169
16170 static int
16171 ix86_mode_needed (int entity, rtx insn)
16172 {
16173 switch (entity)
16174 {
16175 case AVX_U128:
16176 return ix86_avx_u128_mode_needed (insn);
16177 case I387_TRUNC:
16178 case I387_FLOOR:
16179 case I387_CEIL:
16180 case I387_MASK_PM:
16181 return ix86_i387_mode_needed (entity, insn);
16182 default:
16183 gcc_unreachable ();
16184 }
16185 return 0;
16186 }
16187
16188 /* Check if a 256bit AVX register is referenced in stores. */
16189
16190 static void
16191 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16192 {
16193 if (ix86_check_avx256_register (&dest, NULL))
16194 {
16195 bool *used = (bool *) data;
16196 *used = true;
16197 }
16198 }
16199
16200 /* Calculate mode of upper 128bit AVX registers after the insn. */
16201
16202 static int
16203 ix86_avx_u128_mode_after (int mode, rtx insn)
16204 {
16205 rtx pat = PATTERN (insn);
16206
16207 if (vzeroupper_operation (pat, VOIDmode)
16208 || vzeroall_operation (pat, VOIDmode))
16209 return AVX_U128_CLEAN;
16210
16211 /* We know that the state is clean after a CALL insn if no 256bit
16212 register is used for the function return value. */
16213 if (CALL_P (insn))
16214 {
16215 bool avx_reg256_found = false;
16216 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16217
16218 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16219 }
16220
16221 /* Otherwise, return current mode. Remember that if insn
16222 references AVX 256bit registers, the mode was already changed
16223 to DIRTY from MODE_NEEDED. */
16224 return mode;
16225 }
16226
16227 /* Return the mode that an insn results in. */
16228
16229 int
16230 ix86_mode_after (int entity, int mode, rtx insn)
16231 {
16232 switch (entity)
16233 {
16234 case AVX_U128:
16235 return ix86_avx_u128_mode_after (mode, insn);
16236 case I387_TRUNC:
16237 case I387_FLOOR:
16238 case I387_CEIL:
16239 case I387_MASK_PM:
16240 return mode;
16241 default:
16242 gcc_unreachable ();
16243 }
16244 }
16245
16246 static int
16247 ix86_avx_u128_mode_entry (void)
16248 {
16249 tree arg;
16250
16251 /* Entry mode is set to AVX_U128_DIRTY if there are
16252 256bit modes used in function arguments. */
16253 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16254 arg = TREE_CHAIN (arg))
16255 {
16256 rtx incoming = DECL_INCOMING_RTL (arg);
16257
16258 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16259 return AVX_U128_DIRTY;
16260 }
16261
16262 return AVX_U128_CLEAN;
16263 }
16264
16265 /* Return a mode that ENTITY is assumed to be
16266 switched to at function entry. */
16267
16268 static int
16269 ix86_mode_entry (int entity)
16270 {
16271 switch (entity)
16272 {
16273 case AVX_U128:
16274 return ix86_avx_u128_mode_entry ();
16275 case I387_TRUNC:
16276 case I387_FLOOR:
16277 case I387_CEIL:
16278 case I387_MASK_PM:
16279 return I387_CW_ANY;
16280 default:
16281 gcc_unreachable ();
16282 }
16283 }
16284
16285 static int
16286 ix86_avx_u128_mode_exit (void)
16287 {
16288 rtx reg = crtl->return_rtx;
16289
16290 /* Exit mode is set to AVX_U128_DIRTY if there are
16291 256bit modes used in the function return register. */
16292 if (reg && ix86_check_avx256_register (&reg, NULL))
16293 return AVX_U128_DIRTY;
16294
16295 return AVX_U128_CLEAN;
16296 }
16297
16298 /* Return a mode that ENTITY is assumed to be
16299 switched to at function exit. */
16300
16301 static int
16302 ix86_mode_exit (int entity)
16303 {
16304 switch (entity)
16305 {
16306 case AVX_U128:
16307 return ix86_avx_u128_mode_exit ();
16308 case I387_TRUNC:
16309 case I387_FLOOR:
16310 case I387_CEIL:
16311 case I387_MASK_PM:
16312 return I387_CW_ANY;
16313 default:
16314 gcc_unreachable ();
16315 }
16316 }
16317
16318 static int
16319 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16320 {
16321 return n;
16322 }
16323
16324 /* Output code to initialize control word copies used by trunc?f?i and
16325 rounding patterns. CURRENT_MODE is set to current control word,
16326 while NEW_MODE is set to new control word. */
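/* For reference: the rounding control field is bits 10-11 of the x87
   control word (mask 0x0c00): 00 = round to nearest, 01 = round down,
   10 = round up, 11 = truncate toward zero.  Bit 5 (0x0020) masks the
   precision exception, as needed for nearbyint(). */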
16327
16328 static void
16329 emit_i387_cw_initialization (int mode)
16330 {
16331 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16332 rtx new_mode;
16333
16334 enum ix86_stack_slot slot;
16335
16336 rtx reg = gen_reg_rtx (HImode);
16337
16338 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16339 emit_move_insn (reg, copy_rtx (stored_mode));
16340
16341 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16342 || optimize_insn_for_size_p ())
16343 {
16344 switch (mode)
16345 {
16346 case I387_CW_TRUNC:
16347 /* round toward zero (truncate) */
16348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16349 slot = SLOT_CW_TRUNC;
16350 break;
16351
16352 case I387_CW_FLOOR:
16353 /* round down toward -oo */
16354 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16355 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16356 slot = SLOT_CW_FLOOR;
16357 break;
16358
16359 case I387_CW_CEIL:
16360 /* round up toward +oo */
16361 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16363 slot = SLOT_CW_CEIL;
16364 break;
16365
16366 case I387_CW_MASK_PM:
16367 /* mask precision exception for nearbyint() */
16368 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16369 slot = SLOT_CW_MASK_PM;
16370 break;
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375 }
16376 else
16377 {
16378 switch (mode)
16379 {
16380 case I387_CW_TRUNC:
16381 /* round toward zero (truncate) */
16382 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16383 slot = SLOT_CW_TRUNC;
16384 break;
16385
16386 case I387_CW_FLOOR:
16387 /* round down toward -oo */
16388 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16389 slot = SLOT_CW_FLOOR;
16390 break;
16391
16392 case I387_CW_CEIL:
16393 /* round up toward +oo */
16394 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16395 slot = SLOT_CW_CEIL;
16396 break;
16397
16398 case I387_CW_MASK_PM:
16399 /* mask precision exception for nearbyint() */
16400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16401 slot = SLOT_CW_MASK_PM;
16402 break;
16403
16404 default:
16405 gcc_unreachable ();
16406 }
16407 }
16408
16409 gcc_assert (slot < MAX_386_STACK_LOCALS);
16410
16411 new_mode = assign_386_stack_local (HImode, slot);
16412 emit_move_insn (new_mode, reg);
16413 }
16414
16415 /* Emit vzeroupper. */
16416
16417 void
16418 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16419 {
16420 int i;
16421
16422 /* Cancel automatic vzeroupper insertion if there are
16423 live call-saved SSE registers at the insertion point. */
16424
16425 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16426 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16427 return;
16428
16429 if (TARGET_64BIT)
16430 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16431 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16432 return;
16433
16434 emit_insn (gen_avx_vzeroupper ());
16435 }
16436
16439 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16440 is the set of hard registers live at the point where the insn(s)
16441 are to be inserted. */
16442
16443 static void
16444 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16445 {
16446 switch (entity)
16447 {
16448 case AVX_U128:
16449 if (mode == AVX_U128_CLEAN)
16450 ix86_avx_emit_vzeroupper (regs_live);
16451 break;
16452 case I387_TRUNC:
16453 case I387_FLOOR:
16454 case I387_CEIL:
16455 case I387_MASK_PM:
16456 if (mode != I387_CW_ANY
16457 && mode != I387_CW_UNINITIALIZED)
16458 emit_i387_cw_initialization (mode);
16459 break;
16460 default:
16461 gcc_unreachable ();
16462 }
16463 }
16464
16465 /* Output code for INSN to convert a float to a signed int. OPERANDS
16466 are the insn operands. The output may be [HSD]Imode and the input
16467 operand may be [SDX]Fmode. */
16468
16469 const char *
16470 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16471 {
16472 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16473 int dimode_p = GET_MODE (operands[0]) == DImode;
16474 int round_mode = get_attr_i387_cw (insn);
16475
16476 /* Jump through a hoop or two for DImode, since the hardware has no
16477 non-popping instruction. We used to do this a different way, but
16478 that was somewhat fragile and broke with post-reload splitters. */
16479 if ((dimode_p || fisttp) && !stack_top_dies)
16480 output_asm_insn ("fld\t%y1", operands);
16481
16482 gcc_assert (STACK_TOP_P (operands[1]));
16483 gcc_assert (MEM_P (operands[0]));
16484 gcc_assert (GET_MODE (operands[1]) != TFmode);
16485
16486 if (fisttp)
16487 output_asm_insn ("fisttp%Z0\t%0", operands);
16488 else
16489 {
16490 if (round_mode != I387_CW_ANY)
16491 output_asm_insn ("fldcw\t%3", operands);
16492 if (stack_top_dies || dimode_p)
16493 output_asm_insn ("fistp%Z0\t%0", operands);
16494 else
16495 output_asm_insn ("fist%Z0\t%0", operands);
16496 if (round_mode != I387_CW_ANY)
16497 output_asm_insn ("fldcw\t%2", operands);
16498 }
16499
16500 return "";
16501 }
16502
16503 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16504 have the values zero or one, indicates the ffreep insn's operand
16505 from the OPERANDS array. */
16506
16507 static const char *
16508 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16509 {
16510 if (TARGET_USE_FFREEP)
16511 #ifdef HAVE_AS_IX86_FFREEP
16512 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16513 #else
16514 {
16515 static char retval[32];
16516 int regno = REGNO (operands[opno]);
16517
16518 gcc_assert (STACK_REGNO_P (regno));
16519
16520 regno -= FIRST_STACK_REG;
16521
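/* ffreep %st(N) encodes as the bytes 0xdf, 0xc0+N.  Emitting the
   16-bit word 0xcNdf with ASM_SHORT produces exactly those two bytes
   on this little-endian target. */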
16522 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16523 return retval;
16524 }
16525 #endif
16526
16527 return opno ? "fstp\t%y1" : "fstp\t%y0";
16528 }
16529
16530
16531 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16532 should be used. UNORDERED_P is true when fucom should be used. */
16533
16534 const char *
16535 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16536 {
16537 int stack_top_dies;
16538 rtx cmp_op0, cmp_op1;
16539 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16540
16541 if (eflags_p)
16542 {
16543 cmp_op0 = operands[0];
16544 cmp_op1 = operands[1];
16545 }
16546 else
16547 {
16548 cmp_op0 = operands[1];
16549 cmp_op1 = operands[2];
16550 }
16551
16552 if (is_sse)
16553 {
16554 if (GET_MODE (operands[0]) == SFmode)
16555 if (unordered_p)
16556 return "%vucomiss\t{%1, %0|%0, %1}";
16557 else
16558 return "%vcomiss\t{%1, %0|%0, %1}";
16559 else
16560 if (unordered_p)
16561 return "%vucomisd\t{%1, %0|%0, %1}";
16562 else
16563 return "%vcomisd\t{%1, %0|%0, %1}";
16564 }
16565
16566 gcc_assert (STACK_TOP_P (cmp_op0));
16567
16568 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16569
16570 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16571 {
16572 if (stack_top_dies)
16573 {
16574 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16575 return output_387_ffreep (operands, 1);
16576 }
16577 else
16578 return "ftst\n\tfnstsw\t%0";
16579 }
16580
16581 if (STACK_REG_P (cmp_op1)
16582 && stack_top_dies
16583 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16584 && REGNO (cmp_op1) != FIRST_STACK_REG)
16585 {
16586 /* If the top of the 387 stack dies, and the other operand is also
16587 a stack register that dies, then this must be an
16588 `fcompp' float compare. */
16589
16590 if (eflags_p)
16591 {
16592 /* There is no double popping fcomi variant. Fortunately,
16593 eflags is immune from the fstp's cc clobbering. */
16594 if (unordered_p)
16595 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16596 else
16597 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16598 return output_387_ffreep (operands, 0);
16599 }
16600 else
16601 {
16602 if (unordered_p)
16603 return "fucompp\n\tfnstsw\t%0";
16604 else
16605 return "fcompp\n\tfnstsw\t%0";
16606 }
16607 }
16608 else
16609 {
16610 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16611
16612 static const char * const alt[16] =
16613 {
16614 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16615 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16616 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16617 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16618
16619 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16620 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16621 NULL,
16622 NULL,
16623
16624 "fcomi\t{%y1, %0|%0, %y1}",
16625 "fcomip\t{%y1, %0|%0, %y1}",
16626 "fucomi\t{%y1, %0|%0, %y1}",
16627 "fucomip\t{%y1, %0|%0, %y1}",
16628
16629 NULL,
16630 NULL,
16631 NULL,
16632 NULL
16633 };
16634
16635 int mask;
16636 const char *ret;
16637
16638 mask = eflags_p << 3;
16639 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16640 mask |= unordered_p << 1;
16641 mask |= stack_top_dies;
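/* For example, eflags_p = 1, a non-integer operand, unordered_p = 1
   and stack_top_dies = 1 give mask = 8 + 2 + 1 = 11, selecting the
   popping "fucomip" alternative above. */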
16642
16643 gcc_assert (mask < 16);
16644 ret = alt[mask];
16645 gcc_assert (ret);
16646
16647 return ret;
16648 }
16649 }
16650
16651 void
16652 ix86_output_addr_vec_elt (FILE *file, int value)
16653 {
16654 const char *directive = ASM_LONG;
16655
16656 #ifdef ASM_QUAD
16657 if (TARGET_LP64)
16658 directive = ASM_QUAD;
16659 #else
16660 gcc_assert (!TARGET_64BIT);
16661 #endif
16662
16663 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16664 }
16665
16666 void
16667 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16668 {
16669 const char *directive = ASM_LONG;
16670
16671 #ifdef ASM_QUAD
16672 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16673 directive = ASM_QUAD;
16674 #else
16675 gcc_assert (!TARGET_64BIT);
16676 #endif
16677 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16678 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16679 fprintf (file, "%s%s%d-%s%d\n",
16680 directive, LPREFIX, value, LPREFIX, rel);
16681 else if (HAVE_AS_GOTOFF_IN_DATA)
16682 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16683 #if TARGET_MACHO
16684 else if (TARGET_MACHO)
16685 {
16686 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16687 machopic_output_function_base_name (file);
16688 putc ('\n', file);
16689 }
16690 #endif
16691 else
16692 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16693 GOT_SYMBOL_NAME, LPREFIX, value);
16694 }
16695 \f
16696 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16697 for the target. */
16698
16699 void
16700 ix86_expand_clear (rtx dest)
16701 {
16702 rtx tmp;
16703
16704 /* We play register width games, which are only valid after reload. */
16705 gcc_assert (reload_completed);
16706
16707 /* Avoid HImode and its attendant prefix byte. */
16708 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16709 dest = gen_rtx_REG (SImode, REGNO (dest));
16710 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16711
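/* The xor form clobbers the flags register, so wrap the SET in a
   PARALLEL with an explicit flags clobber; the plain SET form is kept
   when TARGET_USE_MOV0 prefers "mov $0, reg", which leaves the flags
   untouched. */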
16712 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16713 {
16714 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16715 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16716 }
16717
16718 emit_insn (tmp);
16719 }
16720
16721 /* X is an unchanging MEM. If it is a constant pool reference, return
16722 the constant pool rtx, else NULL. */
16723
16724 rtx
16725 maybe_get_pool_constant (rtx x)
16726 {
16727 x = ix86_delegitimize_address (XEXP (x, 0));
16728
16729 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16730 return get_pool_constant (x);
16731
16732 return NULL_RTX;
16733 }
16734
16735 void
16736 ix86_expand_move (enum machine_mode mode, rtx operands[])
16737 {
16738 rtx op0, op1;
16739 enum tls_model model;
16740
16741 op0 = operands[0];
16742 op1 = operands[1];
16743
16744 if (GET_CODE (op1) == SYMBOL_REF)
16745 {
16746 rtx tmp;
16747
16748 model = SYMBOL_REF_TLS_MODEL (op1);
16749 if (model)
16750 {
16751 op1 = legitimize_tls_address (op1, model, true);
16752 op1 = force_operand (op1, op0);
16753 if (op1 == op0)
16754 return;
16755 op1 = convert_to_mode (mode, op1, 1);
16756 }
16757 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16758 op1 = tmp;
16759 }
16760 else if (GET_CODE (op1) == CONST
16761 && GET_CODE (XEXP (op1, 0)) == PLUS
16762 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16763 {
16764 rtx addend = XEXP (XEXP (op1, 0), 1);
16765 rtx symbol = XEXP (XEXP (op1, 0), 0);
16766 rtx tmp;
16767
16768 model = SYMBOL_REF_TLS_MODEL (symbol);
16769 if (model)
16770 tmp = legitimize_tls_address (symbol, model, true);
16771 else
16772 tmp = legitimize_pe_coff_symbol (symbol, true);
16773
16774 if (tmp)
16775 {
16776 tmp = force_operand (tmp, NULL);
16777 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16778 op0, 1, OPTAB_DIRECT);
16779 if (tmp == op0)
16780 return;
16781 op1 = convert_to_mode (mode, tmp, 1);
16782 }
16783 }
16784
16785 if ((flag_pic || MACHOPIC_INDIRECT)
16786 && symbolic_operand (op1, mode))
16787 {
16788 if (TARGET_MACHO && !TARGET_64BIT)
16789 {
16790 #if TARGET_MACHO
16791 /* dynamic-no-pic */
16792 if (MACHOPIC_INDIRECT)
16793 {
16794 rtx temp = ((reload_in_progress
16795 || ((op0 && REG_P (op0))
16796 && mode == Pmode))
16797 ? op0 : gen_reg_rtx (Pmode));
16798 op1 = machopic_indirect_data_reference (op1, temp);
16799 if (MACHOPIC_PURE)
16800 op1 = machopic_legitimize_pic_address (op1, mode,
16801 temp == op1 ? 0 : temp);
16802 }
16803 if (op0 != op1 && GET_CODE (op0) != MEM)
16804 {
16805 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16806 emit_insn (insn);
16807 return;
16808 }
16809 if (GET_CODE (op0) == MEM)
16810 op1 = force_reg (Pmode, op1);
16811 else
16812 {
16813 rtx temp = op0;
16814 if (GET_CODE (temp) != REG)
16815 temp = gen_reg_rtx (Pmode);
16816 temp = legitimize_pic_address (op1, temp);
16817 if (temp == op0)
16818 return;
16819 op1 = temp;
16820 }
16821 /* dynamic-no-pic */
16822 #endif
16823 }
16824 else
16825 {
16826 if (MEM_P (op0))
16827 op1 = force_reg (mode, op1);
16828 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16829 {
16830 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16831 op1 = legitimize_pic_address (op1, reg);
16832 if (op0 == op1)
16833 return;
16834 op1 = convert_to_mode (mode, op1, 1);
16835 }
16836 }
16837 }
16838 else
16839 {
16840 if (MEM_P (op0)
16841 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16842 || !push_operand (op0, mode))
16843 && MEM_P (op1))
16844 op1 = force_reg (mode, op1);
16845
16846 if (push_operand (op0, mode)
16847 && ! general_no_elim_operand (op1, mode))
16848 op1 = copy_to_mode_reg (mode, op1);
16849
16850 /* Force large constants in 64bit compilation into a register
16851 to get them CSEed. */
16852 if (can_create_pseudo_p ()
16853 && (mode == DImode) && TARGET_64BIT
16854 && immediate_operand (op1, mode)
16855 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16856 && !register_operand (op0, mode)
16857 && optimize)
16858 op1 = copy_to_mode_reg (mode, op1);
16859
16860 if (can_create_pseudo_p ()
16861 && FLOAT_MODE_P (mode)
16862 && GET_CODE (op1) == CONST_DOUBLE)
16863 {
16864 /* If we are loading a floating point constant to a register,
16865 force the value to memory now, since we'll get better code
16866 out of the back end. */
16867
16868 op1 = validize_mem (force_const_mem (mode, op1));
16869 if (!register_operand (op0, mode))
16870 {
16871 rtx temp = gen_reg_rtx (mode);
16872 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16873 emit_move_insn (op0, temp);
16874 return;
16875 }
16876 }
16877 }
16878
16879 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16880 }
16881
16882 void
16883 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16884 {
16885 rtx op0 = operands[0], op1 = operands[1];
16886 unsigned int align = GET_MODE_ALIGNMENT (mode);
16887
16888 if (push_operand (op0, VOIDmode))
16889 op0 = emit_move_resolve_push (mode, op0);
16890
16891 /* Force constants other than zero into memory. We do not know how
16892 the instructions used to build constants modify the upper 64 bits
16893 of the register; once we have that information we may be able
16894 to handle some of them more efficiently. */
16895 if (can_create_pseudo_p ()
16896 && register_operand (op0, mode)
16897 && (CONSTANT_P (op1)
16898 || (GET_CODE (op1) == SUBREG
16899 && CONSTANT_P (SUBREG_REG (op1))))
16900 && !standard_sse_constant_p (op1))
16901 op1 = validize_mem (force_const_mem (mode, op1));
16902
16903 /* We need to check memory alignment for SSE mode since attributes
16904 can make operands unaligned. */
16905 if (can_create_pseudo_p ()
16906 && SSE_REG_MODE_P (mode)
16907 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16908 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16909 {
16910 rtx tmp[2];
16911
16912 /* ix86_expand_vector_move_misalign() does not like constants ... */
16913 if (CONSTANT_P (op1)
16914 || (GET_CODE (op1) == SUBREG
16915 && CONSTANT_P (SUBREG_REG (op1))))
16916 op1 = validize_mem (force_const_mem (mode, op1));
16917
16918 /* ... nor both arguments in memory. */
16919 if (!register_operand (op0, mode)
16920 && !register_operand (op1, mode))
16921 op1 = force_reg (mode, op1);
16922
16923 tmp[0] = op0; tmp[1] = op1;
16924 ix86_expand_vector_move_misalign (mode, tmp);
16925 return;
16926 }
16927
16928 /* Make operand1 a register if it isn't already. */
16929 if (can_create_pseudo_p ()
16930 && !register_operand (op0, mode)
16931 && !register_operand (op1, mode))
16932 {
16933 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16934 return;
16935 }
16936
16937 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16938 }
16939
16940 /* Split 32-byte AVX unaligned load and store if needed. */
16941
16942 static void
16943 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16944 {
16945 rtx m;
16946 rtx (*extract) (rtx, rtx, rtx);
16947 rtx (*load_unaligned) (rtx, rtx);
16948 rtx (*store_unaligned) (rtx, rtx);
16949 enum machine_mode mode;
16950
16951 switch (GET_MODE (op0))
16952 {
16953 default:
16954 gcc_unreachable ();
16955 case V32QImode:
16956 extract = gen_avx_vextractf128v32qi;
16957 load_unaligned = gen_avx_loaddquv32qi;
16958 store_unaligned = gen_avx_storedquv32qi;
16959 mode = V16QImode;
16960 break;
16961 case V8SFmode:
16962 extract = gen_avx_vextractf128v8sf;
16963 load_unaligned = gen_avx_loadups256;
16964 store_unaligned = gen_avx_storeups256;
16965 mode = V4SFmode;
16966 break;
16967 case V4DFmode:
16968 extract = gen_avx_vextractf128v4df;
16969 load_unaligned = gen_avx_loadupd256;
16970 store_unaligned = gen_avx_storeupd256;
16971 mode = V2DFmode;
16972 break;
16973 }
16974
16975 if (MEM_P (op1))
16976 {
16977 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16978 {
16979 rtx r = gen_reg_rtx (mode);
16980 m = adjust_address (op1, mode, 0);
16981 emit_move_insn (r, m);
16982 m = adjust_address (op1, mode, 16);
16983 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16984 emit_move_insn (op0, r);
16985 }
16986 /* Normal *mov<mode>_internal pattern will handle
16987 unaligned loads just fine if misaligned_operand
16988 is true, and without the UNSPEC it can be combined
16989 with arithmetic instructions. */
16990 else if (misaligned_operand (op1, GET_MODE (op1)))
16991 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16992 else
16993 emit_insn (load_unaligned (op0, op1));
16994 }
16995 else if (MEM_P (op0))
16996 {
16997 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16998 {
16999 m = adjust_address (op0, mode, 0);
17000 emit_insn (extract (m, op1, const0_rtx));
17001 m = adjust_address (op0, mode, 16);
17002 emit_insn (extract (m, op1, const1_rtx));
17003 }
17004 else
17005 emit_insn (store_unaligned (op0, op1));
17006 }
17007 else
17008 gcc_unreachable ();
17009 }
17010
17011 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17012 straight to ix86_expand_vector_move. */
17013 /* Code generation for scalar reg-reg moves of single and double precision data:
17014 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
17015 movaps reg, reg
17016 else
17017 movss reg, reg
17018 if (x86_sse_partial_reg_dependency == true)
17019 movapd reg, reg
17020 else
17021 movsd reg, reg
17022
17023 Code generation for scalar loads of double precision data:
17024 if (x86_sse_split_regs == true)
17025 movlpd mem, reg (gas syntax)
17026 else
17027 movsd mem, reg
17028
17029 Code generation for unaligned packed loads of single precision data
17030 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17031 if (x86_sse_unaligned_move_optimal)
17032 movups mem, reg
17033
17034 if (x86_sse_partial_reg_dependency == true)
17035 {
17036 xorps reg, reg
17037 movlps mem, reg
17038 movhps mem+8, reg
17039 }
17040 else
17041 {
17042 movlps mem, reg
17043 movhps mem+8, reg
17044 }
17045
17046 Code generation for unaligned packed loads of double precision data
17047 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17048 if (x86_sse_unaligned_move_optimal)
17049 movupd mem, reg
17050
17051 if (x86_sse_split_regs == true)
17052 {
17053 movlpd mem, reg
17054 movhpd mem+8, reg
17055 }
17056 else
17057 {
17058 movsd mem, reg
17059 movhpd mem+8, reg
17060 }
17061 */
17062
17063 void
17064 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17065 {
17066 rtx op0, op1, orig_op0 = NULL_RTX, m;
17067 rtx (*load_unaligned) (rtx, rtx);
17068 rtx (*store_unaligned) (rtx, rtx);
17069
17070 op0 = operands[0];
17071 op1 = operands[1];
17072
17073 if (GET_MODE_SIZE (mode) == 64)
17074 {
17075 switch (GET_MODE_CLASS (mode))
17076 {
17077 case MODE_VECTOR_INT:
17078 case MODE_INT:
17079 if (GET_MODE (op0) != V16SImode)
17080 {
17081 if (!MEM_P (op0))
17082 {
17083 orig_op0 = op0;
17084 op0 = gen_reg_rtx (V16SImode);
17085 }
17086 else
17087 op0 = gen_lowpart (V16SImode, op0);
17088 }
17089 op1 = gen_lowpart (V16SImode, op1);
17090 /* FALLTHRU */
17091
17092 case MODE_VECTOR_FLOAT:
17093 switch (GET_MODE (op0))
17094 {
17095 default:
17096 gcc_unreachable ();
17097 case V16SImode:
17098 load_unaligned = gen_avx512f_loaddquv16si;
17099 store_unaligned = gen_avx512f_storedquv16si;
17100 break;
17101 case V16SFmode:
17102 load_unaligned = gen_avx512f_loadups512;
17103 store_unaligned = gen_avx512f_storeups512;
17104 break;
17105 case V8DFmode:
17106 load_unaligned = gen_avx512f_loadupd512;
17107 store_unaligned = gen_avx512f_storeupd512;
17108 break;
17109 }
17110
17111 if (MEM_P (op1))
17112 emit_insn (load_unaligned (op0, op1));
17113 else if (MEM_P (op0))
17114 emit_insn (store_unaligned (op0, op1));
17115 else
17116 gcc_unreachable ();
17117 if (orig_op0)
17118 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17119 break;
17120
17121 default:
17122 gcc_unreachable ();
17123 }
17124
17125 return;
17126 }
17127
17128 if (TARGET_AVX
17129 && GET_MODE_SIZE (mode) == 32)
17130 {
17131 switch (GET_MODE_CLASS (mode))
17132 {
17133 case MODE_VECTOR_INT:
17134 case MODE_INT:
17135 if (GET_MODE (op0) != V32QImode)
17136 {
17137 if (!MEM_P (op0))
17138 {
17139 orig_op0 = op0;
17140 op0 = gen_reg_rtx (V32QImode);
17141 }
17142 else
17143 op0 = gen_lowpart (V32QImode, op0);
17144 }
17145 op1 = gen_lowpart (V32QImode, op1);
17146 /* FALLTHRU */
17147
17148 case MODE_VECTOR_FLOAT:
17149 ix86_avx256_split_vector_move_misalign (op0, op1);
17150 if (orig_op0)
17151 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17152 break;
17153
17154 default:
17155 gcc_unreachable ();
17156 }
17157
17158 return;
17159 }
17160
17161 if (MEM_P (op1))
17162 {
17163 /* Normal *mov<mode>_internal pattern will handle
17164 unaligned loads just fine if misaligned_operand
17165 is true, and without the UNSPEC it can be combined
17166 with arithmetic instructions. */
17167 if (TARGET_AVX
17168 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17169 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17170 && misaligned_operand (op1, GET_MODE (op1)))
17171 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17172 /* ??? If we have typed data, then it would appear that using
17173 movdqu is the only way to get unaligned data loaded with
17174 integer type. */
17175 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17176 {
17177 if (GET_MODE (op0) != V16QImode)
17178 {
17179 orig_op0 = op0;
17180 op0 = gen_reg_rtx (V16QImode);
17181 }
17182 op1 = gen_lowpart (V16QImode, op1);
17183 /* We will eventually emit movups based on insn attributes. */
17184 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17185 if (orig_op0)
17186 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17187 }
17188 else if (TARGET_SSE2 && mode == V2DFmode)
17189 {
17190 rtx zero;
17191
17192 if (TARGET_AVX
17193 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17194 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17195 || optimize_insn_for_size_p ())
17196 {
17197 /* We will eventually emit movups based on insn attributes. */
17198 emit_insn (gen_sse2_loadupd (op0, op1));
17199 return;
17200 }
17201
17202 /* When SSE registers are split into halves, we can avoid
17203 writing to the top half twice. */
17204 if (TARGET_SSE_SPLIT_REGS)
17205 {
17206 emit_clobber (op0);
17207 zero = op0;
17208 }
17209 else
17210 {
17211 /* ??? Not sure about the best option for the Intel chips.
17212 The following would seem to satisfy; the register is
17213 entirely cleared, breaking the dependency chain. We
17214 then store to the upper half, with a dependency depth
17215 of one. A rumor has it that Intel recommends two movsd
17216 followed by an unpacklpd, but this is unconfirmed. And
17217 given that the dependency depth of the unpacklpd would
17218 still be one, I'm not sure why this would be better. */
17219 zero = CONST0_RTX (V2DFmode);
17220 }
17221
17222 m = adjust_address (op1, DFmode, 0);
17223 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17224 m = adjust_address (op1, DFmode, 8);
17225 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17226 }
17227 else
17228 {
17229 rtx t;
17230
17231 if (TARGET_AVX
17232 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17233 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17234 || optimize_insn_for_size_p ())
17235 {
17236 if (GET_MODE (op0) != V4SFmode)
17237 {
17238 orig_op0 = op0;
17239 op0 = gen_reg_rtx (V4SFmode);
17240 }
17241 op1 = gen_lowpart (V4SFmode, op1);
17242 emit_insn (gen_sse_loadups (op0, op1));
17243 if (orig_op0)
17244 emit_move_insn (orig_op0,
17245 gen_lowpart (GET_MODE (orig_op0), op0));
17246 return;
17247 }
17248
17249 if (mode != V4SFmode)
17250 t = gen_reg_rtx (V4SFmode);
17251 else
17252 t = op0;
17253
17254 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17255 emit_move_insn (t, CONST0_RTX (V4SFmode));
17256 else
17257 emit_clobber (t);
17258
17259 m = adjust_address (op1, V2SFmode, 0);
17260 emit_insn (gen_sse_loadlps (t, t, m));
17261 m = adjust_address (op1, V2SFmode, 8);
17262 emit_insn (gen_sse_loadhps (t, t, m));
17263 if (mode != V4SFmode)
17264 emit_move_insn (op0, gen_lowpart (mode, t));
17265 }
17266 }
17267 else if (MEM_P (op0))
17268 {
17269 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17270 {
17271 op0 = gen_lowpart (V16QImode, op0);
17272 op1 = gen_lowpart (V16QImode, op1);
17273 /* We will eventually emit movups based on insn attributes. */
17274 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17275 }
17276 else if (TARGET_SSE2 && mode == V2DFmode)
17277 {
17278 if (TARGET_AVX
17279 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17280 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17281 || optimize_insn_for_size_p ())
17282 /* We will eventually emit movups based on insn attributes. */
17283 emit_insn (gen_sse2_storeupd (op0, op1));
17284 else
17285 {
17286 m = adjust_address (op0, DFmode, 0);
17287 emit_insn (gen_sse2_storelpd (m, op1));
17288 m = adjust_address (op0, DFmode, 8);
17289 emit_insn (gen_sse2_storehpd (m, op1));
17290 }
17291 }
17292 else
17293 {
17294 if (mode != V4SFmode)
17295 op1 = gen_lowpart (V4SFmode, op1);
17296
17297 if (TARGET_AVX
17298 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17299 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17300 || optimize_insn_for_size_p ())
17301 {
17302 op0 = gen_lowpart (V4SFmode, op0);
17303 emit_insn (gen_sse_storeups (op0, op1));
17304 }
17305 else
17306 {
17307 m = adjust_address (op0, V2SFmode, 0);
17308 emit_insn (gen_sse_storelps (m, op1));
17309 m = adjust_address (op0, V2SFmode, 8);
17310 emit_insn (gen_sse_storehps (m, op1));
17311 }
17312 }
17313 }
17314 else
17315 gcc_unreachable ();
17316 }
17317
17318 /* Helper function of ix86_fixup_binary_operands to canonicalize
17319 operand order. Returns true if the operands should be swapped. */
17320
17321 static bool
17322 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17323 rtx operands[])
17324 {
17325 rtx dst = operands[0];
17326 rtx src1 = operands[1];
17327 rtx src2 = operands[2];
17328
17329 /* If the operation is not commutative, we can't do anything. */
17330 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17331 return false;
17332
17333 /* Highest priority is that src1 should match dst. */
17334 if (rtx_equal_p (dst, src1))
17335 return false;
17336 if (rtx_equal_p (dst, src2))
17337 return true;
17338
17339 /* Next highest priority is that immediate constants come second. */
17340 if (immediate_operand (src2, mode))
17341 return false;
17342 if (immediate_operand (src1, mode))
17343 return true;
17344
17345 /* Lowest priority is that memory references should come second. */
17346 if (MEM_P (src2))
17347 return false;
17348 if (MEM_P (src1))
17349 return true;
17350
17351 return false;
17352 }
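/* E.g. for a commutative dst = mem OP reg where neither source matches
   dst, the rules above swap the operands so that the memory reference
   ends up second, matching the machine's reg/mem operand forms. */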
17353
17354
17355 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17356 destination to use for the operation. If different from the true
17357 destination in operands[0], a copy operation will be required. */
17358
17359 rtx
17360 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17361 rtx operands[])
17362 {
17363 rtx dst = operands[0];
17364 rtx src1 = operands[1];
17365 rtx src2 = operands[2];
17366
17367 /* Canonicalize operand order. */
17368 if (ix86_swap_binary_operands_p (code, mode, operands))
17369 {
17370 rtx temp;
17371
17372 /* It is invalid to swap operands of different modes. */
17373 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17374
17375 temp = src1;
17376 src1 = src2;
17377 src2 = temp;
17378 }
17379
17380 /* Both source operands cannot be in memory. */
17381 if (MEM_P (src1) && MEM_P (src2))
17382 {
17383 /* Optimization: Only read from memory once. */
17384 if (rtx_equal_p (src1, src2))
17385 {
17386 src2 = force_reg (mode, src2);
17387 src1 = src2;
17388 }
17389 else if (rtx_equal_p (dst, src1))
17390 src2 = force_reg (mode, src2);
17391 else
17392 src1 = force_reg (mode, src1);
17393 }
17394
17395 /* If the destination is memory, and we do not have matching source
17396 operands, do things in registers. */
17397 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17398 dst = gen_reg_rtx (mode);
17399
17400 /* Source 1 cannot be a constant. */
17401 if (CONSTANT_P (src1))
17402 src1 = force_reg (mode, src1);
17403
17404 /* Source 1 cannot be a non-matching memory. */
17405 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17406 src1 = force_reg (mode, src1);
17407
17408 /* Improve address combine. */
17409 if (code == PLUS
17410 && GET_MODE_CLASS (mode) == MODE_INT
17411 && MEM_P (src2))
17412 src2 = force_reg (mode, src2);
17413
17414 operands[1] = src1;
17415 operands[2] = src2;
17416 return dst;
17417 }
17418
17419 /* Similarly, but assume that the destination has already been
17420 set up properly. */
17421
17422 void
17423 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17424 enum machine_mode mode, rtx operands[])
17425 {
17426 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17427 gcc_assert (dst == operands[0]);
17428 }
17429
17430 /* Attempt to expand a binary operator. Make the expansion closer to the
17431 actual machine than just general_operand, which would allow 3 separate
17432 memory references (one output, two input) in a single insn. */
17433
17434 void
17435 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17436 rtx operands[])
17437 {
17438 rtx src1, src2, dst, op, clob;
17439
17440 dst = ix86_fixup_binary_operands (code, mode, operands);
17441 src1 = operands[1];
17442 src2 = operands[2];
17443
17444 /* Emit the instruction. */
17445
17446 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17447 if (reload_in_progress)
17448 {
17449 /* Reload doesn't know about the flags register, and doesn't know that
17450 it doesn't want to clobber it. We can only do this with PLUS. */
17451 gcc_assert (code == PLUS);
17452 emit_insn (op);
17453 }
17454 else if (reload_completed
17455 && code == PLUS
17456 && !rtx_equal_p (dst, src1))
17457 {
17458 /* This is going to be an LEA; avoid splitting it later. */
17459 emit_insn (op);
17460 }
17461 else
17462 {
17463 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17464 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17465 }
17466
17467 /* Fix up the destination if needed. */
17468 if (dst != operands[0])
17469 emit_move_insn (operands[0], dst);
17470 }
17471
17472 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17473 the given OPERANDS. */
17474
17475 void
17476 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17477 rtx operands[])
17478 {
17479 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17480 if (GET_CODE (operands[1]) == SUBREG)
17481 {
17482 op1 = operands[1];
17483 op2 = operands[2];
17484 }
17485 else if (GET_CODE (operands[2]) == SUBREG)
17486 {
17487 op1 = operands[2];
17488 op2 = operands[1];
17489 }
17490 /* Optimize (__m128i) d | (__m128i) e and similar code
17491 when d and e are float vectors into float vector logical
17492 insn. In C/C++ without using intrinsics there is no other way
17493 to express vector logical operation on float vectors than
17494 to cast them temporarily to integer vectors. */
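/* When the rewrite below applies, the operation is emitted in the
   float vector mode, so the output uses andps/andpd, orps/orpd or
   xorps/xorpd instead of the integer-vector pand/por/pxor forms. */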
17495 if (op1
17496 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17497 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17498 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17499 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17500 && SUBREG_BYTE (op1) == 0
17501 && (GET_CODE (op2) == CONST_VECTOR
17502 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17503 && SUBREG_BYTE (op2) == 0))
17504 && can_create_pseudo_p ())
17505 {
17506 rtx dst;
17507 switch (GET_MODE (SUBREG_REG (op1)))
17508 {
17509 case V4SFmode:
17510 case V8SFmode:
17511 case V2DFmode:
17512 case V4DFmode:
17513 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17514 if (GET_CODE (op2) == CONST_VECTOR)
17515 {
17516 op2 = gen_lowpart (GET_MODE (dst), op2);
17517 op2 = force_reg (GET_MODE (dst), op2);
17518 }
17519 else
17520 {
17521 op1 = operands[1];
17522 op2 = SUBREG_REG (operands[2]);
17523 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17524 op2 = force_reg (GET_MODE (dst), op2);
17525 }
17526 op1 = SUBREG_REG (op1);
17527 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17528 op1 = force_reg (GET_MODE (dst), op1);
17529 emit_insn (gen_rtx_SET (VOIDmode, dst,
17530 gen_rtx_fmt_ee (code, GET_MODE (dst),
17531 op1, op2)));
17532 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17533 return;
17534 default:
17535 break;
17536 }
17537 }
17538 if (!nonimmediate_operand (operands[1], mode))
17539 operands[1] = force_reg (mode, operands[1]);
17540 if (!nonimmediate_operand (operands[2], mode))
17541 operands[2] = force_reg (mode, operands[2]);
17542 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17543 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17544 gen_rtx_fmt_ee (code, mode, operands[1],
17545 operands[2])));
17546 }
17547
17548 /* Return TRUE or FALSE depending on whether the binary operator meets the
17549 appropriate constraints. */
17550
17551 bool
17552 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17553 rtx operands[3])
17554 {
17555 rtx dst = operands[0];
17556 rtx src1 = operands[1];
17557 rtx src2 = operands[2];
17558
17559 /* Both source operands cannot be in memory. */
17560 if (MEM_P (src1) && MEM_P (src2))
17561 return false;
17562
17563 /* Canonicalize operand order for commutative operators. */
17564 if (ix86_swap_binary_operands_p (code, mode, operands))
17565 {
17566 rtx temp = src1;
17567 src1 = src2;
17568 src2 = temp;
17569 }
17570
17571 /* If the destination is memory, we must have a matching source operand. */
17572 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17573 return false;
17574
17575 /* Source 1 cannot be a constant. */
17576 if (CONSTANT_P (src1))
17577 return false;
17578
17579 /* Source 1 cannot be a non-matching memory. */
17580 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17581 /* Support "andhi/andsi/anddi" as a zero-extending move. */
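/* Constraint L here accepts the masks 0xff, 0xffff and 0xffffffff,
   i.e. exactly the AND immediates that act as zero extensions. */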
17582 return (code == AND
17583 && (mode == HImode
17584 || mode == SImode
17585 || (TARGET_64BIT && mode == DImode))
17586 && satisfies_constraint_L (src2));
17587
17588 return true;
17589 }
17590
17591 /* Attempt to expand a unary operator. Make the expansion closer to the
17592 actual machine than just general_operand, which would allow 2 separate
17593 memory references (one output, one input) in a single insn. */
17594
17595 void
17596 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17597 rtx operands[])
17598 {
17599 int matching_memory;
17600 rtx src, dst, op, clob;
17601
17602 dst = operands[0];
17603 src = operands[1];
17604
17605 /* If the destination is memory, and we do not have matching source
17606 operands, do things in registers. */
17607 matching_memory = 0;
17608 if (MEM_P (dst))
17609 {
17610 if (rtx_equal_p (dst, src))
17611 matching_memory = 1;
17612 else
17613 dst = gen_reg_rtx (mode);
17614 }
17615
17616 /* When source operand is memory, destination must match. */
17617 if (MEM_P (src) && !matching_memory)
17618 src = force_reg (mode, src);
17619
17620 /* Emit the instruction. */
17621
17622 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17623 if (reload_in_progress || code == NOT)
17624 {
17625 /* Reload doesn't know about the flags register, and doesn't know that
17626 it doesn't want to clobber it. */
17627 gcc_assert (code == NOT);
17628 emit_insn (op);
17629 }
17630 else
17631 {
17632 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17633 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17634 }
17635
17636 /* Fix up the destination if needed. */
17637 if (dst != operands[0])
17638 emit_move_insn (operands[0], dst);
17639 }
17640
17641 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17642 divisor are within the range [0-255]. */
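/* Illustrative sketch (not part of the expander itself): the emitted code
   is roughly equivalent to the following C, where DIVIDEND/DIVISOR stand
   for operands[2]/operands[3] and QUOT/REM for operands[0]/operands[1]:

       if (((unsigned) DIVIDEND | (unsigned) DIVISOR) < 0x100)
         {
           QUOT = (unsigned char) DIVIDEND / (unsigned char) DIVISOR;
           REM  = (unsigned char) DIVIDEND % (unsigned char) DIVISOR;
         }
       else
         {
           QUOT = DIVIDEND / DIVISOR;
           REM  = DIVIDEND % DIVISOR;
         }

   The 8-bit path always uses the unsigned DIV (quotient in AL, remainder
   in AH); this is valid for the signed case too, because both values are
   known to be in [0, 255] and therefore nonnegative.  */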
17643
17644 void
17645 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17646 bool signed_p)
17647 {
17648 rtx end_label, qimode_label;
17649 rtx insn, div, mod;
17650 rtx scratch, tmp0, tmp1, tmp2;
17651 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17652 rtx (*gen_zero_extend) (rtx, rtx);
17653 rtx (*gen_test_ccno_1) (rtx, rtx);
17654
17655 switch (mode)
17656 {
17657 case SImode:
17658 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17659 gen_test_ccno_1 = gen_testsi_ccno_1;
17660 gen_zero_extend = gen_zero_extendqisi2;
17661 break;
17662 case DImode:
17663 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17664 gen_test_ccno_1 = gen_testdi_ccno_1;
17665 gen_zero_extend = gen_zero_extendqidi2;
17666 break;
17667 default:
17668 gcc_unreachable ();
17669 }
17670
17671 end_label = gen_label_rtx ();
17672 qimode_label = gen_label_rtx ();
17673
17674 scratch = gen_reg_rtx (mode);
17675
17676 /* Use 8bit unsigned divmod if dividend and divisor are within
17677 the range [0-255]. */
17678 emit_move_insn (scratch, operands[2]);
17679 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17680 scratch, 1, OPTAB_DIRECT);
17681 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17682 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17683 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17684 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17685 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17686 pc_rtx);
17687 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17688 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17689 JUMP_LABEL (insn) = qimode_label;
17690
17691 /* Generate original signed/unsigned divmod. */
17692 div = gen_divmod4_1 (operands[0], operands[1],
17693 operands[2], operands[3]);
17694 emit_insn (div);
17695
17696 /* Branch to the end. */
17697 emit_jump_insn (gen_jump (end_label));
17698 emit_barrier ();
17699
17700 /* Generate 8bit unsigned divide. */
17701 emit_label (qimode_label);
17702 /* Don't use operands[0] for result of 8bit divide since not all
17703 registers support QImode ZERO_EXTRACT. */
17704 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17705 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17706 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17707 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17708
17709 if (signed_p)
17710 {
17711 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17712 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17713 }
17714 else
17715 {
17716 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17717 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17718 }
17719
17720 /* Extract remainder from AH. */
17721 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17722 if (REG_P (operands[1]))
17723 insn = emit_move_insn (operands[1], tmp1);
17724 else
17725 {
17726 /* Need a new scratch register since the old one has result
17727 of 8bit divide. */
17728 scratch = gen_reg_rtx (mode);
17729 emit_move_insn (scratch, tmp1);
17730 insn = emit_move_insn (operands[1], scratch);
17731 }
17732 set_unique_reg_note (insn, REG_EQUAL, mod);
17733
17734 /* Zero extend quotient from AL. */
17735 tmp1 = gen_lowpart (QImode, tmp0);
17736 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17737 set_unique_reg_note (insn, REG_EQUAL, div);
17738
17739 emit_label (end_label);
17740 }
17741
17742 /* Whether it is OK to emit CFI directives when emitting asm code. */
17743
17744 bool
17745 ix86_emit_cfi ()
17746 {
17747 return dwarf2out_do_cfi_asm ();
17748 }
17749
17750 #define LEA_MAX_STALL (3)
17751 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17752
17753 /* Increase given DISTANCE in half-cycles according to
17754 dependencies between PREV and NEXT instructions.
17755 Add 1 half-cycle if there is no dependency and
17756 go to the next cycle if there is some dependency. */
17757
17758 static unsigned int
17759 increase_distance (rtx prev, rtx next, unsigned int distance)
17760 {
17761 df_ref *use_rec;
17762 df_ref *def_rec;
17763
17764 if (!prev || !next)
17765 return distance + (distance & 1) + 2;
17766
17767 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17768 return distance + 1;
17769
17770 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17771 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17772 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17773 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17774 return distance + (distance & 1) + 2;
17775
17776 return distance + 1;
17777 }
17778
17779 /* Function checks if instruction INSN defines register number
17780 REGNO1 or REGNO2. */
17781
17782 static bool
17783 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17784 rtx insn)
17785 {
17786 df_ref *def_rec;
17787
17788 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17789 if (DF_REF_REG_DEF_P (*def_rec)
17790 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17791 && (regno1 == DF_REF_REGNO (*def_rec)
17792 || regno2 == DF_REF_REGNO (*def_rec)))
17793 {
17794 return true;
17795 }
17796
17797 return false;
17798 }
17799
17800 /* Function checks if instruction INSN uses register number
17801 REGNO as a part of address expression. */
17802
17803 static bool
17804 insn_uses_reg_mem (unsigned int regno, rtx insn)
17805 {
17806 df_ref *use_rec;
17807
17808 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17809 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17810 return true;
17811
17812 return false;
17813 }
17814
17815 /* Search backward for non-agu definition of register number REGNO1
17816 or register number REGNO2 in basic block starting from instruction
17817 START up to head of basic block or instruction INSN.
17818
17819 Function puts true value into *FOUND var if definition was found
17820 and false otherwise.
17821
17822 Distance in half-cycles between START and found instruction or head
17823 of BB is added to DISTANCE and returned. */
17824
17825 static int
17826 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17827 rtx insn, int distance,
17828 rtx start, bool *found)
17829 {
17830 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17831 rtx prev = start;
17832 rtx next = NULL;
17833
17834 *found = false;
17835
17836 while (prev
17837 && prev != insn
17838 && distance < LEA_SEARCH_THRESHOLD)
17839 {
17840 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17841 {
17842 distance = increase_distance (prev, next, distance);
17843 if (insn_defines_reg (regno1, regno2, prev))
17844 {
17845 if (recog_memoized (prev) < 0
17846 || get_attr_type (prev) != TYPE_LEA)
17847 {
17848 *found = true;
17849 return distance;
17850 }
17851 }
17852
17853 next = prev;
17854 }
17855 if (prev == BB_HEAD (bb))
17856 break;
17857
17858 prev = PREV_INSN (prev);
17859 }
17860
17861 return distance;
17862 }
17863
17864 /* Search backward for non-agu definition of register number REGNO1
17865 or register number REGNO2 in INSN's basic block until
17866 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17867 2. Reach neighbour BBs boundary, or
17868 3. Reach agu definition.
17869 Returns the distance between the non-agu definition point and INSN.
17870 If no definition point, returns -1. */
17871
17872 static int
17873 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17874 rtx insn)
17875 {
17876 basic_block bb = BLOCK_FOR_INSN (insn);
17877 int distance = 0;
17878 bool found = false;
17879
17880 if (insn != BB_HEAD (bb))
17881 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17882 distance, PREV_INSN (insn),
17883 &found);
17884
17885 if (!found && distance < LEA_SEARCH_THRESHOLD)
17886 {
17887 edge e;
17888 edge_iterator ei;
17889 bool simple_loop = false;
17890
17891 FOR_EACH_EDGE (e, ei, bb->preds)
17892 if (e->src == bb)
17893 {
17894 simple_loop = true;
17895 break;
17896 }
17897
17898 if (simple_loop)
17899 distance = distance_non_agu_define_in_bb (regno1, regno2,
17900 insn, distance,
17901 BB_END (bb), &found);
17902 else
17903 {
17904 int shortest_dist = -1;
17905 bool found_in_bb = false;
17906
17907 FOR_EACH_EDGE (e, ei, bb->preds)
17908 {
17909 int bb_dist
17910 = distance_non_agu_define_in_bb (regno1, regno2,
17911 insn, distance,
17912 BB_END (e->src),
17913 &found_in_bb);
17914 if (found_in_bb)
17915 {
17916 if (shortest_dist < 0)
17917 shortest_dist = bb_dist;
17918 else if (bb_dist > 0)
17919 shortest_dist = MIN (bb_dist, shortest_dist);
17920
17921 found = true;
17922 }
17923 }
17924
17925 distance = shortest_dist;
17926 }
17927 }
17928
17929 /* get_attr_type may modify recog data. We want to make sure
17930 that recog data is valid for instruction INSN, on which
17931 distance_non_agu_define is called. INSN is unchanged here. */
17932 extract_insn_cached (insn);
17933
17934 if (!found)
17935 return -1;
17936
17937 return distance >> 1;
17938 }
17939
17940 /* Return the distance in half-cycles between INSN and the next
17941 insn that uses register number REGNO in a memory address, added
17942 to DISTANCE. Return -1 if REGNO is set.
17943
17944 Put true value into *FOUND if register usage was found and
17945 false otherwise.
17946 Put true value into *REDEFINED if register redefinition was
17947 found and false otherwise. */
17948
17949 static int
17950 distance_agu_use_in_bb (unsigned int regno,
17951 rtx insn, int distance, rtx start,
17952 bool *found, bool *redefined)
17953 {
17954 basic_block bb = NULL;
17955 rtx next = start;
17956 rtx prev = NULL;
17957
17958 *found = false;
17959 *redefined = false;
17960
17961 if (start != NULL_RTX)
17962 {
17963 bb = BLOCK_FOR_INSN (start);
17964 if (start != BB_HEAD (bb))
17965 /* If insn and start belong to the same bb, set prev to insn,
17966 so the call to increase_distance will increase the distance
17967 between insns by 1. */
17968 prev = insn;
17969 }
17970
17971 while (next
17972 && next != insn
17973 && distance < LEA_SEARCH_THRESHOLD)
17974 {
17975 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17976 {
17977 distance = increase_distance(prev, next, distance);
17978 if (insn_uses_reg_mem (regno, next))
17979 {
17980 /* Return DISTANCE if OP0 is used in memory
17981 address in NEXT. */
17982 *found = true;
17983 return distance;
17984 }
17985
17986 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17987 {
17988 /* Return -1 if OP0 is set in NEXT. */
17989 *redefined = true;
17990 return -1;
17991 }
17992
17993 prev = next;
17994 }
17995
17996 if (next == BB_END (bb))
17997 break;
17998
17999 next = NEXT_INSN (next);
18000 }
18001
18002 return distance;
18003 }
18004
18005 /* Return the distance between INSN and the next insn that uses
18006 register number REGNO0 in a memory address. Return -1 if no such
18007 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18008
18009 static int
18010 distance_agu_use (unsigned int regno0, rtx insn)
18011 {
18012 basic_block bb = BLOCK_FOR_INSN (insn);
18013 int distance = 0;
18014 bool found = false;
18015 bool redefined = false;
18016
18017 if (insn != BB_END (bb))
18018 distance = distance_agu_use_in_bb (regno0, insn, distance,
18019 NEXT_INSN (insn),
18020 &found, &redefined);
18021
18022 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18023 {
18024 edge e;
18025 edge_iterator ei;
18026 bool simple_loop = false;
18027
18028 FOR_EACH_EDGE (e, ei, bb->succs)
18029 if (e->dest == bb)
18030 {
18031 simple_loop = true;
18032 break;
18033 }
18034
18035 if (simple_loop)
18036 distance = distance_agu_use_in_bb (regno0, insn,
18037 distance, BB_HEAD (bb),
18038 &found, &redefined);
18039 else
18040 {
18041 int shortest_dist = -1;
18042 bool found_in_bb = false;
18043 bool redefined_in_bb = false;
18044
18045 FOR_EACH_EDGE (e, ei, bb->succs)
18046 {
18047 int bb_dist
18048 = distance_agu_use_in_bb (regno0, insn,
18049 distance, BB_HEAD (e->dest),
18050 &found_in_bb, &redefined_in_bb);
18051 if (found_in_bb)
18052 {
18053 if (shortest_dist < 0)
18054 shortest_dist = bb_dist;
18055 else if (bb_dist > 0)
18056 shortest_dist = MIN (bb_dist, shortest_dist);
18057
18058 found = true;
18059 }
18060 }
18061
18062 distance = shortest_dist;
18063 }
18064 }
18065
18066 if (!found || redefined)
18067 return -1;
18068
18069 return distance >> 1;
18070 }
18071
18072 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18073 there is a dilemma of choosing LEA or ADD.
18074 Negative value: ADD is preferred over LEA
18075 Zero: Neutral
18076 Positive value: LEA is preferred over ADD. */
18077 #define IX86_LEA_PRIORITY 0
18078
18079 /* Return true if usage of lea INSN has a performance advantage
18080 over a sequence of instructions. The instruction sequence has
18081 SPLIT_COST cycles higher latency than the lea latency. */
18082
18083 static bool
18084 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18085 unsigned int regno2, int split_cost, bool has_scale)
18086 {
18087 int dist_define, dist_use;
18088
18089 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18090 non-destructive destination, or for the ability to use SCALE,
18091 the use of LEA is justified. */
18092 if (TARGET_SILVERMONT || TARGET_INTEL)
18093 {
18094 if (has_scale)
18095 return true;
18096 if (split_cost < 1)
18097 return false;
18098 if (regno0 == regno1 || regno0 == regno2)
18099 return false;
18100 return true;
18101 }
18102
18103 dist_define = distance_non_agu_define (regno1, regno2, insn);
18104 dist_use = distance_agu_use (regno0, insn);
18105
18106 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18107 {
18108 /* If there is no non-AGU operand definition, no AGU
18109 operand usage and the split cost is 0, then both the lea
18110 and non-lea variants have the same priority. Currently
18111 we prefer lea for 64-bit code and non-lea for 32-bit
18112 code. */
18113 if (dist_use < 0 && split_cost == 0)
18114 return TARGET_64BIT || IX86_LEA_PRIORITY;
18115 else
18116 return true;
18117 }
18118
18119 /* With a longer definition distance, lea is preferable.
18120 Here we adjust it to take into account the splitting cost
18121 and the lea priority. */
18122 dist_define += split_cost + IX86_LEA_PRIORITY;
18123
18124 /* If there is no use in a memory address then we just check
18125 that the split cost exceeds the AGU stall. */
18126 if (dist_use < 0)
18127 return dist_define > LEA_MAX_STALL;
18128
18129 /* If this insn has both backward non-agu dependence and forward
18130 agu dependence, the one with short distance takes effect. */
18131 return dist_define >= dist_use;
18132 }
18133
18134 /* Return true if it is legal to clobber flags by INSN and
18135 false otherwise. */
18136
18137 static bool
18138 ix86_ok_to_clobber_flags (rtx insn)
18139 {
18140 basic_block bb = BLOCK_FOR_INSN (insn);
18141 df_ref *use;
18142 bitmap live;
18143
18144 while (insn)
18145 {
18146 if (NONDEBUG_INSN_P (insn))
18147 {
18148 for (use = DF_INSN_USES (insn); *use; use++)
18149 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18150 return false;
18151
18152 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18153 return true;
18154 }
18155
18156 if (insn == BB_END (bb))
18157 break;
18158
18159 insn = NEXT_INSN (insn);
18160 }
18161
18162 live = df_get_live_out(bb);
18163 return !REGNO_REG_SET_P (live, FLAGS_REG);
18164 }
18165
18166 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18167 move and add to avoid AGU stalls. */
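/* For illustration only (register choice is arbitrary): instead of

       leal    (%edx,%ecx), %eax

   the add is split into

       movl    %edx, %eax
       addl    %ecx, %eax

   when the lea would have to wait on the AGU.  */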
18168
18169 bool
18170 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18171 {
18172 unsigned int regno0, regno1, regno2;
18173
18174 /* Check if we need to optimize. */
18175 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18176 return false;
18177
18178 /* Check it is correct to split here. */
18179 if (!ix86_ok_to_clobber_flags(insn))
18180 return false;
18181
18182 regno0 = true_regnum (operands[0]);
18183 regno1 = true_regnum (operands[1]);
18184 regno2 = true_regnum (operands[2]);
18185
18186 /* We need to split only adds with a non-destructive
18187 destination operand. */
18188 if (regno0 == regno1 || regno0 == regno2)
18189 return false;
18190 else
18191 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18192 }
18193
18194 /* Return true if we should emit lea instruction instead of mov
18195 instruction. */
18196
18197 bool
18198 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18199 {
18200 unsigned int regno0, regno1;
18201
18202 /* Check if we need to optimize. */
18203 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18204 return false;
18205
18206 /* Use lea for reg to reg moves only. */
18207 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18208 return false;
18209
18210 regno0 = true_regnum (operands[0]);
18211 regno1 = true_regnum (operands[1]);
18212
18213 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18214 }
18215
18216 /* Return true if we need to split lea into a sequence of
18217 instructions to avoid AGU stalls. */
18218
18219 bool
18220 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18221 {
18222 unsigned int regno0, regno1, regno2;
18223 int split_cost;
18224 struct ix86_address parts;
18225 int ok;
18226
18227 /* Check we need to optimize. */
18228 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18229 return false;
18230
18231 /* The "at least two components" test below might not catch simple
18232 move or zero extension insns if parts.base is non-NULL and parts.disp
18233 is const0_rtx as the only components in the address, e.g. if the
18234 register is %rbp or %r13. As this test is much cheaper and moves or
18235 zero extensions are the common case, do this check first. */
18236 if (REG_P (operands[1])
18237 || (SImode_address_operand (operands[1], VOIDmode)
18238 && REG_P (XEXP (operands[1], 0))))
18239 return false;
18240
18241 /* Check if it is OK to split here. */
18242 if (!ix86_ok_to_clobber_flags (insn))
18243 return false;
18244
18245 ok = ix86_decompose_address (operands[1], &parts);
18246 gcc_assert (ok);
18247
18248 /* There should be at least two components in the address. */
18249 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18250 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18251 return false;
18252
18253 /* We should not split into add if a non-legitimate PIC
18254 operand is used as the displacement. */
18255 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18256 return false;
18257
18258 regno0 = true_regnum (operands[0]);
18259 regno1 = INVALID_REGNUM;
18260 regno2 = INVALID_REGNUM;
18261
18262 if (parts.base)
18263 regno1 = true_regnum (parts.base);
18264 if (parts.index)
18265 regno2 = true_regnum (parts.index);
18266
18267 split_cost = 0;
18268
18269 /* Compute how many cycles we will add to execution time
18270 if we split the lea into a sequence of instructions. */
18271 if (parts.base || parts.index)
18272 {
18273 /* Have to use a mov instruction if the non-destructive
18274 destination form is used. */
18275 if (regno1 != regno0 && regno2 != regno0)
18276 split_cost += 1;
18277
18278 /* Have to add index to base if both exist. */
18279 if (parts.base && parts.index)
18280 split_cost += 1;
18281
18282 /* Have to use shift and adds if scale is 2 or greater. */
18283 if (parts.scale > 1)
18284 {
18285 if (regno0 != regno1)
18286 split_cost += 1;
18287 else if (regno2 == regno0)
18288 split_cost += 4;
18289 else
18290 split_cost += parts.scale;
18291 }
18292
18293 /* Have to use add instruction with immediate if
18294 disp is non zero. */
18295 if (parts.disp && parts.disp != const0_rtx)
18296 split_cost += 1;
18297
18298 /* Subtract the price of lea. */
18299 split_cost -= 1;
18300 }
18301
18302 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18303 parts.scale > 1);
18304 }
18305
18306 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18307 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18308
18309 static void
18310 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18311 rtx dst, rtx src)
18312 {
18313 rtx op, clob;
18314
18315 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18316 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18317
18318 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18319 }
18320
18321 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18322
18323 static bool
18324 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18325 {
18326 rtx prev = insn;
18327 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18328
18329 if (insn == start)
18330 return false;
18331 while (prev && prev != start)
18332 {
18333 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18334 {
18335 prev = PREV_INSN (prev);
18336 continue;
18337 }
18338 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18339 return true;
18340 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18341 return false;
18342 prev = PREV_INSN (prev);
18343 }
18344
18345 /* None of the regs is defined in the bb. */
18346 return false;
18347 }
18348
18349 /* Split a lea instruction into a sequence of instructions
18350 which are executed on the ALU to avoid AGU stalls.
18351 It is assumed that it is allowed to clobber the flags register
18352 at the lea position. */
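/* Rough illustration only (registers and operand mix are arbitrary):
   a lea such as

       leal    4(%ebx,%ecx,2), %eax

   may be split into something like

       movl    %ecx, %eax
       sall    $1, %eax         # the scale becomes a shift (or adds)
       addl    %ebx, %eax       # add the base
       addl    $4, %eax         # add the displacement

   with the exact sequence depending on which parts are present and
   which registers coincide.  */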
18353
18354 void
18355 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18356 {
18357 unsigned int regno0, regno1, regno2;
18358 struct ix86_address parts;
18359 rtx target, tmp;
18360 int ok, adds;
18361
18362 ok = ix86_decompose_address (operands[1], &parts);
18363 gcc_assert (ok);
18364
18365 target = gen_lowpart (mode, operands[0]);
18366
18367 regno0 = true_regnum (target);
18368 regno1 = INVALID_REGNUM;
18369 regno2 = INVALID_REGNUM;
18370
18371 if (parts.base)
18372 {
18373 parts.base = gen_lowpart (mode, parts.base);
18374 regno1 = true_regnum (parts.base);
18375 }
18376
18377 if (parts.index)
18378 {
18379 parts.index = gen_lowpart (mode, parts.index);
18380 regno2 = true_regnum (parts.index);
18381 }
18382
18383 if (parts.disp)
18384 parts.disp = gen_lowpart (mode, parts.disp);
18385
18386 if (parts.scale > 1)
18387 {
18388 /* Case r1 = r1 + ... */
18389 if (regno1 == regno0)
18390 {
18391 /* If we have a case r1 = r1 + C * r2 then we
18392 should use multiplication which is very
18393 expensive. Assume cost model is wrong if we
18394 have such case here. */
18395 gcc_assert (regno2 != regno0);
18396
18397 for (adds = parts.scale; adds > 0; adds--)
18398 ix86_emit_binop (PLUS, mode, target, parts.index);
18399 }
18400 else
18401 {
18402 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18403 if (regno0 != regno2)
18404 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18405
18406 /* Use shift for scaling. */
18407 ix86_emit_binop (ASHIFT, mode, target,
18408 GEN_INT (exact_log2 (parts.scale)));
18409
18410 if (parts.base)
18411 ix86_emit_binop (PLUS, mode, target, parts.base);
18412
18413 if (parts.disp && parts.disp != const0_rtx)
18414 ix86_emit_binop (PLUS, mode, target, parts.disp);
18415 }
18416 }
18417 else if (!parts.base && !parts.index)
18418 {
18419 gcc_assert(parts.disp);
18420 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18421 }
18422 else
18423 {
18424 if (!parts.base)
18425 {
18426 if (regno0 != regno2)
18427 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18428 }
18429 else if (!parts.index)
18430 {
18431 if (regno0 != regno1)
18432 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18433 }
18434 else
18435 {
18436 if (regno0 == regno1)
18437 tmp = parts.index;
18438 else if (regno0 == regno2)
18439 tmp = parts.base;
18440 else
18441 {
18442 rtx tmp1;
18443
18444 /* Find better operand for SET instruction, depending
18445 on which definition is farther from the insn. */
18446 if (find_nearest_reg_def (insn, regno1, regno2))
18447 tmp = parts.index, tmp1 = parts.base;
18448 else
18449 tmp = parts.base, tmp1 = parts.index;
18450
18451 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18452
18453 if (parts.disp && parts.disp != const0_rtx)
18454 ix86_emit_binop (PLUS, mode, target, parts.disp);
18455
18456 ix86_emit_binop (PLUS, mode, target, tmp1);
18457 return;
18458 }
18459
18460 ix86_emit_binop (PLUS, mode, target, tmp);
18461 }
18462
18463 if (parts.disp && parts.disp != const0_rtx)
18464 ix86_emit_binop (PLUS, mode, target, parts.disp);
18465 }
18466 }
18467
18468 /* Return true if it is ok to optimize an ADD operation to LEA
18469 operation to avoid flag register consumption. For most processors,
18470 ADD is faster than LEA. For processors like BONNELL, if the
18471 destination register of LEA holds an actual address which will be
18472 used soon, LEA is better and otherwise ADD is better. */
18473
18474 bool
18475 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18476 {
18477 unsigned int regno0 = true_regnum (operands[0]);
18478 unsigned int regno1 = true_regnum (operands[1]);
18479 unsigned int regno2 = true_regnum (operands[2]);
18480
18481 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18482 if (regno0 != regno1 && regno0 != regno2)
18483 return true;
18484
18485 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18486 return false;
18487
18488 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18489 }
18490
18491 /* Return true if destination reg of SET_BODY is shift count of
18492 USE_BODY. */
18493
18494 static bool
18495 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18496 {
18497 rtx set_dest;
18498 rtx shift_rtx;
18499 int i;
18500
18501 /* Retrieve destination of SET_BODY. */
18502 switch (GET_CODE (set_body))
18503 {
18504 case SET:
18505 set_dest = SET_DEST (set_body);
18506 if (!set_dest || !REG_P (set_dest))
18507 return false;
18508 break;
18509 case PARALLEL:
18510 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18511 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18512 use_body))
18513 return true;
18514 default:
18515 return false;
18516 break;
18517 }
18518
18519 /* Retrieve shift count of USE_BODY. */
18520 switch (GET_CODE (use_body))
18521 {
18522 case SET:
18523 shift_rtx = XEXP (use_body, 1);
18524 break;
18525 case PARALLEL:
18526 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18527 if (ix86_dep_by_shift_count_body (set_body,
18528 XVECEXP (use_body, 0, i)))
18529 return true;
18530 default:
18531 return false;
18532 break;
18533 }
18534
18535 if (shift_rtx
18536 && (GET_CODE (shift_rtx) == ASHIFT
18537 || GET_CODE (shift_rtx) == LSHIFTRT
18538 || GET_CODE (shift_rtx) == ASHIFTRT
18539 || GET_CODE (shift_rtx) == ROTATE
18540 || GET_CODE (shift_rtx) == ROTATERT))
18541 {
18542 rtx shift_count = XEXP (shift_rtx, 1);
18543
18544 /* Return true if shift count is dest of SET_BODY. */
18545 if (REG_P (shift_count))
18546 {
18547 /* Add check since it can be invoked before register
18548 allocation in pre-reload schedule. */
18549 if (reload_completed
18550 && true_regnum (set_dest) == true_regnum (shift_count))
18551 return true;
18552 else if (REGNO(set_dest) == REGNO(shift_count))
18553 return true;
18554 }
18555 }
18556
18557 return false;
18558 }
18559
18560 /* Return true if destination reg of SET_INSN is shift count of
18561 USE_INSN. */
18562
18563 bool
18564 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18565 {
18566 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18567 PATTERN (use_insn));
18568 }
18569
18570 /* Return TRUE or FALSE depending on whether the unary operator meets the
18571 appropriate constraints. */
18572
18573 bool
18574 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18575 enum machine_mode mode ATTRIBUTE_UNUSED,
18576 rtx operands[2])
18577 {
18578 /* If one of operands is memory, source and destination must match. */
18579 if ((MEM_P (operands[0])
18580 || MEM_P (operands[1]))
18581 && ! rtx_equal_p (operands[0], operands[1]))
18582 return false;
18583 return true;
18584 }
18585
18586 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18587 are ok, keeping in mind the possible movddup alternative. */
18588
18589 bool
18590 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18591 {
18592 if (MEM_P (operands[0]))
18593 return rtx_equal_p (operands[0], operands[1 + high]);
18594 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18595 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18596 return true;
18597 }
18598
18599 /* Post-reload splitter for converting an SF or DFmode value in an
18600 SSE register into an unsigned SImode. */
18601
18602 void
18603 ix86_split_convert_uns_si_sse (rtx operands[])
18604 {
18605 enum machine_mode vecmode;
18606 rtx value, large, zero_or_two31, input, two31, x;
18607
18608 large = operands[1];
18609 zero_or_two31 = operands[2];
18610 input = operands[3];
18611 two31 = operands[4];
18612 vecmode = GET_MODE (large);
18613 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18614
18615 /* Load up the value into the low element. We must ensure that the other
18616 elements are valid floats -- zero is the easiest such value. */
18617 if (MEM_P (input))
18618 {
18619 if (vecmode == V4SFmode)
18620 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18621 else
18622 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18623 }
18624 else
18625 {
18626 input = gen_rtx_REG (vecmode, REGNO (input));
18627 emit_move_insn (value, CONST0_RTX (vecmode));
18628 if (vecmode == V4SFmode)
18629 emit_insn (gen_sse_movss (value, value, input));
18630 else
18631 emit_insn (gen_sse2_movsd (value, value, input));
18632 }
18633
18634 emit_move_insn (large, two31);
18635 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18636
18637 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18638 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18639
18640 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18641 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18642
18643 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18644 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18645
18646 large = gen_rtx_REG (V4SImode, REGNO (large));
18647 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18648
18649 x = gen_rtx_REG (V4SImode, REGNO (value));
18650 if (vecmode == V4SFmode)
18651 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18652 else
18653 emit_insn (gen_sse2_cvttpd2dq (x, value));
18654 value = x;
18655
18656 emit_insn (gen_xorv4si3 (value, value, large));
18657 }
18658
18659 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18660 Expects the 64-bit DImode to be supplied in a pair of integral
18661 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18662 -mfpmath=sse, !optimize_size only. */
18663
18664 void
18665 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18666 {
18667 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18668 rtx int_xmm, fp_xmm;
18669 rtx biases, exponents;
18670 rtx x;
18671
18672 int_xmm = gen_reg_rtx (V4SImode);
18673 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18674 emit_insn (gen_movdi_to_sse (int_xmm, input));
18675 else if (TARGET_SSE_SPLIT_REGS)
18676 {
18677 emit_clobber (int_xmm);
18678 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18679 }
18680 else
18681 {
18682 x = gen_reg_rtx (V2DImode);
18683 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18684 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18685 }
18686
18687 x = gen_rtx_CONST_VECTOR (V4SImode,
18688 gen_rtvec (4, GEN_INT (0x43300000UL),
18689 GEN_INT (0x45300000UL),
18690 const0_rtx, const0_rtx));
18691 exponents = validize_mem (force_const_mem (V4SImode, x));
18692
18693 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18694 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18695
18696 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18697 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18698 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18699 (0x1.0p84 + double(fp_value_hi_xmm)).
18700 Note these exponents differ by 32. */
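/* Worked illustration (not part of the algorithm itself): for the input
   X = hi * 2^32 + lo, the two DFmode lanes hold exactly
   (0x1.0p52 + lo) and (0x1.0p84 + hi * 2^32).  After the 0x1.0p52 and
   0x1.0p84 biases are subtracted, the lanes hold lo and hi * 2^32
   exactly, so the final addition lo + (hi * 2^32) == X performs the
   only rounding step.  */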
18701
18702 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18703
18704 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18705 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18706 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18707 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18708 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18709 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18710 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18711 biases = validize_mem (force_const_mem (V2DFmode, biases));
18712 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18713
18714 /* Add the upper and lower DFmode values together. */
18715 if (TARGET_SSE3)
18716 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18717 else
18718 {
18719 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18720 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18721 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18722 }
18723
18724 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18725 }
18726
18727 /* Not used, but eases macroization of patterns. */
18728 void
18729 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18730 rtx input ATTRIBUTE_UNUSED)
18731 {
18732 gcc_unreachable ();
18733 }
18734
18735 /* Convert an unsigned SImode value into a DFmode. Only currently used
18736 for SSE, but applicable anywhere. */
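/* Illustration only (example value chosen arbitrarily): the unsigned
   input U is biased by -2^31 so it fits in a signed word, converted with
   the signed int-to-double conversion, and then 2^31.0 is added back in
   DFmode.  For U = 0xF0000000 (4026531840) the biased value is
   1879048192, and 1879048192.0 + 2147483648.0 == 4026531840.0.  */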
18737
18738 void
18739 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18740 {
18741 REAL_VALUE_TYPE TWO31r;
18742 rtx x, fp;
18743
18744 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18745 NULL, 1, OPTAB_DIRECT);
18746
18747 fp = gen_reg_rtx (DFmode);
18748 emit_insn (gen_floatsidf2 (fp, x));
18749
18750 real_ldexp (&TWO31r, &dconst1, 31);
18751 x = const_double_from_real_value (TWO31r, DFmode);
18752
18753 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18754 if (x != target)
18755 emit_move_insn (target, x);
18756 }
18757
18758 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18759 32-bit mode; otherwise we have a direct convert instruction. */
18760
18761 void
18762 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18763 {
18764 REAL_VALUE_TYPE TWO32r;
18765 rtx fp_lo, fp_hi, x;
18766
18767 fp_lo = gen_reg_rtx (DFmode);
18768 fp_hi = gen_reg_rtx (DFmode);
18769
18770 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18771
18772 real_ldexp (&TWO32r, &dconst1, 32);
18773 x = const_double_from_real_value (TWO32r, DFmode);
18774 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18775
18776 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18777
18778 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18779 0, OPTAB_DIRECT);
18780 if (x != target)
18781 emit_move_insn (target, x);
18782 }
18783
18784 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18785 For x86_32, -mfpmath=sse, !optimize_size only. */
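/* Rough sketch of the idea (illustration only): the 32-bit unsigned
   input U is split as U == (U >> 16) * 65536 + (U & 0xffff).  Each
   16-bit half converts to SFmode exactly, the high half is scaled by
   0x1.0p16, and the final addition performs the single rounding of
   the result.  */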
18786 void
18787 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18788 {
18789 REAL_VALUE_TYPE ONE16r;
18790 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18791
18792 real_ldexp (&ONE16r, &dconst1, 16);
18793 x = const_double_from_real_value (ONE16r, SFmode);
18794 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18795 NULL, 0, OPTAB_DIRECT);
18796 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18797 NULL, 0, OPTAB_DIRECT);
18798 fp_hi = gen_reg_rtx (SFmode);
18799 fp_lo = gen_reg_rtx (SFmode);
18800 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18801 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18802 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18803 0, OPTAB_DIRECT);
18804 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18805 0, OPTAB_DIRECT);
18806 if (!rtx_equal_p (target, fp_hi))
18807 emit_move_insn (target, fp_hi);
18808 }
18809
18810 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18811 a vector of unsigned ints VAL to vector of floats TARGET. */
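/* Per-lane illustration (example value chosen arbitrarily): for a lane
   holding 0xDEADBEEF == 3735928559, the halves 0xDEAD == 57005 and
   0xBEEF == 48879 both convert to SFmode exactly, and the lane result
   is 57005.0f * 65536.0f + 48879.0f, i.e. 3735928559 rounded once to
   float.  */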
18812
18813 void
18814 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18815 {
18816 rtx tmp[8];
18817 REAL_VALUE_TYPE TWO16r;
18818 enum machine_mode intmode = GET_MODE (val);
18819 enum machine_mode fltmode = GET_MODE (target);
18820 rtx (*cvt) (rtx, rtx);
18821
18822 if (intmode == V4SImode)
18823 cvt = gen_floatv4siv4sf2;
18824 else
18825 cvt = gen_floatv8siv8sf2;
18826 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18827 tmp[0] = force_reg (intmode, tmp[0]);
18828 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18829 OPTAB_DIRECT);
18830 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18831 NULL_RTX, 1, OPTAB_DIRECT);
18832 tmp[3] = gen_reg_rtx (fltmode);
18833 emit_insn (cvt (tmp[3], tmp[1]));
18834 tmp[4] = gen_reg_rtx (fltmode);
18835 emit_insn (cvt (tmp[4], tmp[2]));
18836 real_ldexp (&TWO16r, &dconst1, 16);
18837 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18838 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18839 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18840 OPTAB_DIRECT);
18841 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18842 OPTAB_DIRECT);
18843 if (tmp[7] != target)
18844 emit_move_insn (target, tmp[7]);
18845 }
18846
18847 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18848 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18849 This is done by doing just a signed conversion if VAL < 0x1p31, and otherwise
18850 by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
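/* Worked illustration (example value chosen arbitrarily): to truncate
   3758096384.0 (0xE0000000, exactly representable in both SFmode and
   DFmode), the value is >= 0x1p31, so 0x1p31 is subtracted to give
   1610612736.0; the signed truncation then yields 0x60000000, and
   xoring in 0x80000000 from *XORP restores 0xE0000000.  */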
18851
18852 rtx
18853 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18854 {
18855 REAL_VALUE_TYPE TWO31r;
18856 rtx two31r, tmp[4];
18857 enum machine_mode mode = GET_MODE (val);
18858 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18859 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18860 rtx (*cmp) (rtx, rtx, rtx, rtx);
18861 int i;
18862
18863 for (i = 0; i < 3; i++)
18864 tmp[i] = gen_reg_rtx (mode);
18865 real_ldexp (&TWO31r, &dconst1, 31);
18866 two31r = const_double_from_real_value (TWO31r, scalarmode);
18867 two31r = ix86_build_const_vector (mode, 1, two31r);
18868 two31r = force_reg (mode, two31r);
18869 switch (mode)
18870 {
18871 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18872 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18873 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18874 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18875 default: gcc_unreachable ();
18876 }
18877 tmp[3] = gen_rtx_LE (mode, two31r, val);
18878 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18879 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18880 0, OPTAB_DIRECT);
18881 if (intmode == V4SImode || TARGET_AVX2)
18882 *xorp = expand_simple_binop (intmode, ASHIFT,
18883 gen_lowpart (intmode, tmp[0]),
18884 GEN_INT (31), NULL_RTX, 0,
18885 OPTAB_DIRECT);
18886 else
18887 {
18888 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18889 two31 = ix86_build_const_vector (intmode, 1, two31);
18890 *xorp = expand_simple_binop (intmode, AND,
18891 gen_lowpart (intmode, tmp[0]),
18892 two31, NULL_RTX, 0,
18893 OPTAB_DIRECT);
18894 }
18895 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18896 0, OPTAB_DIRECT);
18897 }
18898
18899 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18900 then replicate the value for all elements of the vector
18901 register. */
18902
18903 rtx
18904 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18905 {
18906 int i, n_elt;
18907 rtvec v;
18908 enum machine_mode scalar_mode;
18909
18910 switch (mode)
18911 {
18912 case V64QImode:
18913 case V32QImode:
18914 case V16QImode:
18915 case V32HImode:
18916 case V16HImode:
18917 case V8HImode:
18918 case V16SImode:
18919 case V8SImode:
18920 case V4SImode:
18921 case V8DImode:
18922 case V4DImode:
18923 case V2DImode:
18924 gcc_assert (vect);
18925 case V16SFmode:
18926 case V8SFmode:
18927 case V4SFmode:
18928 case V8DFmode:
18929 case V4DFmode:
18930 case V2DFmode:
18931 n_elt = GET_MODE_NUNITS (mode);
18932 v = rtvec_alloc (n_elt);
18933 scalar_mode = GET_MODE_INNER (mode);
18934
18935 RTVEC_ELT (v, 0) = value;
18936
18937 for (i = 1; i < n_elt; ++i)
18938 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18939
18940 return gen_rtx_CONST_VECTOR (mode, v);
18941
18942 default:
18943 gcc_unreachable ();
18944 }
18945 }
18946
18947 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18948 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18949 for an SSE register. If VECT is true, then replicate the mask for
18950 all elements of the vector register. If INVERT is true, then create
18951 a mask excluding the sign bit. */
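/* For example (illustration only): for V4SFmode with VECT set the mask
   is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted
   as floats; with INVERT set it is { 0x7fffffff, ... }, i.e. every bit
   except the sign bit.  */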
18952
18953 rtx
18954 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18955 {
18956 enum machine_mode vec_mode, imode;
18957 HOST_WIDE_INT hi, lo;
18958 int shift = 63;
18959 rtx v;
18960 rtx mask;
18961
18962 /* Find the sign bit, sign extended to 2*HWI. */
18963 switch (mode)
18964 {
18965 case V16SImode:
18966 case V16SFmode:
18967 case V8SImode:
18968 case V4SImode:
18969 case V8SFmode:
18970 case V4SFmode:
18971 vec_mode = mode;
18972 mode = GET_MODE_INNER (mode);
18973 imode = SImode;
18974 lo = 0x80000000, hi = lo < 0;
18975 break;
18976
18977 case V8DImode:
18978 case V4DImode:
18979 case V2DImode:
18980 case V8DFmode:
18981 case V4DFmode:
18982 case V2DFmode:
18983 vec_mode = mode;
18984 mode = GET_MODE_INNER (mode);
18985 imode = DImode;
18986 if (HOST_BITS_PER_WIDE_INT >= 64)
18987 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18988 else
18989 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18990 break;
18991
18992 case TImode:
18993 case TFmode:
18994 vec_mode = VOIDmode;
18995 if (HOST_BITS_PER_WIDE_INT >= 64)
18996 {
18997 imode = TImode;
18998 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18999 }
19000 else
19001 {
19002 rtvec vec;
19003
19004 imode = DImode;
19005 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19006
19007 if (invert)
19008 {
19009 lo = ~lo, hi = ~hi;
19010 v = constm1_rtx;
19011 }
19012 else
19013 v = const0_rtx;
19014
19015 mask = immed_double_const (lo, hi, imode);
19016
19017 vec = gen_rtvec (2, v, mask);
19018 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19019 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19020
19021 return v;
19022 }
19023 break;
19024
19025 default:
19026 gcc_unreachable ();
19027 }
19028
19029 if (invert)
19030 lo = ~lo, hi = ~hi;
19031
19032 /* Force this value into the low part of a fp vector constant. */
19033 mask = immed_double_const (lo, hi, imode);
19034 mask = gen_lowpart (mode, mask);
19035
19036 if (vec_mode == VOIDmode)
19037 return force_reg (mode, mask);
19038
19039 v = ix86_build_const_vector (vec_mode, vect, mask);
19040 return force_reg (vec_mode, v);
19041 }
19042
19043 /* Generate code for floating point ABS or NEG. */
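/* With SSE this is implemented purely with bitwise masks (rough sketch):
   NEG xors the operand with a mask holding only the sign bit, while ABS
   ands it with the complement of that mask, clearing the sign bit.  */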
19044
19045 void
19046 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19047 rtx operands[])
19048 {
19049 rtx mask, set, dst, src;
19050 bool use_sse = false;
19051 bool vector_mode = VECTOR_MODE_P (mode);
19052 enum machine_mode vmode = mode;
19053
19054 if (vector_mode)
19055 use_sse = true;
19056 else if (mode == TFmode)
19057 use_sse = true;
19058 else if (TARGET_SSE_MATH)
19059 {
19060 use_sse = SSE_FLOAT_MODE_P (mode);
19061 if (mode == SFmode)
19062 vmode = V4SFmode;
19063 else if (mode == DFmode)
19064 vmode = V2DFmode;
19065 }
19066
19067 /* NEG and ABS performed with SSE use bitwise mask operations.
19068 Create the appropriate mask now. */
19069 if (use_sse)
19070 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19071 else
19072 mask = NULL_RTX;
19073
19074 dst = operands[0];
19075 src = operands[1];
19076
19077 set = gen_rtx_fmt_e (code, mode, src);
19078 set = gen_rtx_SET (VOIDmode, dst, set);
19079
19080 if (mask)
19081 {
19082 rtx use, clob;
19083 rtvec par;
19084
19085 use = gen_rtx_USE (VOIDmode, mask);
19086 if (vector_mode)
19087 par = gen_rtvec (2, set, use);
19088 else
19089 {
19090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19091 par = gen_rtvec (3, set, use, clob);
19092 }
19093 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19094 }
19095 else
19096 emit_insn (set);
19097 }
19098
19099 /* Expand a copysign operation. Special case operand 0 being a constant. */
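/* The underlying identity (sketch only, not the exact insn sequence):
   copysign (x, y) == (x & ~SIGNMASK) | (y & SIGNMASK), where SIGNMASK
   holds only the sign bit.  In the constant-operand case below the
   (x & ~SIGNMASK) term is folded at expand time into |x|.  */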
19100
19101 void
19102 ix86_expand_copysign (rtx operands[])
19103 {
19104 enum machine_mode mode, vmode;
19105 rtx dest, op0, op1, mask, nmask;
19106
19107 dest = operands[0];
19108 op0 = operands[1];
19109 op1 = operands[2];
19110
19111 mode = GET_MODE (dest);
19112
19113 if (mode == SFmode)
19114 vmode = V4SFmode;
19115 else if (mode == DFmode)
19116 vmode = V2DFmode;
19117 else
19118 vmode = mode;
19119
19120 if (GET_CODE (op0) == CONST_DOUBLE)
19121 {
19122 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19123
19124 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19125 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19126
19127 if (mode == SFmode || mode == DFmode)
19128 {
19129 if (op0 == CONST0_RTX (mode))
19130 op0 = CONST0_RTX (vmode);
19131 else
19132 {
19133 rtx v = ix86_build_const_vector (vmode, false, op0);
19134
19135 op0 = force_reg (vmode, v);
19136 }
19137 }
19138 else if (op0 != CONST0_RTX (mode))
19139 op0 = force_reg (mode, op0);
19140
19141 mask = ix86_build_signbit_mask (vmode, 0, 0);
19142
19143 if (mode == SFmode)
19144 copysign_insn = gen_copysignsf3_const;
19145 else if (mode == DFmode)
19146 copysign_insn = gen_copysigndf3_const;
19147 else
19148 copysign_insn = gen_copysigntf3_const;
19149
19150 emit_insn (copysign_insn (dest, op0, op1, mask));
19151 }
19152 else
19153 {
19154 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19155
19156 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19157 mask = ix86_build_signbit_mask (vmode, 0, 0);
19158
19159 if (mode == SFmode)
19160 copysign_insn = gen_copysignsf3_var;
19161 else if (mode == DFmode)
19162 copysign_insn = gen_copysigndf3_var;
19163 else
19164 copysign_insn = gen_copysigntf3_var;
19165
19166 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19167 }
19168 }
19169
19170 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19171 be a constant, and so has already been expanded into a vector constant. */
19172
19173 void
19174 ix86_split_copysign_const (rtx operands[])
19175 {
19176 enum machine_mode mode, vmode;
19177 rtx dest, op0, mask, x;
19178
19179 dest = operands[0];
19180 op0 = operands[1];
19181 mask = operands[3];
19182
19183 mode = GET_MODE (dest);
19184 vmode = GET_MODE (mask);
19185
19186 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19187 x = gen_rtx_AND (vmode, dest, mask);
19188 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19189
19190 if (op0 != CONST0_RTX (vmode))
19191 {
19192 x = gen_rtx_IOR (vmode, dest, op0);
19193 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19194 }
19195 }
19196
19197 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19198 so we have to do two masks. */
19199
19200 void
19201 ix86_split_copysign_var (rtx operands[])
19202 {
19203 enum machine_mode mode, vmode;
19204 rtx dest, scratch, op0, op1, mask, nmask, x;
19205
19206 dest = operands[0];
19207 scratch = operands[1];
19208 op0 = operands[2];
19209 op1 = operands[3];
19210 nmask = operands[4];
19211 mask = operands[5];
19212
19213 mode = GET_MODE (dest);
19214 vmode = GET_MODE (mask);
19215
19216 if (rtx_equal_p (op0, op1))
19217 {
19218 /* Shouldn't happen often (it's useless, obviously), but when it does
19219 we'd generate incorrect code if we continue below. */
19220 emit_move_insn (dest, op0);
19221 return;
19222 }
19223
19224 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19225 {
19226 gcc_assert (REGNO (op1) == REGNO (scratch));
19227
19228 x = gen_rtx_AND (vmode, scratch, mask);
19229 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19230
19231 dest = mask;
19232 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19233 x = gen_rtx_NOT (vmode, dest);
19234 x = gen_rtx_AND (vmode, x, op0);
19235 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19236 }
19237 else
19238 {
19239 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19240 {
19241 x = gen_rtx_AND (vmode, scratch, mask);
19242 }
19243 else /* alternative 2,4 */
19244 {
19245 gcc_assert (REGNO (mask) == REGNO (scratch));
19246 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19247 x = gen_rtx_AND (vmode, scratch, op1);
19248 }
19249 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19250
19251 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19252 {
19253 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19254 x = gen_rtx_AND (vmode, dest, nmask);
19255 }
19256 else /* alternative 3,4 */
19257 {
19258 gcc_assert (REGNO (nmask) == REGNO (dest));
19259 dest = nmask;
19260 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19261 x = gen_rtx_AND (vmode, dest, op0);
19262 }
19263 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19264 }
19265
19266 x = gen_rtx_IOR (vmode, dest, scratch);
19267 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19268 }
19269
19270 /* Return TRUE or FALSE depending on whether the first SET in INSN
19271 has source and destination with matching CC modes, and that the
19272 CC mode is at least as constrained as REQ_MODE. */
19273
19274 bool
19275 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19276 {
19277 rtx set;
19278 enum machine_mode set_mode;
19279
19280 set = PATTERN (insn);
19281 if (GET_CODE (set) == PARALLEL)
19282 set = XVECEXP (set, 0, 0);
19283 gcc_assert (GET_CODE (set) == SET);
19284 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19285
19286 set_mode = GET_MODE (SET_DEST (set));
19287 switch (set_mode)
19288 {
19289 case CCNOmode:
19290 if (req_mode != CCNOmode
19291 && (req_mode != CCmode
19292 || XEXP (SET_SRC (set), 1) != const0_rtx))
19293 return false;
19294 break;
19295 case CCmode:
19296 if (req_mode == CCGCmode)
19297 return false;
19298 /* FALLTHRU */
19299 case CCGCmode:
19300 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19301 return false;
19302 /* FALLTHRU */
19303 case CCGOCmode:
19304 if (req_mode == CCZmode)
19305 return false;
19306 /* FALLTHRU */
19307 case CCZmode:
19308 break;
19309
19310 case CCAmode:
19311 case CCCmode:
19312 case CCOmode:
19313 case CCSmode:
19314 if (set_mode != req_mode)
19315 return false;
19316 break;
19317
19318 default:
19319 gcc_unreachable ();
19320 }
19321
19322 return GET_MODE (SET_SRC (set)) == set_mode;
19323 }
19324
19325 /* Generate insn patterns to do an integer compare of OPERANDS. */
19326
19327 static rtx
19328 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19329 {
19330 enum machine_mode cmpmode;
19331 rtx tmp, flags;
19332
19333 cmpmode = SELECT_CC_MODE (code, op0, op1);
19334 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19335
19336 /* This is very simple, but making the interface the same as in the
19337 FP case makes the rest of the code easier. */
19338 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19339 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19340
19341 /* Return the test that should be put into the flags user, i.e.
19342 the bcc, scc, or cmov instruction. */
19343 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19344 }
19345
19346 /* Figure out whether to use ordered or unordered fp comparisons.
19347 Return the appropriate mode to use. */
19348
19349 enum machine_mode
19350 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19351 {
19352 /* ??? In order to make all comparisons reversible, we do all comparisons
19353 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19354 trapping from nontrapping forms of all comparisons, we can make inequality
19355 comparisons trapping again, since it results in better code when using
19356 FCOM based compares. */
19357 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19358 }
19359
19360 enum machine_mode
19361 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19362 {
19363 enum machine_mode mode = GET_MODE (op0);
19364
19365 if (SCALAR_FLOAT_MODE_P (mode))
19366 {
19367 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19368 return ix86_fp_compare_mode (code);
19369 }
19370
19371 switch (code)
19372 {
19373 /* Only zero flag is needed. */
19374 case EQ: /* ZF=0 */
19375 case NE: /* ZF!=0 */
19376 return CCZmode;
19377 /* Codes needing carry flag. */
19378 case GEU: /* CF=0 */
19379 case LTU: /* CF=1 */
19380 /* Detect overflow checks. They need just the carry flag. */
19381 if (GET_CODE (op0) == PLUS
19382 && rtx_equal_p (op1, XEXP (op0, 0)))
19383 return CCCmode;
19384 else
19385 return CCmode;
19386 case GTU: /* CF=0 & ZF=0 */
19387 case LEU: /* CF=1 | ZF=1 */
19388 return CCmode;
19389 /* Codes possibly doable only with sign flag when
19390 comparing against zero. */
19391 case GE: /* SF=OF or SF=0 */
19392 case LT: /* SF<>OF or SF=1 */
19393 if (op1 == const0_rtx)
19394 return CCGOCmode;
19395 else
19396 /* For other cases Carry flag is not required. */
19397 return CCGCmode;
19398 /* Codes doable only with the sign flag when comparing
19399 against zero, but we lack a jump instruction for it,
19400 so we need to use relational tests against overflow,
19401 which thus needs to be zero. */
19402 case GT: /* ZF=0 & SF=OF */
19403 case LE: /* ZF=1 | SF<>OF */
19404 if (op1 == const0_rtx)
19405 return CCNOmode;
19406 else
19407 return CCGCmode;
19408 /* The strcmp pattern does (use flags), and combine may ask us for the
19409 proper mode. */
19410 case USE:
19411 return CCmode;
19412 default:
19413 gcc_unreachable ();
19414 }
19415 }
19416
19417 /* Return the fixed registers used for condition codes. */
19418
19419 static bool
19420 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19421 {
19422 *p1 = FLAGS_REG;
19423 *p2 = FPSR_REG;
19424 return true;
19425 }
19426
19427 /* If two condition code modes are compatible, return a condition code
19428 mode which is compatible with both. Otherwise, return
19429 VOIDmode. */
19430
19431 static enum machine_mode
19432 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19433 {
19434 if (m1 == m2)
19435 return m1;
19436
19437 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19438 return VOIDmode;
19439
19440 if ((m1 == CCGCmode && m2 == CCGOCmode)
19441 || (m1 == CCGOCmode && m2 == CCGCmode))
19442 return CCGCmode;
19443
19444 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19445 return m2;
19446 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19447 return m1;
19448
19449 switch (m1)
19450 {
19451 default:
19452 gcc_unreachable ();
19453
19454 case CCmode:
19455 case CCGCmode:
19456 case CCGOCmode:
19457 case CCNOmode:
19458 case CCAmode:
19459 case CCCmode:
19460 case CCOmode:
19461 case CCSmode:
19462 case CCZmode:
19463 switch (m2)
19464 {
19465 default:
19466 return VOIDmode;
19467
19468 case CCmode:
19469 case CCGCmode:
19470 case CCGOCmode:
19471 case CCNOmode:
19472 case CCAmode:
19473 case CCCmode:
19474 case CCOmode:
19475 case CCSmode:
19476 case CCZmode:
19477 return CCmode;
19478 }
19479
19480 case CCFPmode:
19481 case CCFPUmode:
19482 /* These are only compatible with themselves, which we already
19483 checked above. */
19484 return VOIDmode;
19485 }
19486 }
19487
19488
19489 /* Return a comparison we can do that is equivalent to
19490 swap_condition (code), possibly apart from orderedness.
19491 But never change orderedness if TARGET_IEEE_FP, returning
19492 UNKNOWN in that case if necessary. */
19493
19494 static enum rtx_code
19495 ix86_fp_swap_condition (enum rtx_code code)
19496 {
19497 switch (code)
19498 {
19499 case GT: /* GTU - CF=0 & ZF=0 */
19500 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19501 case GE: /* GEU - CF=0 */
19502 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19503 case UNLT: /* LTU - CF=1 */
19504 return TARGET_IEEE_FP ? UNKNOWN : GT;
19505 case UNLE: /* LEU - CF=1 | ZF=1 */
19506 return TARGET_IEEE_FP ? UNKNOWN : GE;
19507 default:
19508 return swap_condition (code);
19509 }
19510 }
19511
19512 /* Return cost of comparison CODE using the best strategy for performance.
19513 All the following functions use the number of instructions as a cost metric.
19514 In the future this should be tweaked to compute bytes for optimize_size and
19515 take into account performance of various instructions on various CPUs. */
19516
19517 static int
19518 ix86_fp_comparison_cost (enum rtx_code code)
19519 {
19520 int arith_cost;
19521
19522 /* The cost of code using bit-twiddling on %ah. */
19523 switch (code)
19524 {
19525 case UNLE:
19526 case UNLT:
19527 case LTGT:
19528 case GT:
19529 case GE:
19530 case UNORDERED:
19531 case ORDERED:
19532 case UNEQ:
19533 arith_cost = 4;
19534 break;
19535 case LT:
19536 case NE:
19537 case EQ:
19538 case UNGE:
19539 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19540 break;
19541 case LE:
19542 case UNGT:
19543 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19544 break;
19545 default:
19546 gcc_unreachable ();
19547 }
19548
19549 switch (ix86_fp_comparison_strategy (code))
19550 {
19551 case IX86_FPCMP_COMI:
19552 return arith_cost > 4 ? 3 : 2;
19553 case IX86_FPCMP_SAHF:
19554 return arith_cost > 4 ? 4 : 3;
19555 default:
19556 return arith_cost;
19557 }
19558 }
19559
19560 /* Return the strategy to use for a floating-point comparison. We assume
19561 fcomi is always preferable where available, since that is also true when
19562 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19563
19564 enum ix86_fpcmp_strategy
19565 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19566 {
19567 /* Do fcomi/sahf based test when profitable. */
19568
19569 if (TARGET_CMOVE)
19570 return IX86_FPCMP_COMI;
19571
19572 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19573 return IX86_FPCMP_SAHF;
19574
19575 return IX86_FPCMP_ARITH;
19576 }
19577
19578 /* Swap, force into registers, or otherwise massage the two operands
19579 of a floating-point comparison. The operands are updated in place; the new
19580 comparison code is returned. */
19581
19582 static enum rtx_code
19583 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19584 {
19585 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19586 rtx op0 = *pop0, op1 = *pop1;
19587 enum machine_mode op_mode = GET_MODE (op0);
19588 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19589
19590 /* All of the unordered compare instructions only work on registers.
19591 The same is true of the fcomi compare instructions. The XFmode
19592 compare instructions require registers except when comparing
19593 against zero or when converting operand 1 from fixed point to
19594 floating point. */
19595
19596 if (!is_sse
19597 && (fpcmp_mode == CCFPUmode
19598 || (op_mode == XFmode
19599 && ! (standard_80387_constant_p (op0) == 1
19600 || standard_80387_constant_p (op1) == 1)
19601 && GET_CODE (op1) != FLOAT)
19602 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19603 {
19604 op0 = force_reg (op_mode, op0);
19605 op1 = force_reg (op_mode, op1);
19606 }
19607 else
19608 {
19609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19610 things around if they appear profitable, otherwise force op0
19611 into a register. */
19612
19613 if (standard_80387_constant_p (op0) == 0
19614 || (MEM_P (op0)
19615 && ! (standard_80387_constant_p (op1) == 0
19616 || MEM_P (op1))))
19617 {
19618 enum rtx_code new_code = ix86_fp_swap_condition (code);
19619 if (new_code != UNKNOWN)
19620 {
19621 rtx tmp;
19622 tmp = op0, op0 = op1, op1 = tmp;
19623 code = new_code;
19624 }
19625 }
19626
19627 if (!REG_P (op0))
19628 op0 = force_reg (op_mode, op0);
19629
19630 if (CONSTANT_P (op1))
19631 {
19632 int tmp = standard_80387_constant_p (op1);
19633 if (tmp == 0)
19634 op1 = validize_mem (force_const_mem (op_mode, op1));
19635 else if (tmp == 1)
19636 {
19637 if (TARGET_CMOVE)
19638 op1 = force_reg (op_mode, op1);
19639 }
19640 else
19641 op1 = force_reg (op_mode, op1);
19642 }
19643 }
19644
19645 /* Try to rearrange the comparison to make it cheaper. */
19646 if (ix86_fp_comparison_cost (code)
19647 > ix86_fp_comparison_cost (swap_condition (code))
19648 && (REG_P (op1) || can_create_pseudo_p ()))
19649 {
19650 rtx tmp;
19651 tmp = op0, op0 = op1, op1 = tmp;
19652 code = swap_condition (code);
19653 if (!REG_P (op0))
19654 op0 = force_reg (op_mode, op0);
19655 }
19656
19657 *pop0 = op0;
19658 *pop1 = op1;
19659 return code;
19660 }
19661
19662 /* Convert the comparison codes we use to represent an FP comparison into the
19663 integer comparison code that will result in a proper branch. Return UNKNOWN
19664 if no such code is available. */
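/* For instance, GT maps to GTU and GE to GEU: after fcomi/fucomi, or after
   sahf of the FPU status word, the integer flags look like the result of an
   unsigned comparison, so the ordered signed FP codes are tested with their
   unsigned integer counterparts.  */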
19665
19666 enum rtx_code
19667 ix86_fp_compare_code_to_integer (enum rtx_code code)
19668 {
19669 switch (code)
19670 {
19671 case GT:
19672 return GTU;
19673 case GE:
19674 return GEU;
19675 case ORDERED:
19676 case UNORDERED:
19677 return code;
19679 case UNEQ:
19680 return EQ;
19682 case UNLT:
19683 return LTU;
19685 case UNLE:
19686 return LEU;
19688 case LTGT:
19689 return NE;
19691 default:
19692 return UNKNOWN;
19693 }
19694 }
19695
19696 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19697
19698 static rtx
19699 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19700 {
19701 enum machine_mode fpcmp_mode, intcmp_mode;
19702 rtx tmp, tmp2;
19703
19704 fpcmp_mode = ix86_fp_compare_mode (code);
19705 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19706
19707 /* Do fcomi/sahf based test when profitable. */
19708 switch (ix86_fp_comparison_strategy (code))
19709 {
19710 case IX86_FPCMP_COMI:
19711 intcmp_mode = fpcmp_mode;
19712 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19713 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19714 tmp);
19715 emit_insn (tmp);
19716 break;
19717
19718 case IX86_FPCMP_SAHF:
19719 intcmp_mode = fpcmp_mode;
19720 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19721 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19722 tmp);
19723
19724 if (!scratch)
19725 scratch = gen_reg_rtx (HImode);
19726 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19727 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19728 break;
19729
19730 case IX86_FPCMP_ARITH:
19731 /* Unfortunately reg-stack pops clobber the FP status word, so we have to issue fnstsw first. */
19732 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19733 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19734 if (!scratch)
19735 scratch = gen_reg_rtx (HImode);
19736 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19737
19738 /* In the unordered case, we have to check C2 for NaNs, which
19739 doesn't happen to work out to anything nice combination-wise.
19740 So do some bit twiddling on the value we've got in AH to come
19741 up with an appropriate set of condition codes. */
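/* For reference when reading the masks below: after fnstsw, AH holds the
   x87 condition bits C0, C2 and C3 at 0x01, 0x04 and 0x40 respectively,
   so e.g. 0x45 tests C0|C2|C3 and 0x40 tests C3 alone; sahf then copies
   them into CF, PF and ZF.  */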
19742
19743 intcmp_mode = CCNOmode;
19744 switch (code)
19745 {
19746 case GT:
19747 case UNGT:
19748 if (code == GT || !TARGET_IEEE_FP)
19749 {
19750 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19751 code = EQ;
19752 }
19753 else
19754 {
19755 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19756 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19757 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19758 intcmp_mode = CCmode;
19759 code = GEU;
19760 }
19761 break;
19762 case LT:
19763 case UNLT:
19764 if (code == LT && TARGET_IEEE_FP)
19765 {
19766 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19767 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19768 intcmp_mode = CCmode;
19769 code = EQ;
19770 }
19771 else
19772 {
19773 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19774 code = NE;
19775 }
19776 break;
19777 case GE:
19778 case UNGE:
19779 if (code == GE || !TARGET_IEEE_FP)
19780 {
19781 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19782 code = EQ;
19783 }
19784 else
19785 {
19786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19787 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19788 code = NE;
19789 }
19790 break;
19791 case LE:
19792 case UNLE:
19793 if (code == LE && TARGET_IEEE_FP)
19794 {
19795 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19796 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19797 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19798 intcmp_mode = CCmode;
19799 code = LTU;
19800 }
19801 else
19802 {
19803 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19804 code = NE;
19805 }
19806 break;
19807 case EQ:
19808 case UNEQ:
19809 if (code == EQ && TARGET_IEEE_FP)
19810 {
19811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19812 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19813 intcmp_mode = CCmode;
19814 code = EQ;
19815 }
19816 else
19817 {
19818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19819 code = NE;
19820 }
19821 break;
19822 case NE:
19823 case LTGT:
19824 if (code == NE && TARGET_IEEE_FP)
19825 {
19826 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19827 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19828 GEN_INT (0x40)));
19829 code = NE;
19830 }
19831 else
19832 {
19833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19834 code = EQ;
19835 }
19836 break;
19837
19838 case UNORDERED:
19839 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19840 code = NE;
19841 break;
19842 case ORDERED:
19843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19844 code = EQ;
19845 break;
19846
19847 default:
19848 gcc_unreachable ();
19849 }
19850 break;
19851
19852 default:
19853 gcc_unreachable();
19854 }
19855
19856 /* Return the test that should be put into the flags user, i.e.
19857 the bcc, scc, or cmov instruction. */
19858 return gen_rtx_fmt_ee (code, VOIDmode,
19859 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19860 const0_rtx);
19861 }
19862
19863 static rtx
19864 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19865 {
19866 rtx ret;
19867
19868 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19869 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19870
19871 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19872 {
19873 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19874 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19875 }
19876 else
19877 ret = ix86_expand_int_compare (code, op0, op1);
19878
19879 return ret;
19880 }
19881
19882 void
19883 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19884 {
19885 enum machine_mode mode = GET_MODE (op0);
19886 rtx tmp;
19887
19888 switch (mode)
19889 {
19890 case SFmode:
19891 case DFmode:
19892 case XFmode:
19893 case QImode:
19894 case HImode:
19895 case SImode:
19896 simple:
19897 tmp = ix86_expand_compare (code, op0, op1);
19898 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19899 gen_rtx_LABEL_REF (VOIDmode, label),
19900 pc_rtx);
19901 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19902 return;
19903
19904 case DImode:
19905 if (TARGET_64BIT)
19906 goto simple;
19907 case TImode:
19908 /* Expand DImode branch into multiple compare+branch. */
19909 {
19910 rtx lo[2], hi[2], label2;
19911 enum rtx_code code1, code2, code3;
19912 enum machine_mode submode;
19913
19914 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19915 {
19916 tmp = op0, op0 = op1, op1 = tmp;
19917 code = swap_condition (code);
19918 }
19919
19920 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19921 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19922
19923 submode = mode == DImode ? SImode : DImode;
19924
19925 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19926 avoid two branches. This costs one extra insn, so disable when
19927 optimizing for size. */
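/* A minimal sketch of the idea, assuming 32-bit unsigned halves:
   a == b  <=>  ((hi_a ^ hi_b) | (lo_a ^ lo_b)) == 0,
   so a single compare of the OR result against zero replaces two branches.  */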
19928
19929 if ((code == EQ || code == NE)
19930 && (!optimize_insn_for_size_p ()
19931 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19932 {
19933 rtx xor0, xor1;
19934
19935 xor1 = hi[0];
19936 if (hi[1] != const0_rtx)
19937 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19938 NULL_RTX, 0, OPTAB_WIDEN);
19939
19940 xor0 = lo[0];
19941 if (lo[1] != const0_rtx)
19942 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19943 NULL_RTX, 0, OPTAB_WIDEN);
19944
19945 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19946 NULL_RTX, 0, OPTAB_WIDEN);
19947
19948 ix86_expand_branch (code, tmp, const0_rtx, label);
19949 return;
19950 }
19951
19952 /* Otherwise, if we are doing a less-than or greater-than-or-equal
19953 comparison, op1 is a constant and the low word is zero, then we can
19954 just examine the high word. Similarly for a low word of -1 and
19955 less-than-or-equal or greater-than. */
19956
19957 if (CONST_INT_P (hi[1]))
19958 switch (code)
19959 {
19960 case LT: case LTU: case GE: case GEU:
19961 if (lo[1] == const0_rtx)
19962 {
19963 ix86_expand_branch (code, hi[0], hi[1], label);
19964 return;
19965 }
19966 break;
19967 case LE: case LEU: case GT: case GTU:
19968 if (lo[1] == constm1_rtx)
19969 {
19970 ix86_expand_branch (code, hi[0], hi[1], label);
19971 return;
19972 }
19973 break;
19974 default:
19975 break;
19976 }
19977
19978 /* Otherwise, we need two or three jumps. */
19979
19980 label2 = gen_label_rtx ();
19981
19982 code1 = code;
19983 code2 = swap_condition (code);
19984 code3 = unsigned_condition (code);
19985
19986 switch (code)
19987 {
19988 case LT: case GT: case LTU: case GTU:
19989 break;
19990
19991 case LE: code1 = LT; code2 = GT; break;
19992 case GE: code1 = GT; code2 = LT; break;
19993 case LEU: code1 = LTU; code2 = GTU; break;
19994 case GEU: code1 = GTU; code2 = LTU; break;
19995
19996 case EQ: code1 = UNKNOWN; code2 = NE; break;
19997 case NE: code2 = UNKNOWN; break;
19998
19999 default:
20000 gcc_unreachable ();
20001 }
20002
20003 /*
20004 * a < b =>
20005 * if (hi(a) < hi(b)) goto true;
20006 * if (hi(a) > hi(b)) goto false;
20007 * if (lo(a) < lo(b)) goto true;
20008 * false:
20009 */
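/* For example, signed LE decomposes (per the switch above) into
   code1 = LT, code2 = GT, code3 = LEU:
   if (hi(a) < hi(b)) goto label;
   if (hi(a) > hi(b)) goto label2;
   if (lo(a) <= lo(b))  [unsigned]  goto label;
   label2:  */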
20010
20011 if (code1 != UNKNOWN)
20012 ix86_expand_branch (code1, hi[0], hi[1], label);
20013 if (code2 != UNKNOWN)
20014 ix86_expand_branch (code2, hi[0], hi[1], label2);
20015
20016 ix86_expand_branch (code3, lo[0], lo[1], label);
20017
20018 if (code2 != UNKNOWN)
20019 emit_label (label2);
20020 return;
20021 }
20022
20023 default:
20024 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20025 goto simple;
20026 }
20027 }
20028
20029 /* Split branch based on floating point condition. */
20030 void
20031 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20032 rtx target1, rtx target2, rtx tmp)
20033 {
20034 rtx condition;
20035 rtx i;
20036
20037 if (target2 != pc_rtx)
20038 {
20039 rtx tmp = target2;
20040 code = reverse_condition_maybe_unordered (code);
20041 target2 = target1;
20042 target1 = tmp;
20043 }
20044
20045 condition = ix86_expand_fp_compare (code, op1, op2,
20046 tmp);
20047
20048 i = emit_jump_insn (gen_rtx_SET
20049 (VOIDmode, pc_rtx,
20050 gen_rtx_IF_THEN_ELSE (VOIDmode,
20051 condition, target1, target2)));
20052 if (split_branch_probability >= 0)
20053 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20054 }
20055
20056 void
20057 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20058 {
20059 rtx ret;
20060
20061 gcc_assert (GET_MODE (dest) == QImode);
20062
20063 ret = ix86_expand_compare (code, op0, op1);
20064 PUT_MODE (ret, QImode);
20065 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20066 }
20067
20068 /* Expand a comparison setting or clearing the carry flag. Return true when
20069 successful and store the comparison in *POP. */
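/* For instance (see the cases below), a == 0 becomes (unsigned) a < 1,
   a >u const becomes a >=u const+1, and a >= 0 becomes
   (unsigned) a < 0x80000000; all of these use only the carry flag and can
   therefore feed the sbb-based sequences in ix86_expand_int_movcc.  */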
20070 static bool
20071 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20072 {
20073 enum machine_mode mode =
20074 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20075
20076 /* Do not handle double-mode compares, which go through a special path. */
20077 if (mode == (TARGET_64BIT ? TImode : DImode))
20078 return false;
20079
20080 if (SCALAR_FLOAT_MODE_P (mode))
20081 {
20082 rtx compare_op, compare_seq;
20083
20084 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20085
20086 /* Shortcut: the following common codes never translate
20087 into carry-flag compares. */
20088 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20089 || code == ORDERED || code == UNORDERED)
20090 return false;
20091
20092 /* These comparisons require the zero flag; swap the operands so they no longer do. */
20093 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20094 && !TARGET_IEEE_FP)
20095 {
20096 rtx tmp = op0;
20097 op0 = op1;
20098 op1 = tmp;
20099 code = swap_condition (code);
20100 }
20101
20102 /* Try to expand the comparison and verify that we end up with a
20103 carry-flag-based comparison. This fails only when we decide to
20104 expand the comparison using arithmetic, which is not a common
20105 scenario. */
20106 start_sequence ();
20107 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20108 compare_seq = get_insns ();
20109 end_sequence ();
20110
20111 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20112 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20113 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20114 else
20115 code = GET_CODE (compare_op);
20116
20117 if (code != LTU && code != GEU)
20118 return false;
20119
20120 emit_insn (compare_seq);
20121 *pop = compare_op;
20122 return true;
20123 }
20124
20125 if (!INTEGRAL_MODE_P (mode))
20126 return false;
20127
20128 switch (code)
20129 {
20130 case LTU:
20131 case GEU:
20132 break;
20133
20134 /* Convert a==0 into (unsigned)a<1. */
20135 case EQ:
20136 case NE:
20137 if (op1 != const0_rtx)
20138 return false;
20139 op1 = const1_rtx;
20140 code = (code == EQ ? LTU : GEU);
20141 break;
20142
20143 /* Convert a>b into b<a or a>=b+1. */
20144 case GTU:
20145 case LEU:
20146 if (CONST_INT_P (op1))
20147 {
20148 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20149 /* Bail out on overflow. We could still swap the operands, but that
20150 would force loading the constant into a register. */
20151 if (op1 == const0_rtx
20152 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20153 return false;
20154 code = (code == GTU ? GEU : LTU);
20155 }
20156 else
20157 {
20158 rtx tmp = op1;
20159 op1 = op0;
20160 op0 = tmp;
20161 code = (code == GTU ? LTU : GEU);
20162 }
20163 break;
20164
20165 /* Convert a>=0 into (unsigned)a<0x80000000. */
20166 case LT:
20167 case GE:
20168 if (mode == DImode || op1 != const0_rtx)
20169 return false;
20170 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20171 code = (code == LT ? GEU : LTU);
20172 break;
20173 case LE:
20174 case GT:
20175 if (mode == DImode || op1 != constm1_rtx)
20176 return false;
20177 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20178 code = (code == LE ? GEU : LTU);
20179 break;
20180
20181 default:
20182 return false;
20183 }
20184 /* Swapping operands may cause a constant to appear as the first operand. */
20185 if (!nonimmediate_operand (op0, VOIDmode))
20186 {
20187 if (!can_create_pseudo_p ())
20188 return false;
20189 op0 = force_reg (mode, op0);
20190 }
20191 *pop = ix86_expand_compare (code, op0, op1);
20192 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20193 return true;
20194 }
20195
20196 bool
20197 ix86_expand_int_movcc (rtx operands[])
20198 {
20199 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20200 rtx compare_seq, compare_op;
20201 enum machine_mode mode = GET_MODE (operands[0]);
20202 bool sign_bit_compare_p = false;
20203 rtx op0 = XEXP (operands[1], 0);
20204 rtx op1 = XEXP (operands[1], 1);
20205
20206 if (GET_MODE (op0) == TImode
20207 || (GET_MODE (op0) == DImode
20208 && !TARGET_64BIT))
20209 return false;
20210
20211 start_sequence ();
20212 compare_op = ix86_expand_compare (code, op0, op1);
20213 compare_seq = get_insns ();
20214 end_sequence ();
20215
20216 compare_code = GET_CODE (compare_op);
20217
20218 if ((op1 == const0_rtx && (code == GE || code == LT))
20219 || (op1 == constm1_rtx && (code == GT || code == LE)))
20220 sign_bit_compare_p = true;
20221
20222 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20223 HImode insns, we'd be swallowed in word prefix ops. */
20224
20225 if ((mode != HImode || TARGET_FAST_PREFIX)
20226 && (mode != (TARGET_64BIT ? TImode : DImode))
20227 && CONST_INT_P (operands[2])
20228 && CONST_INT_P (operands[3]))
20229 {
20230 rtx out = operands[0];
20231 HOST_WIDE_INT ct = INTVAL (operands[2]);
20232 HOST_WIDE_INT cf = INTVAL (operands[3]);
20233 HOST_WIDE_INT diff;
20234
20235 diff = ct - cf;
20236 /* Sign-bit compares are better done using shifts than by using
20237 sbb. */
20238 if (sign_bit_compare_p
20239 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20240 {
20241 /* Detect overlap between destination and compare sources. */
20242 rtx tmp = out;
20243
20244 if (!sign_bit_compare_p)
20245 {
20246 rtx flags;
20247 bool fpcmp = false;
20248
20249 compare_code = GET_CODE (compare_op);
20250
20251 flags = XEXP (compare_op, 0);
20252
20253 if (GET_MODE (flags) == CCFPmode
20254 || GET_MODE (flags) == CCFPUmode)
20255 {
20256 fpcmp = true;
20257 compare_code
20258 = ix86_fp_compare_code_to_integer (compare_code);
20259 }
20260
20261 /* To simplify the rest of the code, restrict to the GEU case. */
20262 if (compare_code == LTU)
20263 {
20264 HOST_WIDE_INT tmp = ct;
20265 ct = cf;
20266 cf = tmp;
20267 compare_code = reverse_condition (compare_code);
20268 code = reverse_condition (code);
20269 }
20270 else
20271 {
20272 if (fpcmp)
20273 PUT_CODE (compare_op,
20274 reverse_condition_maybe_unordered
20275 (GET_CODE (compare_op)));
20276 else
20277 PUT_CODE (compare_op,
20278 reverse_condition (GET_CODE (compare_op)));
20279 }
20280 diff = ct - cf;
20281
20282 if (reg_overlap_mentioned_p (out, op0)
20283 || reg_overlap_mentioned_p (out, op1))
20284 tmp = gen_reg_rtx (mode);
20285
20286 if (mode == DImode)
20287 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20288 else
20289 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20290 flags, compare_op));
20291 }
20292 else
20293 {
20294 if (code == GT || code == GE)
20295 code = reverse_condition (code);
20296 else
20297 {
20298 HOST_WIDE_INT tmp = ct;
20299 ct = cf;
20300 cf = tmp;
20301 diff = ct - cf;
20302 }
20303 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20304 }
20305
20306 if (diff == 1)
20307 {
20308 /*
20309 * cmpl op0,op1
20310 * sbbl dest,dest
20311 * [addl dest, ct]
20312 *
20313 * Size 5 - 8.
20314 */
20315 if (ct)
20316 tmp = expand_simple_binop (mode, PLUS,
20317 tmp, GEN_INT (ct),
20318 copy_rtx (tmp), 1, OPTAB_DIRECT);
20319 }
20320 else if (cf == -1)
20321 {
20322 /*
20323 * cmpl op0,op1
20324 * sbbl dest,dest
20325 * orl $ct, dest
20326 *
20327 * Size 8.
20328 */
20329 tmp = expand_simple_binop (mode, IOR,
20330 tmp, GEN_INT (ct),
20331 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 }
20333 else if (diff == -1 && ct)
20334 {
20335 /*
20336 * cmpl op0,op1
20337 * sbbl dest,dest
20338 * notl dest
20339 * [addl dest, cf]
20340 *
20341 * Size 8 - 11.
20342 */
20343 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20344 if (cf)
20345 tmp = expand_simple_binop (mode, PLUS,
20346 copy_rtx (tmp), GEN_INT (cf),
20347 copy_rtx (tmp), 1, OPTAB_DIRECT);
20348 }
20349 else
20350 {
20351 /*
20352 * cmpl op0,op1
20353 * sbbl dest,dest
20354 * [notl dest]
20355 * andl cf - ct, dest
20356 * [addl dest, ct]
20357 *
20358 * Size 8 - 11.
20359 */
20360
20361 if (cf == 0)
20362 {
20363 cf = ct;
20364 ct = 0;
20365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20366 }
20367
20368 tmp = expand_simple_binop (mode, AND,
20369 copy_rtx (tmp),
20370 gen_int_mode (cf - ct, mode),
20371 copy_rtx (tmp), 1, OPTAB_DIRECT);
20372 if (ct)
20373 tmp = expand_simple_binop (mode, PLUS,
20374 copy_rtx (tmp), GEN_INT (ct),
20375 copy_rtx (tmp), 1, OPTAB_DIRECT);
20376 }
20377
20378 if (!rtx_equal_p (tmp, out))
20379 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20380
20381 return true;
20382 }
20383
20384 if (diff < 0)
20385 {
20386 enum machine_mode cmp_mode = GET_MODE (op0);
20387
20388 HOST_WIDE_INT tmp;
20389 tmp = ct, ct = cf, cf = tmp;
20390 diff = -diff;
20391
20392 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20393 {
20394 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20395
20396 /* We may be reversing an unordered compare to a normal compare, which
20397 is not valid in general (we may convert a non-trapping condition
20398 into a trapping one); however, on i386 we currently emit all
20399 comparisons unordered. */
20400 compare_code = reverse_condition_maybe_unordered (compare_code);
20401 code = reverse_condition_maybe_unordered (code);
20402 }
20403 else
20404 {
20405 compare_code = reverse_condition (compare_code);
20406 code = reverse_condition (code);
20407 }
20408 }
20409
20410 compare_code = UNKNOWN;
20411 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20412 && CONST_INT_P (op1))
20413 {
20414 if (op1 == const0_rtx
20415 && (code == LT || code == GE))
20416 compare_code = code;
20417 else if (op1 == constm1_rtx)
20418 {
20419 if (code == LE)
20420 compare_code = LT;
20421 else if (code == GT)
20422 compare_code = GE;
20423 }
20424 }
20425
20426 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20427 if (compare_code != UNKNOWN
20428 && GET_MODE (op0) == GET_MODE (out)
20429 && (cf == -1 || ct == -1))
20430 {
20431 /* If the lea code below could be used, only optimize
20432 if it results in a 2-insn sequence. */
20433
20434 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20435 || diff == 3 || diff == 5 || diff == 9)
20436 || (compare_code == LT && ct == -1)
20437 || (compare_code == GE && cf == -1))
20438 {
20439 /*
20440 * notl op1 (if necessary)
20441 * sarl $31, op1
20442 * orl cf, op1
20443 */
20444 if (ct != -1)
20445 {
20446 cf = ct;
20447 ct = -1;
20448 code = reverse_condition (code);
20449 }
20450
20451 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20452
20453 out = expand_simple_binop (mode, IOR,
20454 out, GEN_INT (cf),
20455 out, 1, OPTAB_DIRECT);
20456 if (out != operands[0])
20457 emit_move_insn (operands[0], out);
20458
20459 return true;
20460 }
20461 }
20462
20463
20464 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20465 || diff == 3 || diff == 5 || diff == 9)
20466 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20467 && (mode != DImode
20468 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20469 {
20470 /*
20471 * xorl dest,dest
20472 * cmpl op1,op2
20473 * setcc dest
20474 * lea cf(dest*(ct-cf)),dest
20475 *
20476 * Size 14.
20477 *
20478 * This also catches the degenerate setcc-only case.
20479 */
20480
20481 rtx tmp;
20482 int nops;
20483
20484 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20485
20486 nops = 0;
20487 /* On x86_64 the lea instruction operates on Pmode, so we need
20488 to get the arithmetic done in the proper mode to match. */
20489 if (diff == 1)
20490 tmp = copy_rtx (out);
20491 else
20492 {
20493 rtx out1;
20494 out1 = copy_rtx (out);
20495 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20496 nops++;
20497 if (diff & 1)
20498 {
20499 tmp = gen_rtx_PLUS (mode, tmp, out1);
20500 nops++;
20501 }
20502 }
20503 if (cf != 0)
20504 {
20505 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20506 nops++;
20507 }
20508 if (!rtx_equal_p (tmp, out))
20509 {
20510 if (nops == 1)
20511 out = force_operand (tmp, copy_rtx (out));
20512 else
20513 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20514 }
20515 if (!rtx_equal_p (out, operands[0]))
20516 emit_move_insn (operands[0], copy_rtx (out));
20517
20518 return true;
20519 }
20520
20521 /*
20522 * General case: Jumpful:
20523 * xorl dest,dest cmpl op1, op2
20524 * cmpl op1, op2 movl ct, dest
20525 * setcc dest jcc 1f
20526 * decl dest movl cf, dest
20527 * andl (cf-ct),dest 1:
20528 * addl ct,dest
20529 *
20530 * Size 20. Size 14.
20531 *
20532 * This is reasonably steep, but branch mispredict costs are
20533 * high on modern cpus, so consider failing only if optimizing
20534 * for space.
20535 */
20536
20537 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20538 && BRANCH_COST (optimize_insn_for_speed_p (),
20539 false) >= 2)
20540 {
20541 if (cf == 0)
20542 {
20543 enum machine_mode cmp_mode = GET_MODE (op0);
20544
20545 cf = ct;
20546 ct = 0;
20547
20548 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20549 {
20550 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20551
20552 /* We may be reversing an unordered compare to a normal compare,
20553 which is not valid in general (we may convert a non-trapping
20554 condition into a trapping one); however, on i386 we currently
20555 emit all comparisons unordered. */
20556 code = reverse_condition_maybe_unordered (code);
20557 }
20558 else
20559 {
20560 code = reverse_condition (code);
20561 if (compare_code != UNKNOWN)
20562 compare_code = reverse_condition (compare_code);
20563 }
20564 }
20565
20566 if (compare_code != UNKNOWN)
20567 {
20568 /* notl op1 (if needed)
20569 sarl $31, op1
20570 andl (cf-ct), op1
20571 addl ct, op1
20572
20573 For x < 0 (resp. x <= -1) there will be no notl,
20574 so if possible swap the constants to get rid of the
20575 complement.
20576 True/false will be -1/0 while code below (store flag
20577 followed by decrement) is 0/-1, so the constants need
20578 to be exchanged once more. */
20579
20580 if (compare_code == GE || !cf)
20581 {
20582 code = reverse_condition (code);
20583 compare_code = LT;
20584 }
20585 else
20586 {
20587 HOST_WIDE_INT tmp = cf;
20588 cf = ct;
20589 ct = tmp;
20590 }
20591
20592 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20593 }
20594 else
20595 {
20596 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20597
20598 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20599 constm1_rtx,
20600 copy_rtx (out), 1, OPTAB_DIRECT);
20601 }
20602
20603 out = expand_simple_binop (mode, AND, copy_rtx (out),
20604 gen_int_mode (cf - ct, mode),
20605 copy_rtx (out), 1, OPTAB_DIRECT);
20606 if (ct)
20607 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20608 copy_rtx (out), 1, OPTAB_DIRECT);
20609 if (!rtx_equal_p (out, operands[0]))
20610 emit_move_insn (operands[0], copy_rtx (out));
20611
20612 return true;
20613 }
20614 }
20615
20616 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20617 {
20618 /* Try a few things more with specific constants and a variable. */
20619
20620 optab op;
20621 rtx var, orig_out, out, tmp;
20622
20623 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20624 return false;
20625
20626 /* If one of the two operands is an interesting constant, load that
20627 constant via the recursive call below and mask in the variable with a logical operation. */
20628
20629 if (CONST_INT_P (operands[2]))
20630 {
20631 var = operands[3];
20632 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20633 operands[3] = constm1_rtx, op = and_optab;
20634 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20635 operands[3] = const0_rtx, op = ior_optab;
20636 else
20637 return false;
20638 }
20639 else if (CONST_INT_P (operands[3]))
20640 {
20641 var = operands[2];
20642 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20643 operands[2] = constm1_rtx, op = and_optab;
20644 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20645 operands[2] = const0_rtx, op = ior_optab;
20646 else
20647 return false;
20648 }
20649 else
20650 return false;
20651
20652 orig_out = operands[0];
20653 tmp = gen_reg_rtx (mode);
20654 operands[0] = tmp;
20655
20656 /* Recurse to get the constant loaded. */
20657 if (ix86_expand_int_movcc (operands) == 0)
20658 return false;
20659
20660 /* Mask in the interesting variable. */
20661 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20662 OPTAB_WIDEN);
20663 if (!rtx_equal_p (out, orig_out))
20664 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20665
20666 return true;
20667 }
20668
20669 /*
20670 * For comparison with above,
20671 *
20672 * movl cf,dest
20673 * movl ct,tmp
20674 * cmpl op1,op2
20675 * cmovcc tmp,dest
20676 *
20677 * Size 15.
20678 */
20679
20680 if (! nonimmediate_operand (operands[2], mode))
20681 operands[2] = force_reg (mode, operands[2]);
20682 if (! nonimmediate_operand (operands[3], mode))
20683 operands[3] = force_reg (mode, operands[3]);
20684
20685 if (! register_operand (operands[2], VOIDmode)
20686 && (mode == QImode
20687 || ! register_operand (operands[3], VOIDmode)))
20688 operands[2] = force_reg (mode, operands[2]);
20689
20690 if (mode == QImode
20691 && ! register_operand (operands[3], VOIDmode))
20692 operands[3] = force_reg (mode, operands[3]);
20693
20694 emit_insn (compare_seq);
20695 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20696 gen_rtx_IF_THEN_ELSE (mode,
20697 compare_op, operands[2],
20698 operands[3])));
20699 return true;
20700 }
20701
20702 /* Swap, force into registers, or otherwise massage the two operands
20703 of an SSE comparison with a mask result. Thus we differ a bit from
20704 ix86_prepare_fp_compare_args, which expects to produce a flags result.
20705
20706 The DEST operand exists to help determine whether to commute commutative
20707 operators. The POP0/POP1 operands are updated in place. The new
20708 comparison code is returned, or UNKNOWN if not implementable. */
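/* E.g. without AVX a GT comparison is not directly encodable, so the
   operands are swapped and LT (typically a cmpltss/cmpltps encoding) is
   used instead; LTGT and UNEQ have no direct pre-AVX encoding at all and
   make this function return UNKNOWN.  */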
20709
20710 static enum rtx_code
20711 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20712 rtx *pop0, rtx *pop1)
20713 {
20714 rtx tmp;
20715
20716 switch (code)
20717 {
20718 case LTGT:
20719 case UNEQ:
20720 /* AVX supports all the needed comparisons. */
20721 if (TARGET_AVX)
20722 break;
20723 /* We have no LTGT as an operator. We could implement it with
20724 NE & ORDERED, but this requires an extra temporary. It's
20725 not clear that it's worth it. */
20726 return UNKNOWN;
20727
20728 case LT:
20729 case LE:
20730 case UNGT:
20731 case UNGE:
20732 /* These are supported directly. */
20733 break;
20734
20735 case EQ:
20736 case NE:
20737 case UNORDERED:
20738 case ORDERED:
20739 /* AVX has 3 operand comparisons, no need to swap anything. */
20740 if (TARGET_AVX)
20741 break;
20742 /* For commutative operators, try to canonicalize the destination
20743 operand to be first in the comparison - this helps reload to
20744 avoid extra moves. */
20745 if (!dest || !rtx_equal_p (dest, *pop1))
20746 break;
20747 /* FALLTHRU */
20748
20749 case GE:
20750 case GT:
20751 case UNLE:
20752 case UNLT:
20753 /* These are not supported directly before AVX, and furthermore
20754 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20755 comparison operands to transform into something that is
20756 supported. */
20757 tmp = *pop0;
20758 *pop0 = *pop1;
20759 *pop1 = tmp;
20760 code = swap_condition (code);
20761 break;
20762
20763 default:
20764 gcc_unreachable ();
20765 }
20766
20767 return code;
20768 }
20769
20770 /* Detect conditional moves that exactly match min/max operational
20771 semantics. Note that this is IEEE safe, as long as we don't
20772 interchange the operands.
20773
20774 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20775 and TRUE if the operation is successful and instructions are emitted. */
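/* For example, dest = (a < b) ? a : b matches a MIN in this sense, and
   dest = (a < b) ? b : a matches a MAX. Swapping the comparison operands
   instead would be IEEE-sensitive, because the SSE min/max instructions
   return their second operand when the inputs are NaNs or both zeros,
   which is why the operands are never interchanged here.  */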
20776
20777 static bool
20778 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20779 rtx cmp_op1, rtx if_true, rtx if_false)
20780 {
20781 enum machine_mode mode;
20782 bool is_min;
20783 rtx tmp;
20784
20785 if (code == LT)
20786 ;
20787 else if (code == UNGE)
20788 {
20789 tmp = if_true;
20790 if_true = if_false;
20791 if_false = tmp;
20792 }
20793 else
20794 return false;
20795
20796 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20797 is_min = true;
20798 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20799 is_min = false;
20800 else
20801 return false;
20802
20803 mode = GET_MODE (dest);
20804
20805 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20806 but MODE may be a vector mode and thus not appropriate. */
20807 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20808 {
20809 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20810 rtvec v;
20811
20812 if_true = force_reg (mode, if_true);
20813 v = gen_rtvec (2, if_true, if_false);
20814 tmp = gen_rtx_UNSPEC (mode, v, u);
20815 }
20816 else
20817 {
20818 code = is_min ? SMIN : SMAX;
20819 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20820 }
20821
20822 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20823 return true;
20824 }
20825
20826 /* Expand an SSE vector comparison. Return the register holding the result. */
20827
20828 static rtx
20829 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20830 rtx op_true, rtx op_false)
20831 {
20832 enum machine_mode mode = GET_MODE (dest);
20833 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20834
20835 /* In the general case the result of the comparison can differ from the operands' type. */
20836 enum machine_mode cmp_mode;
20837
20838 /* In AVX512F the result of comparison is an integer mask. */
20839 bool maskcmp = false;
20840 rtx x;
20841
20842 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20843 {
20844 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20845 gcc_assert (cmp_mode != BLKmode);
20846
20847 maskcmp = true;
20848 }
20849 else
20850 cmp_mode = cmp_ops_mode;
20851
20852
20853 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20854 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20855 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20856
20857 if (optimize
20858 || reg_overlap_mentioned_p (dest, op_true)
20859 || reg_overlap_mentioned_p (dest, op_false))
20860 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20861
20862 /* The compare patterns for integer modes are expressed as unspecs only for AVX512F. */
20863 if (maskcmp && (code == GT || code == EQ))
20864 {
20865 rtx (*gen)(rtx, rtx, rtx);
20866
20867 switch (cmp_ops_mode)
20868 {
20869 case V16SImode:
20870 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20871 break;
20872 case V8DImode:
20873 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20874 break;
20875 default:
20876 gen = NULL;
20877 }
20878
20879 if (gen)
20880 {
20881 emit_insn (gen (dest, cmp_op0, cmp_op1));
20882 return dest;
20883 }
20884 }
20885 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20886
20887 if (cmp_mode != mode && !maskcmp)
20888 {
20889 x = force_reg (cmp_ops_mode, x);
20890 convert_move (dest, x, false);
20891 }
20892 else
20893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894
20895 return dest;
20896 }
20897
20898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20899 operations. This is used for both scalar and vector conditional moves. */
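/* In the general fallback below (no blend instruction available) the
   expansion is the classic mask-select idiom:
   dest = (cmp & op_true) | (~cmp & op_false),
   where CMP is an all-ones/all-zeros element mask produced by the
   comparison.  */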
20900
20901 static void
20902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20903 {
20904 enum machine_mode mode = GET_MODE (dest);
20905 enum machine_mode cmpmode = GET_MODE (cmp);
20906
20907 /* In AVX512F the result of comparison is an integer mask. */
20908 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20909
20910 rtx t2, t3, x;
20911
20912 if (vector_all_ones_operand (op_true, mode)
20913 && rtx_equal_p (op_false, CONST0_RTX (mode))
20914 && !maskcmp)
20915 {
20916 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20917 }
20918 else if (op_false == CONST0_RTX (mode)
20919 && !maskcmp)
20920 {
20921 op_true = force_reg (mode, op_true);
20922 x = gen_rtx_AND (mode, cmp, op_true);
20923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20924 }
20925 else if (op_true == CONST0_RTX (mode)
20926 && !maskcmp)
20927 {
20928 op_false = force_reg (mode, op_false);
20929 x = gen_rtx_NOT (mode, cmp);
20930 x = gen_rtx_AND (mode, x, op_false);
20931 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932 }
20933 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20934 && !maskcmp)
20935 {
20936 op_false = force_reg (mode, op_false);
20937 x = gen_rtx_IOR (mode, cmp, op_false);
20938 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20939 }
20940 else if (TARGET_XOP
20941 && !maskcmp)
20942 {
20943 op_true = force_reg (mode, op_true);
20944
20945 if (!nonimmediate_operand (op_false, mode))
20946 op_false = force_reg (mode, op_false);
20947
20948 emit_insn (gen_rtx_SET (mode, dest,
20949 gen_rtx_IF_THEN_ELSE (mode, cmp,
20950 op_true,
20951 op_false)));
20952 }
20953 else
20954 {
20955 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20956 rtx d = dest;
20957
20958 if (!nonimmediate_operand (op_true, mode))
20959 op_true = force_reg (mode, op_true);
20960
20961 op_false = force_reg (mode, op_false);
20962
20963 switch (mode)
20964 {
20965 case V4SFmode:
20966 if (TARGET_SSE4_1)
20967 gen = gen_sse4_1_blendvps;
20968 break;
20969 case V2DFmode:
20970 if (TARGET_SSE4_1)
20971 gen = gen_sse4_1_blendvpd;
20972 break;
20973 case V16QImode:
20974 case V8HImode:
20975 case V4SImode:
20976 case V2DImode:
20977 if (TARGET_SSE4_1)
20978 {
20979 gen = gen_sse4_1_pblendvb;
20980 if (mode != V16QImode)
20981 d = gen_reg_rtx (V16QImode);
20982 op_false = gen_lowpart (V16QImode, op_false);
20983 op_true = gen_lowpart (V16QImode, op_true);
20984 cmp = gen_lowpart (V16QImode, cmp);
20985 }
20986 break;
20987 case V8SFmode:
20988 if (TARGET_AVX)
20989 gen = gen_avx_blendvps256;
20990 break;
20991 case V4DFmode:
20992 if (TARGET_AVX)
20993 gen = gen_avx_blendvpd256;
20994 break;
20995 case V32QImode:
20996 case V16HImode:
20997 case V8SImode:
20998 case V4DImode:
20999 if (TARGET_AVX2)
21000 {
21001 gen = gen_avx2_pblendvb;
21002 if (mode != V32QImode)
21003 d = gen_reg_rtx (V32QImode);
21004 op_false = gen_lowpart (V32QImode, op_false);
21005 op_true = gen_lowpart (V32QImode, op_true);
21006 cmp = gen_lowpart (V32QImode, cmp);
21007 }
21008 break;
21009
21010 case V16SImode:
21011 gen = gen_avx512f_blendmv16si;
21012 break;
21013 case V8DImode:
21014 gen = gen_avx512f_blendmv8di;
21015 break;
21016 case V8DFmode:
21017 gen = gen_avx512f_blendmv8df;
21018 break;
21019 case V16SFmode:
21020 gen = gen_avx512f_blendmv16sf;
21021 break;
21022
21023 default:
21024 break;
21025 }
21026
21027 if (gen != NULL)
21028 {
21029 emit_insn (gen (d, op_false, op_true, cmp));
21030 if (d != dest)
21031 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21032 }
21033 else
21034 {
21035 op_true = force_reg (mode, op_true);
21036
21037 t2 = gen_reg_rtx (mode);
21038 if (optimize)
21039 t3 = gen_reg_rtx (mode);
21040 else
21041 t3 = dest;
21042
21043 x = gen_rtx_AND (mode, op_true, cmp);
21044 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21045
21046 x = gen_rtx_NOT (mode, cmp);
21047 x = gen_rtx_AND (mode, x, op_false);
21048 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21049
21050 x = gen_rtx_IOR (mode, t3, t2);
21051 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21052 }
21053 }
21054 }
21055
21056 /* Expand a floating-point conditional move. Return true if successful. */
21057
21058 bool
21059 ix86_expand_fp_movcc (rtx operands[])
21060 {
21061 enum machine_mode mode = GET_MODE (operands[0]);
21062 enum rtx_code code = GET_CODE (operands[1]);
21063 rtx tmp, compare_op;
21064 rtx op0 = XEXP (operands[1], 0);
21065 rtx op1 = XEXP (operands[1], 1);
21066
21067 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21068 {
21069 enum machine_mode cmode;
21070
21071 /* Since we have no cmove for SSE registers, don't force bad register
21072 allocation just to gain access to it. Deny movcc when the
21073 comparison mode doesn't match the move mode. */
21074 cmode = GET_MODE (op0);
21075 if (cmode == VOIDmode)
21076 cmode = GET_MODE (op1);
21077 if (cmode != mode)
21078 return false;
21079
21080 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21081 if (code == UNKNOWN)
21082 return false;
21083
21084 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21085 operands[2], operands[3]))
21086 return true;
21087
21088 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21089 operands[2], operands[3]);
21090 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21091 return true;
21092 }
21093
21094 if (GET_MODE (op0) == TImode
21095 || (GET_MODE (op0) == DImode
21096 && !TARGET_64BIT))
21097 return false;
21098
21099 /* The floating point conditional move instructions don't directly
21100 support conditions resulting from a signed integer comparison. */
21101
21102 compare_op = ix86_expand_compare (code, op0, op1);
21103 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21104 {
21105 tmp = gen_reg_rtx (QImode);
21106 ix86_expand_setcc (tmp, code, op0, op1);
21107
21108 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21109 }
21110
21111 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21112 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21113 operands[2], operands[3])));
21114
21115 return true;
21116 }
21117
21118 /* Expand a floating-point vector conditional move; a vcond operation
21119 rather than a movcc operation. */
21120
21121 bool
21122 ix86_expand_fp_vcond (rtx operands[])
21123 {
21124 enum rtx_code code = GET_CODE (operands[3]);
21125 rtx cmp;
21126
21127 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21128 &operands[4], &operands[5]);
21129 if (code == UNKNOWN)
21130 {
21131 rtx temp;
21132 switch (GET_CODE (operands[3]))
21133 {
21134 case LTGT:
21135 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21136 operands[5], operands[0], operands[0]);
21137 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21138 operands[5], operands[1], operands[2]);
21139 code = AND;
21140 break;
21141 case UNEQ:
21142 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21143 operands[5], operands[0], operands[0]);
21144 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21145 operands[5], operands[1], operands[2]);
21146 code = IOR;
21147 break;
21148 default:
21149 gcc_unreachable ();
21150 }
21151 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21152 OPTAB_DIRECT);
21153 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21154 return true;
21155 }
21156
21157 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21158 operands[5], operands[1], operands[2]))
21159 return true;
21160
21161 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21162 operands[1], operands[2]);
21163 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21164 return true;
21165 }
21166
21167 /* Expand a signed/unsigned integral vector conditional move. */
21168
21169 bool
21170 ix86_expand_int_vcond (rtx operands[])
21171 {
21172 enum machine_mode data_mode = GET_MODE (operands[0]);
21173 enum machine_mode mode = GET_MODE (operands[4]);
21174 enum rtx_code code = GET_CODE (operands[3]);
21175 bool negate = false;
21176 rtx x, cop0, cop1;
21177
21178 cop0 = operands[4];
21179 cop1 = operands[5];
21180
21181 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21182 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
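/* A scalar sketch of the same identity, for 32-bit elements:
   (x < 0 ? -1 : 0) == x >> 31 with an arithmetic shift, and
   (x < 0 ? 1 : 0) == (unsigned) x >> 31 with a logical shift.  */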
21183 if ((code == LT || code == GE)
21184 && data_mode == mode
21185 && cop1 == CONST0_RTX (mode)
21186 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21187 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21188 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21189 && (GET_MODE_SIZE (data_mode) == 16
21190 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21191 {
21192 rtx negop = operands[2 - (code == LT)];
21193 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21194 if (negop == CONST1_RTX (data_mode))
21195 {
21196 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21197 operands[0], 1, OPTAB_DIRECT);
21198 if (res != operands[0])
21199 emit_move_insn (operands[0], res);
21200 return true;
21201 }
21202 else if (GET_MODE_INNER (data_mode) != DImode
21203 && vector_all_ones_operand (negop, data_mode))
21204 {
21205 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21206 operands[0], 0, OPTAB_DIRECT);
21207 if (res != operands[0])
21208 emit_move_insn (operands[0], res);
21209 return true;
21210 }
21211 }
21212
21213 if (!nonimmediate_operand (cop1, mode))
21214 cop1 = force_reg (mode, cop1);
21215 if (!general_operand (operands[1], data_mode))
21216 operands[1] = force_reg (data_mode, operands[1]);
21217 if (!general_operand (operands[2], data_mode))
21218 operands[2] = force_reg (data_mode, operands[2]);
21219
21220 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21221 if (TARGET_XOP
21222 && (mode == V16QImode || mode == V8HImode
21223 || mode == V4SImode || mode == V2DImode))
21224 ;
21225 else
21226 {
21227 /* Canonicalize the comparison to EQ, GT, GTU. */
21228 switch (code)
21229 {
21230 case EQ:
21231 case GT:
21232 case GTU:
21233 break;
21234
21235 case NE:
21236 case LE:
21237 case LEU:
21238 code = reverse_condition (code);
21239 negate = true;
21240 break;
21241
21242 case GE:
21243 case GEU:
21244 code = reverse_condition (code);
21245 negate = true;
21246 /* FALLTHRU */
21247
21248 case LT:
21249 case LTU:
21250 code = swap_condition (code);
21251 x = cop0, cop0 = cop1, cop1 = x;
21252 break;
21253
21254 default:
21255 gcc_unreachable ();
21256 }
21257
21258 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21259 if (mode == V2DImode)
21260 {
21261 switch (code)
21262 {
21263 case EQ:
21264 /* SSE4.1 supports EQ. */
21265 if (!TARGET_SSE4_1)
21266 return false;
21267 break;
21268
21269 case GT:
21270 case GTU:
21271 /* SSE4.2 supports GT/GTU. */
21272 if (!TARGET_SSE4_2)
21273 return false;
21274 break;
21275
21276 default:
21277 gcc_unreachable ();
21278 }
21279 }
21280
21281 /* Unsigned parallel compare is not supported by the hardware.
21282 Play some tricks to turn this into a signed comparison
21283 against 0. */
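/* Sketch of the two tricks used below: for 32/64-bit elements,
   x >u y  <=>  (x - 0x80...0) >s (y - 0x80...0), i.e. bias both operands
   by the sign bit and use the signed compare; for 8/16-bit elements,
   x >u y  <=>  (x -us y) != 0, where -us is the unsigned saturating
   subtraction, so the result is compared (EQ, then negated) against 0.  */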
21284 if (code == GTU)
21285 {
21286 cop0 = force_reg (mode, cop0);
21287
21288 switch (mode)
21289 {
21290 case V16SImode:
21291 case V8DImode:
21292 case V8SImode:
21293 case V4DImode:
21294 case V4SImode:
21295 case V2DImode:
21296 {
21297 rtx t1, t2, mask;
21298 rtx (*gen_sub3) (rtx, rtx, rtx);
21299
21300 switch (mode)
21301 {
21302 case V16SImode: gen_sub3 = gen_subv16si3; break;
21303 case V8DImode: gen_sub3 = gen_subv8di3; break;
21304 case V8SImode: gen_sub3 = gen_subv8si3; break;
21305 case V4DImode: gen_sub3 = gen_subv4di3; break;
21306 case V4SImode: gen_sub3 = gen_subv4si3; break;
21307 case V2DImode: gen_sub3 = gen_subv2di3; break;
21308 default:
21309 gcc_unreachable ();
21310 }
21311 /* Subtract (-(INT MAX) - 1) from both operands to make
21312 them signed. */
21313 mask = ix86_build_signbit_mask (mode, true, false);
21314 t1 = gen_reg_rtx (mode);
21315 emit_insn (gen_sub3 (t1, cop0, mask));
21316
21317 t2 = gen_reg_rtx (mode);
21318 emit_insn (gen_sub3 (t2, cop1, mask));
21319
21320 cop0 = t1;
21321 cop1 = t2;
21322 code = GT;
21323 }
21324 break;
21325
21326 case V32QImode:
21327 case V16HImode:
21328 case V16QImode:
21329 case V8HImode:
21330 /* Perform a parallel unsigned saturating subtraction. */
21331 x = gen_reg_rtx (mode);
21332 emit_insn (gen_rtx_SET (VOIDmode, x,
21333 gen_rtx_US_MINUS (mode, cop0, cop1)));
21334
21335 cop0 = x;
21336 cop1 = CONST0_RTX (mode);
21337 code = EQ;
21338 negate = !negate;
21339 break;
21340
21341 default:
21342 gcc_unreachable ();
21343 }
21344 }
21345 }
21346
21347 /* Allow the comparison to be done in one mode, but the movcc to
21348 happen in another mode. */
21349 if (data_mode == mode)
21350 {
21351 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21352 operands[1+negate], operands[2-negate]);
21353 }
21354 else
21355 {
21356 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21357 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21358 operands[1+negate], operands[2-negate]);
21359 if (GET_MODE (x) == mode)
21360 x = gen_lowpart (data_mode, x);
21361 }
21362
21363 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21364 operands[2-negate]);
21365 return true;
21366 }
21367
21368 static bool
21369 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21370 {
21371 enum machine_mode mode = GET_MODE (op0);
21372 switch (mode)
21373 {
21374 case V16SImode:
21375 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21376 force_reg (V16SImode, mask),
21377 op1));
21378 return true;
21379 case V16SFmode:
21380 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21381 force_reg (V16SImode, mask),
21382 op1));
21383 return true;
21384 case V8DImode:
21385 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21386 force_reg (V8DImode, mask), op1));
21387 return true;
21388 case V8DFmode:
21389 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21390 force_reg (V8DImode, mask), op1));
21391 return true;
21392 default:
21393 return false;
21394 }
21395 }
21396
21397 /* Expand a variable vector permutation. */
21398
21399 void
21400 ix86_expand_vec_perm (rtx operands[])
21401 {
21402 rtx target = operands[0];
21403 rtx op0 = operands[1];
21404 rtx op1 = operands[2];
21405 rtx mask = operands[3];
21406 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21407 enum machine_mode mode = GET_MODE (op0);
21408 enum machine_mode maskmode = GET_MODE (mask);
21409 int w, e, i;
21410 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21411
21412 /* Number of elements in the vector. */
21413 w = GET_MODE_NUNITS (mode);
21414 e = GET_MODE_UNIT_SIZE (mode);
21415 gcc_assert (w <= 64);
21416
21417 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21418 return;
21419
21420 if (TARGET_AVX2)
21421 {
21422 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21423 {
21424 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21425 a constant shuffle operand. With a tiny bit of effort we can
21426 use VPERMD instead. A re-interpretation stall for V4DFmode is
21427 unfortunate but there's no avoiding it.
21428 Similarly, for V16HImode we don't have instructions for variable
21429 shuffling, while for V32QImode we can, after preparing suitable
21430 masks, use vpshufb; vpshufb; vpermq; vpor. */
21431
21432 if (mode == V16HImode)
21433 {
21434 maskmode = mode = V32QImode;
21435 w = 32;
21436 e = 1;
21437 }
21438 else
21439 {
21440 maskmode = mode = V8SImode;
21441 w = 8;
21442 e = 4;
21443 }
21444 t1 = gen_reg_rtx (maskmode);
21445
21446 /* Replicate the low bits of the V4DImode mask into V8SImode:
21447 mask = { A B C D }
21448 t1 = { A A B B C C D D }. */
21449 for (i = 0; i < w / 2; ++i)
21450 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21451 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21452 vt = force_reg (maskmode, vt);
21453 mask = gen_lowpart (maskmode, mask);
21454 if (maskmode == V8SImode)
21455 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21456 else
21457 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21458
21459 /* Multiply the shuffle indices by two. */
21460 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21461 OPTAB_DIRECT);
21462
21463 /* Add one to the odd shuffle indices:
21464 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21465 for (i = 0; i < w / 2; ++i)
21466 {
21467 vec[i * 2] = const0_rtx;
21468 vec[i * 2 + 1] = const1_rtx;
21469 }
21470 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21471 vt = validize_mem (force_const_mem (maskmode, vt));
21472 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21473 OPTAB_DIRECT);
21474
21475 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21476 operands[3] = mask = t1;
21477 target = gen_reg_rtx (mode);
21478 op0 = gen_lowpart (mode, op0);
21479 op1 = gen_lowpart (mode, op1);
21480 }
21481
21482 switch (mode)
21483 {
21484 case V8SImode:
21485 /* The VPERMD and VPERMPS instructions already properly ignore
21486 the high bits of the shuffle elements. No need for us to
21487 perform an AND ourselves. */
21488 if (one_operand_shuffle)
21489 {
21490 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21491 if (target != operands[0])
21492 emit_move_insn (operands[0],
21493 gen_lowpart (GET_MODE (operands[0]), target));
21494 }
21495 else
21496 {
21497 t1 = gen_reg_rtx (V8SImode);
21498 t2 = gen_reg_rtx (V8SImode);
21499 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21500 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21501 goto merge_two;
21502 }
21503 return;
21504
21505 case V8SFmode:
21506 mask = gen_lowpart (V8SImode, mask);
21507 if (one_operand_shuffle)
21508 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21509 else
21510 {
21511 t1 = gen_reg_rtx (V8SFmode);
21512 t2 = gen_reg_rtx (V8SFmode);
21513 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21514 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21515 goto merge_two;
21516 }
21517 return;
21518
21519 case V4SImode:
21520 /* By combining the two 128-bit input vectors into one 256-bit
21521 input vector, we can use VPERMD and VPERMPS for the full
21522 two-operand shuffle. */
21523 t1 = gen_reg_rtx (V8SImode);
21524 t2 = gen_reg_rtx (V8SImode);
21525 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21526 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21527 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21528 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21529 return;
21530
21531 case V4SFmode:
21532 t1 = gen_reg_rtx (V8SFmode);
21533 t2 = gen_reg_rtx (V8SImode);
21534 mask = gen_lowpart (V4SImode, mask);
21535 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21536 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21537 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21538 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21539 return;
21540
21541 case V32QImode:
21542 t1 = gen_reg_rtx (V32QImode);
21543 t2 = gen_reg_rtx (V32QImode);
21544 t3 = gen_reg_rtx (V32QImode);
21545 vt2 = GEN_INT (-128);
21546 for (i = 0; i < 32; i++)
21547 vec[i] = vt2;
21548 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21549 vt = force_reg (V32QImode, vt);
21550 for (i = 0; i < 32; i++)
21551 vec[i] = i < 16 ? vt2 : const0_rtx;
21552 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21553 vt2 = force_reg (V32QImode, vt2);
21554 /* From mask create two adjusted masks, which contain the same
21555 bits as mask in the low 7 bits of each vector element.
21556 The first mask will have the most significant bit clear
21557 if it requests an element from the same 128-bit lane
21558 and the MSB set if it requests an element from the other 128-bit lane.
21559 The second mask will have the opposite values of the MSB,
21560 and additionally will have its 128-bit lanes swapped.
21561 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21562 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21563 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21564 stands for the other 12 bytes. */
21565 /* The bit that tells whether an element comes from the same lane or the
21566 other lane is bit 4, so shift it up by 3 to the MSB position. */
21567 t5 = gen_reg_rtx (V4DImode);
21568 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21569 GEN_INT (3)));
21570 /* Clear MSB bits from the mask just in case it had them set. */
21571 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21572 /* After this t1 will have MSB set for elements from other lane. */
21573 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21574 /* Clear bits other than MSB. */
21575 emit_insn (gen_andv32qi3 (t1, t1, vt));
21576 /* Or in the lower bits from mask into t3. */
21577 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21578 /* And invert MSB bits in t1, so MSB is set for elements from the same
21579 lane. */
21580 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21581 /* Swap 128-bit lanes in t3. */
21582 t6 = gen_reg_rtx (V4DImode);
21583 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21584 const2_rtx, GEN_INT (3),
21585 const0_rtx, const1_rtx));
21586 /* And or in the lower bits from mask into t1. */
21587 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21588 if (one_operand_shuffle)
21589 {
21590 /* Each of these shuffles will put 0s in places where an
21591 element from the other 128-bit lane is needed, and otherwise
21592 will shuffle in the requested value. */
21593 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21594 gen_lowpart (V32QImode, t6)));
21595 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21596 /* For t3 the 128-bit lanes are swapped again. */
21597 t7 = gen_reg_rtx (V4DImode);
21598 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21599 const2_rtx, GEN_INT (3),
21600 const0_rtx, const1_rtx));
21601 /* And ORing both together yields the result. */
21602 emit_insn (gen_iorv32qi3 (target, t1,
21603 gen_lowpart (V32QImode, t7)));
21604 if (target != operands[0])
21605 emit_move_insn (operands[0],
21606 gen_lowpart (GET_MODE (operands[0]), target));
21607 return;
21608 }
21609
21610 t4 = gen_reg_rtx (V32QImode);
21611 /* Similar to the one_operand_shuffle code above, just
21612 repeated twice, once for each operand. The merge_two:
21613 code will merge the two results together. */
21614 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21615 gen_lowpart (V32QImode, t6)));
21616 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21617 gen_lowpart (V32QImode, t6)));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21619 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21620 t7 = gen_reg_rtx (V4DImode);
21621 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21622 const2_rtx, GEN_INT (3),
21623 const0_rtx, const1_rtx));
21624 t8 = gen_reg_rtx (V4DImode);
21625 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21626 const2_rtx, GEN_INT (3),
21627 const0_rtx, const1_rtx));
21628 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21629 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21630 t1 = t4;
21631 t2 = t3;
21632 goto merge_two;
21633
21634 default:
21635 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21636 break;
21637 }
21638 }
21639
21640 if (TARGET_XOP)
21641 {
21642 /* The XOP VPPERM insn supports three inputs. By ignoring the
21643 one_operand_shuffle special case, we avoid creating another
21644 set of constant vectors in memory. */
21645 one_operand_shuffle = false;
21646
21647 /* mask = mask & {2*w-1, ...} */
21648 vt = GEN_INT (2*w - 1);
21649 }
21650 else
21651 {
21652 /* mask = mask & {w-1, ...} */
21653 vt = GEN_INT (w - 1);
21654 }
21655
21656 for (i = 0; i < w; i++)
21657 vec[i] = vt;
21658 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21659 mask = expand_simple_binop (maskmode, AND, mask, vt,
21660 NULL_RTX, 0, OPTAB_DIRECT);
21661
21662 /* For non-QImode operations, convert the word permutation control
21663 into a byte permutation control. */
21664 if (mode != V16QImode)
21665 {
21666 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21667 GEN_INT (exact_log2 (e)),
21668 NULL_RTX, 0, OPTAB_DIRECT);
21669
21670 /* Convert mask to vector of chars. */
21671 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21672
21673 /* Replicate each of the input bytes into byte positions:
21674 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21675 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21676 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21677 for (i = 0; i < 16; ++i)
21678 vec[i] = GEN_INT (i/e * e);
21679 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21680 vt = validize_mem (force_const_mem (V16QImode, vt));
21681 if (TARGET_XOP)
21682 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21683 else
21684 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21685
21686 /* Convert it into the byte positions by doing
21687 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21688 for (i = 0; i < 16; ++i)
21689 vec[i] = GEN_INT (i % e);
21690 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21691 vt = validize_mem (force_const_mem (V16QImode, vt));
21692 emit_insn (gen_addv16qi3 (mask, mask, vt));
21693 }
21694
21695 /* The actual shuffle operations all operate on V16QImode. */
21696 op0 = gen_lowpart (V16QImode, op0);
21697 op1 = gen_lowpart (V16QImode, op1);
21698
21699 if (TARGET_XOP)
21700 {
21701 if (GET_MODE (target) != V16QImode)
21702 target = gen_reg_rtx (V16QImode);
21703 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21704 if (target != operands[0])
21705 emit_move_insn (operands[0],
21706 gen_lowpart (GET_MODE (operands[0]), target));
21707 }
21708 else if (one_operand_shuffle)
21709 {
21710 if (GET_MODE (target) != V16QImode)
21711 target = gen_reg_rtx (V16QImode);
21712 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21713 if (target != operands[0])
21714 emit_move_insn (operands[0],
21715 gen_lowpart (GET_MODE (operands[0]), target));
21716 }
21717 else
21718 {
21719 rtx xops[6];
21720 bool ok;
21721
21722 /* Shuffle the two input vectors independently. */
21723 t1 = gen_reg_rtx (V16QImode);
21724 t2 = gen_reg_rtx (V16QImode);
21725 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21726 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21727
21728 merge_two:
21729 /* Then merge them together. The key is whether any given control
21730 element contained a bit set that indicates the second word. */
21731 mask = operands[3];
21732 vt = GEN_INT (w);
21733 if (maskmode == V2DImode && !TARGET_SSE4_1)
21734 {
21735 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21736 more shuffle to convert the V2DI input mask into a V4SI
21737 input mask. At that point the masking that expand_int_vcond
21738 performs will work as desired. */
21739 rtx t3 = gen_reg_rtx (V4SImode);
21740 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21741 const0_rtx, const0_rtx,
21742 const2_rtx, const2_rtx));
21743 mask = t3;
21744 maskmode = V4SImode;
21745 e = w = 4;
21746 }
21747
21748 for (i = 0; i < w; i++)
21749 vec[i] = vt;
21750 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21751 vt = force_reg (maskmode, vt);
21752 mask = expand_simple_binop (maskmode, AND, mask, vt,
21753 NULL_RTX, 0, OPTAB_DIRECT);
21754
21755 if (GET_MODE (target) != mode)
21756 target = gen_reg_rtx (mode);
21757 xops[0] = target;
21758 xops[1] = gen_lowpart (mode, t2);
21759 xops[2] = gen_lowpart (mode, t1);
21760 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21761 xops[4] = mask;
21762 xops[5] = vt;
21763 ok = ix86_expand_int_vcond (xops);
21764 gcc_assert (ok);
21765 if (target != operands[0])
21766 emit_move_insn (operands[0],
21767 gen_lowpart (GET_MODE (operands[0]), target));
21768 }
21769 }
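
/* Illustrative sketch (not part of the original source): a scalar reference
   model of the two-operand variable permutation expanded above, assuming W
   elements per vector.  Selector values with bit W set pick from the second
   operand, matching the merge_two: logic.

     static void ref_vec_perm (const unsigned *op0, const unsigned *op1,
                               const unsigned *sel, unsigned *dst, int w)
     {
       for (int i = 0; i < w; i++)
         {
           unsigned idx = sel[i] & (2 * w - 1);   // mask to the valid range
           dst[i] = idx < (unsigned) w ? op0[idx] : op1[idx - w];
         }
     }
*/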
21770
21771 /* Unpack SRC into DEST, widening to the next wider integer vector type.
21772 UNSIGNED_P is true if we should do zero extension, else sign extension.
21773 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21774
21775 void
21776 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21777 {
21778 enum machine_mode imode = GET_MODE (src);
21779 rtx tmp;
21780
21781 if (TARGET_SSE4_1)
21782 {
21783 rtx (*unpack)(rtx, rtx);
21784 rtx (*extract)(rtx, rtx) = NULL;
21785 enum machine_mode halfmode = BLKmode;
21786
21787 switch (imode)
21788 {
21789 case V32QImode:
21790 if (unsigned_p)
21791 unpack = gen_avx2_zero_extendv16qiv16hi2;
21792 else
21793 unpack = gen_avx2_sign_extendv16qiv16hi2;
21794 halfmode = V16QImode;
21795 extract
21796 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21797 break;
21798 case V32HImode:
21799 if (unsigned_p)
21800 unpack = gen_avx512f_zero_extendv16hiv16si2;
21801 else
21802 unpack = gen_avx512f_sign_extendv16hiv16si2;
21803 halfmode = V16HImode;
21804 extract
21805 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21806 break;
21807 case V16HImode:
21808 if (unsigned_p)
21809 unpack = gen_avx2_zero_extendv8hiv8si2;
21810 else
21811 unpack = gen_avx2_sign_extendv8hiv8si2;
21812 halfmode = V8HImode;
21813 extract
21814 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21815 break;
21816 case V16SImode:
21817 if (unsigned_p)
21818 unpack = gen_avx512f_zero_extendv8siv8di2;
21819 else
21820 unpack = gen_avx512f_sign_extendv8siv8di2;
21821 halfmode = V8SImode;
21822 extract
21823 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21824 break;
21825 case V8SImode:
21826 if (unsigned_p)
21827 unpack = gen_avx2_zero_extendv4siv4di2;
21828 else
21829 unpack = gen_avx2_sign_extendv4siv4di2;
21830 halfmode = V4SImode;
21831 extract
21832 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21833 break;
21834 case V16QImode:
21835 if (unsigned_p)
21836 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21837 else
21838 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21839 break;
21840 case V8HImode:
21841 if (unsigned_p)
21842 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21843 else
21844 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21845 break;
21846 case V4SImode:
21847 if (unsigned_p)
21848 unpack = gen_sse4_1_zero_extendv2siv2di2;
21849 else
21850 unpack = gen_sse4_1_sign_extendv2siv2di2;
21851 break;
21852 default:
21853 gcc_unreachable ();
21854 }
21855
21856 if (GET_MODE_SIZE (imode) >= 32)
21857 {
21858 tmp = gen_reg_rtx (halfmode);
21859 emit_insn (extract (tmp, src));
21860 }
21861 else if (high_p)
21862 {
21863 /* Shift higher 8 bytes to lower 8 bytes. */
21864 tmp = gen_reg_rtx (V1TImode);
21865 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21866 GEN_INT (64)));
21867 tmp = gen_lowpart (imode, tmp);
21868 }
21869 else
21870 tmp = src;
21871
21872 emit_insn (unpack (dest, tmp));
21873 }
21874 else
21875 {
21876 rtx (*unpack)(rtx, rtx, rtx);
21877
21878 switch (imode)
21879 {
21880 case V16QImode:
21881 if (high_p)
21882 unpack = gen_vec_interleave_highv16qi;
21883 else
21884 unpack = gen_vec_interleave_lowv16qi;
21885 break;
21886 case V8HImode:
21887 if (high_p)
21888 unpack = gen_vec_interleave_highv8hi;
21889 else
21890 unpack = gen_vec_interleave_lowv8hi;
21891 break;
21892 case V4SImode:
21893 if (high_p)
21894 unpack = gen_vec_interleave_highv4si;
21895 else
21896 unpack = gen_vec_interleave_lowv4si;
21897 break;
21898 default:
21899 gcc_unreachable ();
21900 }
21901
21902 if (unsigned_p)
21903 tmp = force_reg (imode, CONST0_RTX (imode));
21904 else
21905 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21906 src, pc_rtx, pc_rtx);
21907
21908 rtx tmp2 = gen_reg_rtx (imode);
21909 emit_insn (unpack (tmp2, src, tmp));
21910 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21911 }
21912 }
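
/* Illustrative sketch (not part of the original source): the unpack above is
   equivalent to taking the low or high half of SRC and widening each element,
   e.g. for 16-bit elements widened to 32 bits:

     static void ref_sse_unpack_hi16 (const short *src, int *dst,
                                      int nelts, int unsigned_p, int high_p)
     {
       const short *half = src + (high_p ? nelts / 2 : 0);
       for (int i = 0; i < nelts / 2; i++)
         dst[i] = unsigned_p ? (int) (unsigned short) half[i]
                             : (int) half[i];
     }
*/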
21913
21914 /* Expand conditional increment or decrement using adc/sbb instructions.
21915 The default case using setcc followed by a conditional move can be
21916 done by generic code. */
21917 bool
21918 ix86_expand_int_addcc (rtx operands[])
21919 {
21920 enum rtx_code code = GET_CODE (operands[1]);
21921 rtx flags;
21922 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21923 rtx compare_op;
21924 rtx val = const0_rtx;
21925 bool fpcmp = false;
21926 enum machine_mode mode;
21927 rtx op0 = XEXP (operands[1], 0);
21928 rtx op1 = XEXP (operands[1], 1);
21929
21930 if (operands[3] != const1_rtx
21931 && operands[3] != constm1_rtx)
21932 return false;
21933 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21934 return false;
21935 code = GET_CODE (compare_op);
21936
21937 flags = XEXP (compare_op, 0);
21938
21939 if (GET_MODE (flags) == CCFPmode
21940 || GET_MODE (flags) == CCFPUmode)
21941 {
21942 fpcmp = true;
21943 code = ix86_fp_compare_code_to_integer (code);
21944 }
21945
21946 if (code != LTU)
21947 {
21948 val = constm1_rtx;
21949 if (fpcmp)
21950 PUT_CODE (compare_op,
21951 reverse_condition_maybe_unordered
21952 (GET_CODE (compare_op)));
21953 else
21954 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21955 }
21956
21957 mode = GET_MODE (operands[0]);
21958
21959 /* Construct either adc or sbb insn. */
21960 if ((code == LTU) == (operands[3] == constm1_rtx))
21961 {
21962 switch (mode)
21963 {
21964 case QImode:
21965 insn = gen_subqi3_carry;
21966 break;
21967 case HImode:
21968 insn = gen_subhi3_carry;
21969 break;
21970 case SImode:
21971 insn = gen_subsi3_carry;
21972 break;
21973 case DImode:
21974 insn = gen_subdi3_carry;
21975 break;
21976 default:
21977 gcc_unreachable ();
21978 }
21979 }
21980 else
21981 {
21982 switch (mode)
21983 {
21984 case QImode:
21985 insn = gen_addqi3_carry;
21986 break;
21987 case HImode:
21988 insn = gen_addhi3_carry;
21989 break;
21990 case SImode:
21991 insn = gen_addsi3_carry;
21992 break;
21993 case DImode:
21994 insn = gen_adddi3_carry;
21995 break;
21996 default:
21997 gcc_unreachable ();
21998 }
21999 }
22000 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22001
22002 return true;
22003 }
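
/* Illustrative sketch (not part of the original source): in C terms the
   adc/sbb expansion above computes a branch-free conditional increment or
   decrement, roughly

     static unsigned cond_add (unsigned a, unsigned b, unsigned y)
     {
       unsigned carry = a < b;   // materialized in the carry flag by cmp
       return y + carry;         // emitted as a single adc instruction
     }

   with the sbb form used when operands[3] is -1 or the condition has been
   reversed. */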
22004
22005
22006 /* Split OPERAND into half-mode parts PARTS. Similar to split_double_mode,
22007 but works for floating point parameters and non-offsettable memories.
22008 For pushes, it returns just stack offsets; the values will be saved
22009 in the right order. At most four parts are generated. */
22010
22011 static int
22012 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22013 {
22014 int size;
22015
22016 if (!TARGET_64BIT)
22017 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22018 else
22019 size = (GET_MODE_SIZE (mode) + 4) / 8;
22020
22021 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22022 gcc_assert (size >= 2 && size <= 4);
22023
22024 /* Optimize constant pool references to immediates. This is used by fp
22025 moves, which force all constants to memory to allow combining. */
22026 if (MEM_P (operand) && MEM_READONLY_P (operand))
22027 {
22028 rtx tmp = maybe_get_pool_constant (operand);
22029 if (tmp)
22030 operand = tmp;
22031 }
22032
22033 if (MEM_P (operand) && !offsettable_memref_p (operand))
22034 {
22035 /* The only non-offsettable memories we handle are pushes. */
22036 int ok = push_operand (operand, VOIDmode);
22037
22038 gcc_assert (ok);
22039
22040 operand = copy_rtx (operand);
22041 PUT_MODE (operand, word_mode);
22042 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22043 return size;
22044 }
22045
22046 if (GET_CODE (operand) == CONST_VECTOR)
22047 {
22048 enum machine_mode imode = int_mode_for_mode (mode);
22049 /* Caution: if we looked through a constant pool memory above,
22050 the operand may actually have a different mode now. That's
22051 ok, since we want to pun this all the way back to an integer. */
22052 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22053 gcc_assert (operand != NULL);
22054 mode = imode;
22055 }
22056
22057 if (!TARGET_64BIT)
22058 {
22059 if (mode == DImode)
22060 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22061 else
22062 {
22063 int i;
22064
22065 if (REG_P (operand))
22066 {
22067 gcc_assert (reload_completed);
22068 for (i = 0; i < size; i++)
22069 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22070 }
22071 else if (offsettable_memref_p (operand))
22072 {
22073 operand = adjust_address (operand, SImode, 0);
22074 parts[0] = operand;
22075 for (i = 1; i < size; i++)
22076 parts[i] = adjust_address (operand, SImode, 4 * i);
22077 }
22078 else if (GET_CODE (operand) == CONST_DOUBLE)
22079 {
22080 REAL_VALUE_TYPE r;
22081 long l[4];
22082
22083 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22084 switch (mode)
22085 {
22086 case TFmode:
22087 real_to_target (l, &r, mode);
22088 parts[3] = gen_int_mode (l[3], SImode);
22089 parts[2] = gen_int_mode (l[2], SImode);
22090 break;
22091 case XFmode:
22092 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22093 long double may not be 80-bit. */
22094 real_to_target (l, &r, mode);
22095 parts[2] = gen_int_mode (l[2], SImode);
22096 break;
22097 case DFmode:
22098 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22099 break;
22100 default:
22101 gcc_unreachable ();
22102 }
22103 parts[1] = gen_int_mode (l[1], SImode);
22104 parts[0] = gen_int_mode (l[0], SImode);
22105 }
22106 else
22107 gcc_unreachable ();
22108 }
22109 }
22110 else
22111 {
22112 if (mode == TImode)
22113 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22114 if (mode == XFmode || mode == TFmode)
22115 {
22116 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22117 if (REG_P (operand))
22118 {
22119 gcc_assert (reload_completed);
22120 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22121 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22122 }
22123 else if (offsettable_memref_p (operand))
22124 {
22125 operand = adjust_address (operand, DImode, 0);
22126 parts[0] = operand;
22127 parts[1] = adjust_address (operand, upper_mode, 8);
22128 }
22129 else if (GET_CODE (operand) == CONST_DOUBLE)
22130 {
22131 REAL_VALUE_TYPE r;
22132 long l[4];
22133
22134 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22135 real_to_target (l, &r, mode);
22136
22137 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22138 if (HOST_BITS_PER_WIDE_INT >= 64)
22139 parts[0]
22140 = gen_int_mode
22141 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22142 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22143 DImode);
22144 else
22145 parts[0] = immed_double_const (l[0], l[1], DImode);
22146
22147 if (upper_mode == SImode)
22148 parts[1] = gen_int_mode (l[2], SImode);
22149 else if (HOST_BITS_PER_WIDE_INT >= 64)
22150 parts[1]
22151 = gen_int_mode
22152 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22153 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22154 DImode);
22155 else
22156 parts[1] = immed_double_const (l[2], l[3], DImode);
22157 }
22158 else
22159 gcc_unreachable ();
22160 }
22161 }
22162
22163 return size;
22164 }
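
/* Illustrative sketch (not part of the original source): on a 32-bit target
   a DImode quantity is split into two SImode parts in the obvious
   little-endian way, e.g. for a constant:

     static void ref_split_di (unsigned long long x, unsigned parts[2])
     {
       parts[0] = (unsigned) (x & 0xffffffffu);   // low word
       parts[1] = (unsigned) (x >> 32);           // high word
     }

   XFmode and TFmode values are split into three or four such parts via
   real_to_target above. */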
22165
22166 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22167 All required insns are emitted here; no normal moves are left for the
22168 caller. Operands 2-5 receive the destination parts and operands 6-9
22169 the source parts, in the correct order. */
22170
22171 void
22172 ix86_split_long_move (rtx operands[])
22173 {
22174 rtx part[2][4];
22175 int nparts, i, j;
22176 int push = 0;
22177 int collisions = 0;
22178 enum machine_mode mode = GET_MODE (operands[0]);
22179 bool collisionparts[4];
22180
22181 /* The DFmode expanders may ask us to move a double.
22182 For a 64-bit target this is a single move. By hiding the fact
22183 here we simplify the i386.md splitters. */
22184 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22185 {
22186 /* Optimize constant pool references to immediates. This is used by
22187 fp moves, which force all constants to memory to allow combining. */
22188
22189 if (MEM_P (operands[1])
22190 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22191 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22192 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22193 if (push_operand (operands[0], VOIDmode))
22194 {
22195 operands[0] = copy_rtx (operands[0]);
22196 PUT_MODE (operands[0], word_mode);
22197 }
22198 else
22199 operands[0] = gen_lowpart (DImode, operands[0]);
22200 operands[1] = gen_lowpart (DImode, operands[1]);
22201 emit_move_insn (operands[0], operands[1]);
22202 return;
22203 }
22204
22205 /* The only non-offsettable memory we handle is a push. */
22206 if (push_operand (operands[0], VOIDmode))
22207 push = 1;
22208 else
22209 gcc_assert (!MEM_P (operands[0])
22210 || offsettable_memref_p (operands[0]));
22211
22212 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22213 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22214
22215 /* When emitting push, take care for source operands on the stack. */
22216 if (push && MEM_P (operands[1])
22217 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22218 {
22219 rtx src_base = XEXP (part[1][nparts - 1], 0);
22220
22221 /* Compensate for the stack decrement by 4. */
22222 if (!TARGET_64BIT && nparts == 3
22223 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22224 src_base = plus_constant (Pmode, src_base, 4);
22225
22226 /* src_base refers to the stack pointer and is
22227 automatically decreased by emitted push. */
22228 for (i = 0; i < nparts; i++)
22229 part[1][i] = change_address (part[1][i],
22230 GET_MODE (part[1][i]), src_base);
22231 }
22232
22233 /* We need to do the copy in the right order in case an address register
22234 of the source overlaps the destination. */
22235 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22236 {
22237 rtx tmp;
22238
22239 for (i = 0; i < nparts; i++)
22240 {
22241 collisionparts[i]
22242 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22243 if (collisionparts[i])
22244 collisions++;
22245 }
22246
22247 /* Collision in the middle part can be handled by reordering. */
22248 if (collisions == 1 && nparts == 3 && collisionparts [1])
22249 {
22250 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22251 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22252 }
22253 else if (collisions == 1
22254 && nparts == 4
22255 && (collisionparts [1] || collisionparts [2]))
22256 {
22257 if (collisionparts [1])
22258 {
22259 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22260 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22261 }
22262 else
22263 {
22264 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22265 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22266 }
22267 }
22268
22269 /* If there are more collisions, we can't handle it by reordering.
22270 Do an lea to the last part and use only one colliding move. */
22271 else if (collisions > 1)
22272 {
22273 rtx base;
22274
22275 collisions = 1;
22276
22277 base = part[0][nparts - 1];
22278
22279 /* Handle the case when the last part isn't valid for lea.
22280 Happens in 64-bit mode storing the 12-byte XFmode. */
22281 if (GET_MODE (base) != Pmode)
22282 base = gen_rtx_REG (Pmode, REGNO (base));
22283
22284 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22285 part[1][0] = replace_equiv_address (part[1][0], base);
22286 for (i = 1; i < nparts; i++)
22287 {
22288 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22289 part[1][i] = replace_equiv_address (part[1][i], tmp);
22290 }
22291 }
22292 }
22293
22294 if (push)
22295 {
22296 if (!TARGET_64BIT)
22297 {
22298 if (nparts == 3)
22299 {
22300 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22301 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22302 stack_pointer_rtx, GEN_INT (-4)));
22303 emit_move_insn (part[0][2], part[1][2]);
22304 }
22305 else if (nparts == 4)
22306 {
22307 emit_move_insn (part[0][3], part[1][3]);
22308 emit_move_insn (part[0][2], part[1][2]);
22309 }
22310 }
22311 else
22312 {
22313 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22314 a register, it is OK - we will just use the larger counterpart. We also
22315 retype memory - this comes from an attempt to avoid the REX prefix on
22316 moving the second half of a TFmode value. */
22317 if (GET_MODE (part[1][1]) == SImode)
22318 {
22319 switch (GET_CODE (part[1][1]))
22320 {
22321 case MEM:
22322 part[1][1] = adjust_address (part[1][1], DImode, 0);
22323 break;
22324
22325 case REG:
22326 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22327 break;
22328
22329 default:
22330 gcc_unreachable ();
22331 }
22332
22333 if (GET_MODE (part[1][0]) == SImode)
22334 part[1][0] = part[1][1];
22335 }
22336 }
22337 emit_move_insn (part[0][1], part[1][1]);
22338 emit_move_insn (part[0][0], part[1][0]);
22339 return;
22340 }
22341
22342 /* Choose correct order to not overwrite the source before it is copied. */
22343 if ((REG_P (part[0][0])
22344 && REG_P (part[1][1])
22345 && (REGNO (part[0][0]) == REGNO (part[1][1])
22346 || (nparts == 3
22347 && REGNO (part[0][0]) == REGNO (part[1][2]))
22348 || (nparts == 4
22349 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22350 || (collisions > 0
22351 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22352 {
22353 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22354 {
22355 operands[2 + i] = part[0][j];
22356 operands[6 + i] = part[1][j];
22357 }
22358 }
22359 else
22360 {
22361 for (i = 0; i < nparts; i++)
22362 {
22363 operands[2 + i] = part[0][i];
22364 operands[6 + i] = part[1][i];
22365 }
22366 }
22367
22368 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22369 if (optimize_insn_for_size_p ())
22370 {
22371 for (j = 0; j < nparts - 1; j++)
22372 if (CONST_INT_P (operands[6 + j])
22373 && operands[6 + j] != const0_rtx
22374 && REG_P (operands[2 + j]))
22375 for (i = j; i < nparts - 1; i++)
22376 if (CONST_INT_P (operands[7 + i])
22377 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22378 operands[7 + i] = operands[2 + j];
22379 }
22380
22381 for (i = 0; i < nparts; i++)
22382 emit_move_insn (operands[2 + i], operands[6 + i]);
22383
22384 return;
22385 }
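
/* Illustrative sketch (not part of the original source): the ordering logic
   above solves the classic multiword-move problem.  For a two-part move it
   reduces to copying the high part first whenever the low destination
   aliases the high source, roughly

     static void ref_move_2words (unsigned *dlo, unsigned *dhi,
                                  const unsigned *slo, const unsigned *shi)
     {
       if (dlo == shi)                   // low dest aliases the high source
         { *dhi = *shi; *dlo = *slo; }   // copy high part first
       else
         { *dlo = *slo; *dhi = *shi; }
     }

   The nparts == 3 and 4 cases additionally swap the middle parts, or fall
   back to an lea of the source address when reordering alone cannot resolve
   the collisions. */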
22386
22387 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22388 left shift by a constant, either using a single shift or
22389 a sequence of add instructions. */
22390
22391 static void
22392 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22393 {
22394 rtx (*insn)(rtx, rtx, rtx);
22395
22396 if (count == 1
22397 || (count * ix86_cost->add <= ix86_cost->shift_const
22398 && !optimize_insn_for_size_p ()))
22399 {
22400 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22401 while (count-- > 0)
22402 emit_insn (insn (operand, operand, operand));
22403 }
22404 else
22405 {
22406 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22407 emit_insn (insn (operand, operand, GEN_INT (count)));
22408 }
22409 }
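
/* Illustrative sketch (not part of the original source): when the add
   sequence is cheaper than a constant shift, x << COUNT is expanded as
   COUNT self-additions, e.g.

     static unsigned ref_shl_by_adds (unsigned x, int count)
     {
       while (count-- > 0)
         x += x;          // each add doubles the value
       return x;
     }

   otherwise a single shift instruction is emitted. */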
22410
22411 void
22412 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22413 {
22414 rtx (*gen_ashl3)(rtx, rtx, rtx);
22415 rtx (*gen_shld)(rtx, rtx, rtx);
22416 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22417
22418 rtx low[2], high[2];
22419 int count;
22420
22421 if (CONST_INT_P (operands[2]))
22422 {
22423 split_double_mode (mode, operands, 2, low, high);
22424 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22425
22426 if (count >= half_width)
22427 {
22428 emit_move_insn (high[0], low[1]);
22429 emit_move_insn (low[0], const0_rtx);
22430
22431 if (count > half_width)
22432 ix86_expand_ashl_const (high[0], count - half_width, mode);
22433 }
22434 else
22435 {
22436 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22437
22438 if (!rtx_equal_p (operands[0], operands[1]))
22439 emit_move_insn (operands[0], operands[1]);
22440
22441 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22442 ix86_expand_ashl_const (low[0], count, mode);
22443 }
22444 return;
22445 }
22446
22447 split_double_mode (mode, operands, 1, low, high);
22448
22449 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22450
22451 if (operands[1] == const1_rtx)
22452 {
22453 /* Assuming we've chosen QImode-capable registers, then 1 << N
22454 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22455 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22456 {
22457 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22458
22459 ix86_expand_clear (low[0]);
22460 ix86_expand_clear (high[0]);
22461 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22462
22463 d = gen_lowpart (QImode, low[0]);
22464 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22465 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22466 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22467
22468 d = gen_lowpart (QImode, high[0]);
22469 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22470 s = gen_rtx_NE (QImode, flags, const0_rtx);
22471 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22472 }
22473
22474 /* Otherwise, we can get the same results by manually performing
22475 a bit extract operation on bit 5/6, and then performing the two
22476 shifts. The two methods of getting 0/1 into low/high are exactly
22477 the same size. Avoiding the shift in the bit extract case helps
22478 pentium4 a bit; no one else seems to care much either way. */
22479 else
22480 {
22481 enum machine_mode half_mode;
22482 rtx (*gen_lshr3)(rtx, rtx, rtx);
22483 rtx (*gen_and3)(rtx, rtx, rtx);
22484 rtx (*gen_xor3)(rtx, rtx, rtx);
22485 HOST_WIDE_INT bits;
22486 rtx x;
22487
22488 if (mode == DImode)
22489 {
22490 half_mode = SImode;
22491 gen_lshr3 = gen_lshrsi3;
22492 gen_and3 = gen_andsi3;
22493 gen_xor3 = gen_xorsi3;
22494 bits = 5;
22495 }
22496 else
22497 {
22498 half_mode = DImode;
22499 gen_lshr3 = gen_lshrdi3;
22500 gen_and3 = gen_anddi3;
22501 gen_xor3 = gen_xordi3;
22502 bits = 6;
22503 }
22504
22505 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22506 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22507 else
22508 x = gen_lowpart (half_mode, operands[2]);
22509 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22510
22511 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22512 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22513 emit_move_insn (low[0], high[0]);
22514 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22515 }
22516
22517 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22518 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22519 return;
22520 }
22521
22522 if (operands[1] == constm1_rtx)
22523 {
22524 /* For -1 << N, we can avoid the shld instruction, because we
22525 know that we're shifting 0...31/63 ones into a -1. */
22526 emit_move_insn (low[0], constm1_rtx);
22527 if (optimize_insn_for_size_p ())
22528 emit_move_insn (high[0], low[0]);
22529 else
22530 emit_move_insn (high[0], constm1_rtx);
22531 }
22532 else
22533 {
22534 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22535
22536 if (!rtx_equal_p (operands[0], operands[1]))
22537 emit_move_insn (operands[0], operands[1]);
22538
22539 split_double_mode (mode, operands, 1, low, high);
22540 emit_insn (gen_shld (high[0], low[0], operands[2]));
22541 }
22542
22543 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22544
22545 if (TARGET_CMOVE && scratch)
22546 {
22547 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22548 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22549
22550 ix86_expand_clear (scratch);
22551 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22552 }
22553 else
22554 {
22555 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22556 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22557
22558 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22559 }
22560 }
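
/* Illustrative sketch (not part of the original source): for a constant
   COUNT the double-word left shift above computes, with W the bit width of
   one half,

     static void ref_dw_shl (unsigned *lo, unsigned *hi, int count, int w)
     {
       if (count >= w)
         {
           *hi = *lo << (count - w);   // low half shifted into the high half
           *lo = 0;
         }
       else if (count > 0)
         {
           *hi = (*hi << count) | (*lo >> (w - count));   // shld
           *lo <<= count;
         }
     }

   For a variable count the shld/shift pair is emitted unconditionally and
   the count >= W case is fixed up afterwards by the x86_shift*_adj
   patterns. */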
22561
22562 void
22563 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22564 {
22565 rtx (*gen_ashr3)(rtx, rtx, rtx)
22566 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22567 rtx (*gen_shrd)(rtx, rtx, rtx);
22568 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22569
22570 rtx low[2], high[2];
22571 int count;
22572
22573 if (CONST_INT_P (operands[2]))
22574 {
22575 split_double_mode (mode, operands, 2, low, high);
22576 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22577
22578 if (count == GET_MODE_BITSIZE (mode) - 1)
22579 {
22580 emit_move_insn (high[0], high[1]);
22581 emit_insn (gen_ashr3 (high[0], high[0],
22582 GEN_INT (half_width - 1)));
22583 emit_move_insn (low[0], high[0]);
22584
22585 }
22586 else if (count >= half_width)
22587 {
22588 emit_move_insn (low[0], high[1]);
22589 emit_move_insn (high[0], low[0]);
22590 emit_insn (gen_ashr3 (high[0], high[0],
22591 GEN_INT (half_width - 1)));
22592
22593 if (count > half_width)
22594 emit_insn (gen_ashr3 (low[0], low[0],
22595 GEN_INT (count - half_width)));
22596 }
22597 else
22598 {
22599 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22600
22601 if (!rtx_equal_p (operands[0], operands[1]))
22602 emit_move_insn (operands[0], operands[1]);
22603
22604 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22605 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22606 }
22607 }
22608 else
22609 {
22610 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22611
22612 if (!rtx_equal_p (operands[0], operands[1]))
22613 emit_move_insn (operands[0], operands[1]);
22614
22615 split_double_mode (mode, operands, 1, low, high);
22616
22617 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22618 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22619
22620 if (TARGET_CMOVE && scratch)
22621 {
22622 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22623 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22624
22625 emit_move_insn (scratch, high[0]);
22626 emit_insn (gen_ashr3 (scratch, scratch,
22627 GEN_INT (half_width - 1)));
22628 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22629 scratch));
22630 }
22631 else
22632 {
22633 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22634 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22635
22636 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22637 }
22638 }
22639 }
22640
22641 void
22642 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22643 {
22644 rtx (*gen_lshr3)(rtx, rtx, rtx)
22645 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22646 rtx (*gen_shrd)(rtx, rtx, rtx);
22647 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22648
22649 rtx low[2], high[2];
22650 int count;
22651
22652 if (CONST_INT_P (operands[2]))
22653 {
22654 split_double_mode (mode, operands, 2, low, high);
22655 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22656
22657 if (count >= half_width)
22658 {
22659 emit_move_insn (low[0], high[1]);
22660 ix86_expand_clear (high[0]);
22661
22662 if (count > half_width)
22663 emit_insn (gen_lshr3 (low[0], low[0],
22664 GEN_INT (count - half_width)));
22665 }
22666 else
22667 {
22668 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22669
22670 if (!rtx_equal_p (operands[0], operands[1]))
22671 emit_move_insn (operands[0], operands[1]);
22672
22673 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22674 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22675 }
22676 }
22677 else
22678 {
22679 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22680
22681 if (!rtx_equal_p (operands[0], operands[1]))
22682 emit_move_insn (operands[0], operands[1]);
22683
22684 split_double_mode (mode, operands, 1, low, high);
22685
22686 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22687 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22688
22689 if (TARGET_CMOVE && scratch)
22690 {
22691 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22692 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22693
22694 ix86_expand_clear (scratch);
22695 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22696 scratch));
22697 }
22698 else
22699 {
22700 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22701 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22702
22703 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22704 }
22705 }
22706 }
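
/* Illustrative sketch (not part of the original source): the right-shift
   splitters mirror the left-shift case.  For a constant COUNT and half
   width W, the logical variant computes

     static void ref_dw_lshr (unsigned *lo, unsigned *hi, int count, int w)
     {
       if (count >= w)
         {
           *lo = *hi >> (count - w);   // high half shifted into the low half
           *hi = 0;
         }
       else if (count > 0)
         {
           *lo = (*lo >> count) | (*hi << (w - count));   // shrd
           *hi >>= count;
         }
     }

   The arithmetic variant differs only in filling the vacated high half with
   copies of the sign bit instead of zeros. */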
22707
22708 /* Predict the just emitted jump instruction to be taken with probability PROB. */
22709 static void
22710 predict_jump (int prob)
22711 {
22712 rtx insn = get_last_insn ();
22713 gcc_assert (JUMP_P (insn));
22714 add_int_reg_note (insn, REG_BR_PROB, prob);
22715 }
22716
22717 /* Helper function for the string operations below. Test whether VARIABLE
22718 is aligned to VALUE bytes. If so, jump to the returned label. */
22719 static rtx
22720 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22721 {
22722 rtx label = gen_label_rtx ();
22723 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22724 if (GET_MODE (variable) == DImode)
22725 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22726 else
22727 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22728 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22729 1, label);
22730 if (epilogue)
22731 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22732 else
22733 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22734 return label;
22735 }
22736
22737 /* Decrease COUNTREG by VALUE. */
22738 static void
22739 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22740 {
22741 rtx (*gen_add)(rtx, rtx, rtx)
22742 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22743
22744 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22745 }
22746
22747 /* Zero extend a possibly SImode EXP to a Pmode register. */
22748 rtx
22749 ix86_zero_extend_to_Pmode (rtx exp)
22750 {
22751 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22752 }
22753
22754 /* Divide COUNTREG by SCALE. */
22755 static rtx
22756 scale_counter (rtx countreg, int scale)
22757 {
22758 rtx sc;
22759
22760 if (scale == 1)
22761 return countreg;
22762 if (CONST_INT_P (countreg))
22763 return GEN_INT (INTVAL (countreg) / scale);
22764 gcc_assert (REG_P (countreg));
22765
22766 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22767 GEN_INT (exact_log2 (scale)),
22768 NULL, 1, OPTAB_DIRECT);
22769 return sc;
22770 }
22771
22772 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22773 DImode for constant loop counts. */
22774
22775 static enum machine_mode
22776 counter_mode (rtx count_exp)
22777 {
22778 if (GET_MODE (count_exp) != VOIDmode)
22779 return GET_MODE (count_exp);
22780 if (!CONST_INT_P (count_exp))
22781 return Pmode;
22782 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22783 return DImode;
22784 return SImode;
22785 }
22786
22787 /* Copy the address to a Pmode register. This is used for x32 to
22788 truncate DImode TLS address to a SImode register. */
22789
22790 static rtx
22791 ix86_copy_addr_to_reg (rtx addr)
22792 {
22793 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22794 return copy_addr_to_reg (addr);
22795 else
22796 {
22797 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22798 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22799 }
22800 }
22801
22802 /* When ISSETMEM is FALSE, output a simple loop to move memory from the
22803 pointer SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times; the
22804 overall size is COUNT, specified in bytes. When ISSETMEM is TRUE, output
22805 the equivalent loop to set memory to VALUE (supposed to be in MODE).
22806 
22807 The size is rounded down to a whole number of chunks moved at once.
22808 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
22809
22810
22811 static void
22812 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22813 rtx destptr, rtx srcptr, rtx value,
22814 rtx count, enum machine_mode mode, int unroll,
22815 int expected_size, bool issetmem)
22816 {
22817 rtx out_label, top_label, iter, tmp;
22818 enum machine_mode iter_mode = counter_mode (count);
22819 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22820 rtx piece_size = GEN_INT (piece_size_n);
22821 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22822 rtx size;
22823 int i;
22824
22825 top_label = gen_label_rtx ();
22826 out_label = gen_label_rtx ();
22827 iter = gen_reg_rtx (iter_mode);
22828
22829 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22830 NULL, 1, OPTAB_DIRECT);
22831 /* Those two should combine. */
22832 if (piece_size == const1_rtx)
22833 {
22834 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22835 true, out_label);
22836 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22837 }
22838 emit_move_insn (iter, const0_rtx);
22839
22840 emit_label (top_label);
22841
22842 tmp = convert_modes (Pmode, iter_mode, iter, true);
22843
22844 /* This assert could be relaxed - in that case we'll need to compute
22845 the smallest power of two containing PIECE_SIZE_N and pass it to
22846 offset_address. */
22847 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22848 destmem = offset_address (destmem, tmp, piece_size_n);
22849 destmem = adjust_address (destmem, mode, 0);
22850
22851 if (!issetmem)
22852 {
22853 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22854 srcmem = adjust_address (srcmem, mode, 0);
22855
22856 /* When unrolling for chips that reorder memory reads and writes,
22857 we can save registers by using a single temporary.
22858 Also, using 4 temporaries is overkill in 32-bit mode. */
22859 if (!TARGET_64BIT && 0)
22860 {
22861 for (i = 0; i < unroll; i++)
22862 {
22863 if (i)
22864 {
22865 destmem =
22866 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22867 srcmem =
22868 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22869 }
22870 emit_move_insn (destmem, srcmem);
22871 }
22872 }
22873 else
22874 {
22875 rtx tmpreg[4];
22876 gcc_assert (unroll <= 4);
22877 for (i = 0; i < unroll; i++)
22878 {
22879 tmpreg[i] = gen_reg_rtx (mode);
22880 if (i)
22881 {
22882 srcmem =
22883 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22884 }
22885 emit_move_insn (tmpreg[i], srcmem);
22886 }
22887 for (i = 0; i < unroll; i++)
22888 {
22889 if (i)
22890 {
22891 destmem =
22892 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22893 }
22894 emit_move_insn (destmem, tmpreg[i]);
22895 }
22896 }
22897 }
22898 else
22899 for (i = 0; i < unroll; i++)
22900 {
22901 if (i)
22902 destmem =
22903 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22904 emit_move_insn (destmem, value);
22905 }
22906
22907 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22908 true, OPTAB_LIB_WIDEN);
22909 if (tmp != iter)
22910 emit_move_insn (iter, tmp);
22911
22912 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22913 true, top_label);
22914 if (expected_size != -1)
22915 {
22916 expected_size /= GET_MODE_SIZE (mode) * unroll;
22917 if (expected_size == 0)
22918 predict_jump (0);
22919 else if (expected_size > REG_BR_PROB_BASE)
22920 predict_jump (REG_BR_PROB_BASE - 1);
22921 else
22922 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22923 }
22924 else
22925 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22926 iter = ix86_zero_extend_to_Pmode (iter);
22927 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22928 true, OPTAB_LIB_WIDEN);
22929 if (tmp != destptr)
22930 emit_move_insn (destptr, tmp);
22931 if (!issetmem)
22932 {
22933 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22934 true, OPTAB_LIB_WIDEN);
22935 if (tmp != srcptr)
22936 emit_move_insn (srcptr, tmp);
22937 }
22938 emit_label (out_label);
22939 }
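
/* Illustrative sketch (not part of the original source): the loop emitted
   above corresponds roughly to the following C, with PIECE the chunk size
   GET_MODE_SIZE (MODE) * UNROLL:

     static void ref_copy_loop (char *dst, const char *src,
                                unsigned long count, unsigned long piece)
     {
       unsigned long size = count & ~(piece - 1);   // round down to pieces
       for (unsigned long i = 0; i < size; i += piece)
         for (unsigned long j = 0; j < piece; j++)  // UNROLL MODE-sized moves
           dst[i + j] = src[i + j];                 // in the real expansion
     }

   The destination and source pointers are then advanced by SIZE for the
   epilogue, and the setmem variant stores VALUE instead of loading from the
   source. */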
22940
22941 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22942 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22943 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22944 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22945 ORIG_VALUE is the original value passed to memset to fill the memory with.
22946 Other arguments have the same meaning as for the previous function. */
22947
22948 static void
22949 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22950 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22951 rtx count,
22952 enum machine_mode mode, bool issetmem)
22953 {
22954 rtx destexp;
22955 rtx srcexp;
22956 rtx countreg;
22957 HOST_WIDE_INT rounded_count;
22958
22959 /* If possible, it is shorter to use rep movs.
22960 TODO: Maybe it is better to move this logic to decide_alg. */
22961 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22962 && (!issetmem || orig_value == const0_rtx))
22963 mode = SImode;
22964
22965 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22966 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22967
22968 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22969 GET_MODE_SIZE (mode)));
22970 if (mode != QImode)
22971 {
22972 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22973 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22974 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22975 }
22976 else
22977 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22978 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22979 {
22980 rounded_count = (INTVAL (count)
22981 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22982 destmem = shallow_copy_rtx (destmem);
22983 set_mem_size (destmem, rounded_count);
22984 }
22985 else if (MEM_SIZE_KNOWN_P (destmem))
22986 clear_mem_size (destmem);
22987
22988 if (issetmem)
22989 {
22990 value = force_reg (mode, gen_lowpart (mode, value));
22991 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22992 }
22993 else
22994 {
22995 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22996 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22997 if (mode != QImode)
22998 {
22999 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23000 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23001 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23002 }
23003 else
23004 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23005 if (CONST_INT_P (count))
23006 {
23007 rounded_count = (INTVAL (count)
23008 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23009 srcmem = shallow_copy_rtx (srcmem);
23010 set_mem_size (srcmem, rounded_count);
23011 }
23012 else
23013 {
23014 if (MEM_SIZE_KNOWN_P (srcmem))
23015 clear_mem_size (srcmem);
23016 }
23017 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23018 destexp, srcexp));
23019 }
23020 }
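
/* Illustrative sketch (not part of the original source): for the copy case
   the expansion above scales the byte count down by the chunk size and lets
   a single "rep movs" do the work; in C terms roughly

     static void ref_rep_movs (int *dst, const int *src, unsigned long bytes)
     {
       unsigned long n = bytes / sizeof (int);   // scale_counter
       while (n--)                               // rep movsd
         *dst++ = *src++;
     }

   with any bytes left over below the chunk size handled by the epilogue
   code elsewhere. */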
23021
23022 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23023 DESTMEM.
23024 SRCMEM is passed by pointer to be updated on return.
23025 The return value is the updated DESTMEM. */
23026 static rtx
23027 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23028 HOST_WIDE_INT size_to_move)
23029 {
23030 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23031 enum insn_code code;
23032 enum machine_mode move_mode;
23033 int piece_size, i;
23034
23035 /* Find the widest mode in which we could perform moves.
23036 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and
23037 halve it until a move of that size is supported. */
23038 piece_size = 1 << floor_log2 (size_to_move);
23039 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23040 code = optab_handler (mov_optab, move_mode);
23041 while (code == CODE_FOR_nothing && piece_size > 1)
23042 {
23043 piece_size >>= 1;
23044 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23045 code = optab_handler (mov_optab, move_mode);
23046 }
23047
23048 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23049 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23050 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23051 {
23052 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23053 move_mode = mode_for_vector (word_mode, nunits);
23054 code = optab_handler (mov_optab, move_mode);
23055 if (code == CODE_FOR_nothing)
23056 {
23057 move_mode = word_mode;
23058 piece_size = GET_MODE_SIZE (move_mode);
23059 code = optab_handler (mov_optab, move_mode);
23060 }
23061 }
23062 gcc_assert (code != CODE_FOR_nothing);
23063
23064 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23065 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23066
23067 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23068 gcc_assert (size_to_move % piece_size == 0);
23069 adjust = GEN_INT (piece_size);
23070 for (i = 0; i < size_to_move; i += piece_size)
23071 {
23072 /* We move from memory to memory, so we'll need to do it via
23073 a temporary register. */
23074 tempreg = gen_reg_rtx (move_mode);
23075 emit_insn (GEN_FCN (code) (tempreg, src));
23076 emit_insn (GEN_FCN (code) (dst, tempreg));
23077
23078 emit_move_insn (destptr,
23079 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23080 emit_move_insn (srcptr,
23081 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23082
23083 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23084 piece_size);
23085 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23086 piece_size);
23087 }
23088
23089 /* Update DST and SRC rtx. */
23090 *srcmem = src;
23091 return dst;
23092 }
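
/* Illustrative sketch (not part of the original source): the piece-size
   selection above starts from the largest power of two not exceeding
   SIZE_TO_MOVE and halves it until a supported move width is found, roughly

     static int ref_pick_piece_size (int size_to_move)
     {
       int piece = 1;
       while (piece * 2 <= size_to_move)   // 1 << floor_log2 (size_to_move)
         piece *= 2;
       while (piece > 1 && !move_supported_p (piece))   // hypothetical query
         piece /= 2;
       return piece;
     }

   where move_supported_p stands in for the optab_handler check. */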
23093
23094 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23095 static void
23096 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23097 rtx destptr, rtx srcptr, rtx count, int max_size)
23098 {
23099 rtx src, dest;
23100 if (CONST_INT_P (count))
23101 {
23102 HOST_WIDE_INT countval = INTVAL (count);
23103 HOST_WIDE_INT epilogue_size = countval % max_size;
23104 int i;
23105
23106 /* For now MAX_SIZE should be a power of 2. This assert could be
23107 relaxed, but it'll require a bit more complicated epilogue
23108 expanding. */
23109 gcc_assert ((max_size & (max_size - 1)) == 0);
23110 for (i = max_size; i >= 1; i >>= 1)
23111 {
23112 if (epilogue_size & i)
23113 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23114 }
23115 return;
23116 }
23117 if (max_size > 8)
23118 {
23119 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23120 count, 1, OPTAB_DIRECT);
23121 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23122 count, QImode, 1, 4, false);
23123 return;
23124 }
23125
23126 /* When there are stringops, we can cheaply increase dest and src pointers.
23127 Otherwise we save code size by maintaining an offset (zero is readily
23128 available from the preceding rep operation) and using x86 addressing modes.
23129 */
23130 if (TARGET_SINGLE_STRINGOP)
23131 {
23132 if (max_size > 4)
23133 {
23134 rtx label = ix86_expand_aligntest (count, 4, true);
23135 src = change_address (srcmem, SImode, srcptr);
23136 dest = change_address (destmem, SImode, destptr);
23137 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23138 emit_label (label);
23139 LABEL_NUSES (label) = 1;
23140 }
23141 if (max_size > 2)
23142 {
23143 rtx label = ix86_expand_aligntest (count, 2, true);
23144 src = change_address (srcmem, HImode, srcptr);
23145 dest = change_address (destmem, HImode, destptr);
23146 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23147 emit_label (label);
23148 LABEL_NUSES (label) = 1;
23149 }
23150 if (max_size > 1)
23151 {
23152 rtx label = ix86_expand_aligntest (count, 1, true);
23153 src = change_address (srcmem, QImode, srcptr);
23154 dest = change_address (destmem, QImode, destptr);
23155 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23156 emit_label (label);
23157 LABEL_NUSES (label) = 1;
23158 }
23159 }
23160 else
23161 {
23162 rtx offset = force_reg (Pmode, const0_rtx);
23163 rtx tmp;
23164
23165 if (max_size > 4)
23166 {
23167 rtx label = ix86_expand_aligntest (count, 4, true);
23168 src = change_address (srcmem, SImode, srcptr);
23169 dest = change_address (destmem, SImode, destptr);
23170 emit_move_insn (dest, src);
23171 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23172 true, OPTAB_LIB_WIDEN);
23173 if (tmp != offset)
23174 emit_move_insn (offset, tmp);
23175 emit_label (label);
23176 LABEL_NUSES (label) = 1;
23177 }
23178 if (max_size > 2)
23179 {
23180 rtx label = ix86_expand_aligntest (count, 2, true);
23181 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23182 src = change_address (srcmem, HImode, tmp);
23183 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23184 dest = change_address (destmem, HImode, tmp);
23185 emit_move_insn (dest, src);
23186 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23187 true, OPTAB_LIB_WIDEN);
23188 if (tmp != offset)
23189 emit_move_insn (offset, tmp);
23190 emit_label (label);
23191 LABEL_NUSES (label) = 1;
23192 }
23193 if (max_size > 1)
23194 {
23195 rtx label = ix86_expand_aligntest (count, 1, true);
23196 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23197 src = change_address (srcmem, QImode, tmp);
23198 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23199 dest = change_address (destmem, QImode, tmp);
23200 emit_move_insn (dest, src);
23201 emit_label (label);
23202 LABEL_NUSES (label) = 1;
23203 }
23204 }
23205 }
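
/* Illustrative sketch (not part of the original source): with a constant
   COUNT the epilogue above decomposes the remainder COUNT % MAX_SIZE into
   power-of-two pieces, e.g.

     static void ref_copy_epilogue (char *dst, const char *src,
                                    unsigned long count, int max_size)
     {
       unsigned long rem = count % max_size;
       for (int i = max_size; i >= 1; i >>= 1)
         if (rem & i)
           {
             for (int j = 0; j < i; j++)   // one i-byte move in the real code
               dst[j] = src[j];
             dst += i, src += i;
           }
     }

   For a variable count the aligntest/label sequences above play the same
   role one power of two at a time. */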
23206
23207 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23208 with the value PROMOTED_VAL.
23209 Unlike emit_memmov above, there is no source pointer to update.
23210 The return value is the updated DESTMEM. */
23211 static rtx
23212 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23213 HOST_WIDE_INT size_to_move)
23214 {
23215 rtx dst = destmem, adjust;
23216 enum insn_code code;
23217 enum machine_mode move_mode;
23218 int piece_size, i;
23219
23220 /* Pick the widest mode in which we could perform the stores: start from
23221 the mode of PROMOTED_VAL and fall back to a narrower integer mode when
23222 SIZE_TO_MOVE is smaller than that. */
23223 move_mode = GET_MODE (promoted_val);
23224 if (move_mode == VOIDmode)
23225 move_mode = QImode;
23226 if (size_to_move < GET_MODE_SIZE (move_mode))
23227 {
23228 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23229 promoted_val = gen_lowpart (move_mode, promoted_val);
23230 }
23231 piece_size = GET_MODE_SIZE (move_mode);
23232 code = optab_handler (mov_optab, move_mode);
23233 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23234
23235 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23236
23237 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23238 gcc_assert (size_to_move % piece_size == 0);
23239 adjust = GEN_INT (piece_size);
23240 for (i = 0; i < size_to_move; i += piece_size)
23241 {
23242 if (piece_size <= GET_MODE_SIZE (word_mode))
23243 {
23244 emit_insn (gen_strset (destptr, dst, promoted_val));
23245 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23246 piece_size);
23247 continue;
23248 }
23249
23250 emit_insn (GEN_FCN (code) (dst, promoted_val));
23251
23252 emit_move_insn (destptr,
23253 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23254
23255 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23256 piece_size);
23257 }
23258
23259 /* Update DST rtx. */
23260 return dst;
23261 }
23262 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23263 static void
23264 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23265 rtx count, int max_size)
23266 {
23267 count =
23268 expand_simple_binop (counter_mode (count), AND, count,
23269 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23270 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23271 gen_lowpart (QImode, value), count, QImode,
23272 1, max_size / 2, true);
23273 }
23274
23275 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23276 static void
23277 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23278 rtx count, int max_size)
23279 {
23280 rtx dest;
23281
23282 if (CONST_INT_P (count))
23283 {
23284 HOST_WIDE_INT countval = INTVAL (count);
23285 HOST_WIDE_INT epilogue_size = countval % max_size;
23286 int i;
23287
23288 /* For now MAX_SIZE should be a power of 2. This assert could be
23289 relaxed, but it'll require a bit more complicated epilogue
23290 expansion. */
23291 gcc_assert ((max_size & (max_size - 1)) == 0);
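/* For example (illustrative): with MAX_SIZE == 16 and COUNTVAL % 16 == 11
(binary 1011), the loop below emits stores of 8, 2 and 1 bytes. */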
23292 for (i = max_size; i >= 1; i >>= 1)
23293 {
23294 if (epilogue_size & i)
23295 {
23296 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23297 destmem = emit_memset (destmem, destptr, vec_value, i);
23298 else
23299 destmem = emit_memset (destmem, destptr, value, i);
23300 }
23301 }
23302 return;
23303 }
23304 if (max_size > 32)
23305 {
23306 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23307 return;
23308 }
23309 if (max_size > 16)
23310 {
23311 rtx label = ix86_expand_aligntest (count, 16, true);
23312 if (TARGET_64BIT)
23313 {
23314 dest = change_address (destmem, DImode, destptr);
23315 emit_insn (gen_strset (destptr, dest, value));
23316 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23317 emit_insn (gen_strset (destptr, dest, value));
23318 }
23319 else
23320 {
23321 dest = change_address (destmem, SImode, destptr);
23322 emit_insn (gen_strset (destptr, dest, value));
23323 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23324 emit_insn (gen_strset (destptr, dest, value));
23325 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23326 emit_insn (gen_strset (destptr, dest, value));
23327 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23328 emit_insn (gen_strset (destptr, dest, value));
23329 }
23330 emit_label (label);
23331 LABEL_NUSES (label) = 1;
23332 }
23333 if (max_size > 8)
23334 {
23335 rtx label = ix86_expand_aligntest (count, 8, true);
23336 if (TARGET_64BIT)
23337 {
23338 dest = change_address (destmem, DImode, destptr);
23339 emit_insn (gen_strset (destptr, dest, value));
23340 }
23341 else
23342 {
23343 dest = change_address (destmem, SImode, destptr);
23344 emit_insn (gen_strset (destptr, dest, value));
23345 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23346 emit_insn (gen_strset (destptr, dest, value));
23347 }
23348 emit_label (label);
23349 LABEL_NUSES (label) = 1;
23350 }
23351 if (max_size > 4)
23352 {
23353 rtx label = ix86_expand_aligntest (count, 4, true);
23354 dest = change_address (destmem, SImode, destptr);
23355 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23356 emit_label (label);
23357 LABEL_NUSES (label) = 1;
23358 }
23359 if (max_size > 2)
23360 {
23361 rtx label = ix86_expand_aligntest (count, 2, true);
23362 dest = change_address (destmem, HImode, destptr);
23363 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23364 emit_label (label);
23365 LABEL_NUSES (label) = 1;
23366 }
23367 if (max_size > 1)
23368 {
23369 rtx label = ix86_expand_aligntest (count, 1, true);
23370 dest = change_address (destmem, QImode, destptr);
23371 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23372 emit_label (label);
23373 LABEL_NUSES (label) = 1;
23374 }
23375 }
23376
23377 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23378 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23379 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23380 ignored.
23381 Return value is the updated DESTMEM. */
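/* Illustrative note: with ALIGN == 1 and DESIRED_ALIGNMENT == 8, the loop
below conditionally copies (or stores) 1, 2 and 4 bytes, each guarded by an
alignment test on DESTPTR, until the destination is 8-byte aligned. */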
23382 static rtx
23383 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23384 rtx destptr, rtx srcptr, rtx value,
23385 rtx vec_value, rtx count, int align,
23386 int desired_alignment, bool issetmem)
23387 {
23388 int i;
23389 for (i = 1; i < desired_alignment; i <<= 1)
23390 {
23391 if (align <= i)
23392 {
23393 rtx label = ix86_expand_aligntest (destptr, i, false);
23394 if (issetmem)
23395 {
23396 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23397 destmem = emit_memset (destmem, destptr, vec_value, i);
23398 else
23399 destmem = emit_memset (destmem, destptr, value, i);
23400 }
23401 else
23402 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23403 ix86_adjust_counter (count, i);
23404 emit_label (label);
23405 LABEL_NUSES (label) = 1;
23406 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23407 }
23408 }
23409 return destmem;
23410 }
23411
23412 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23413 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23414 and jump to DONE_LABEL. */
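/* Illustrative note: the sequence below first stores SIZE bytes at DEST and
then SIZE bytes ending at DEST + COUNT, so any length in SIZE..2*SIZE-1 is
covered by two possibly overlapping chunks. */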
23415 static void
23416 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23417 rtx destptr, rtx srcptr,
23418 rtx value, rtx vec_value,
23419 rtx count, int size,
23420 rtx done_label, bool issetmem)
23421 {
23422 rtx label = ix86_expand_aligntest (count, size, false);
23423 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23424 rtx modesize;
23425 int n;
23426
23427 /* If we do not have a vector value to copy, we must reduce the size. */
23428 if (issetmem)
23429 {
23430 if (!vec_value)
23431 {
23432 if (GET_MODE (value) == VOIDmode && size > 8)
23433 mode = Pmode;
23434 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23435 mode = GET_MODE (value);
23436 }
23437 else
23438 mode = GET_MODE (vec_value), value = vec_value;
23439 }
23440 else
23441 {
23442 /* Choose appropriate vector mode. */
23443 if (size >= 32)
23444 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23445 else if (size >= 16)
23446 mode = TARGET_SSE ? V16QImode : DImode;
23447 srcmem = change_address (srcmem, mode, srcptr);
23448 }
23449 destmem = change_address (destmem, mode, destptr);
23450 modesize = GEN_INT (GET_MODE_SIZE (mode));
23451 gcc_assert (GET_MODE_SIZE (mode) <= size);
23452 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23453 {
23454 if (issetmem)
23455 emit_move_insn (destmem, gen_lowpart (mode, value));
23456 else
23457 {
23458 emit_move_insn (destmem, srcmem);
23459 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23460 }
23461 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23462 }
23463
23464 destmem = offset_address (destmem, count, 1);
23465 destmem = offset_address (destmem, GEN_INT (-2 * size),
23466 GET_MODE_SIZE (mode));
23467 if (!issetmem)
23468 {
23469 srcmem = offset_address (srcmem, count, 1);
23470 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23471 GET_MODE_SIZE (mode));
23472 }
23473 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23474 {
23475 if (issetmem)
23476 emit_move_insn (destmem, gen_lowpart (mode, value));
23477 else
23478 {
23479 emit_move_insn (destmem, srcmem);
23480 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23481 }
23482 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23483 }
23484 emit_jump_insn (gen_jump (done_label));
23485 emit_barrier ();
23486
23487 emit_label (label);
23488 LABEL_NUSES (label) = 1;
23489 }
23490
23491 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23492 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23493 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23494 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23495 DONE_LABEL is a label after the whole copying sequence. The label is created
23496 on demand if *DONE_LABEL is NULL.
23497 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for new
23498 bounds after the initial copies.
23499
23500 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23501 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23502 we will dispatch to a library call for large blocks.
23503
23504 In pseudocode we do:
23505
23506 if (COUNT < SIZE)
23507 {
23508 Assume that SIZE is 4. Bigger sizes are handled analogously
23509 if (COUNT & 4)
23510 {
23511 copy 4 bytes from SRCPTR to DESTPTR
23512 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23513 goto done_label
23514 }
23515 if (!COUNT)
23516 goto done_label;
23517 copy 1 byte from SRCPTR to DESTPTR
23518 if (COUNT & 2)
23519 {
23520 copy 2 bytes from SRCPTR to DESTPTR
23521 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23522 }
23523 }
23524 else
23525 {
23526 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23527 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23528
23529 OLD_DESTPTR = DESTPTR;
23530 Align DESTPTR up to DESIRED_ALIGN
23531 SRCPTR += DESTPTR - OLD_DESTPTR
23532 COUNT -= DESTPTR - OLD_DESTPTR
23533 if (DYNAMIC_CHECK)
23534 Round COUNT down to multiple of SIZE
23535 << optional caller supplied zero size guard is here >>
23536 << optional caller supplied dynamic check is here >>
23537 << caller supplied main copy loop is here >>
23538 }
23539 done_label:
23540 */
23541 static void
23542 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23543 rtx *destptr, rtx *srcptr,
23544 enum machine_mode mode,
23545 rtx value, rtx vec_value,
23546 rtx *count,
23547 rtx *done_label,
23548 int size,
23549 int desired_align,
23550 int align,
23551 unsigned HOST_WIDE_INT *min_size,
23552 bool dynamic_check,
23553 bool issetmem)
23554 {
23555 rtx loop_label = NULL, label;
23556 int n;
23557 rtx modesize;
23558 int prolog_size = 0;
23559 rtx mode_value;
23560
23561 /* Choose the proper value to copy. */
23562 if (issetmem && VECTOR_MODE_P (mode))
23563 mode_value = vec_value;
23564 else
23565 mode_value = value;
23566 gcc_assert (GET_MODE_SIZE (mode) <= size);
23567
23568 /* See if block is big or small, handle small blocks. */
23569 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23570 {
23571 int size2 = size;
23572 loop_label = gen_label_rtx ();
23573
23574 if (!*done_label)
23575 *done_label = gen_label_rtx ();
23576
23577 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23578 1, loop_label);
23579 size2 >>= 1;
23580
23581 /* Handle sizes > 3. */
23582 for (;size2 > 2; size2 >>= 1)
23583 expand_small_movmem_or_setmem (destmem, srcmem,
23584 *destptr, *srcptr,
23585 value, vec_value,
23586 *count,
23587 size2, *done_label, issetmem);
23588 /* Nothing to copy? Jump to DONE_LABEL if so. */
23589 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23590 1, *done_label);
23591
23592 /* Do a byte copy. */
23593 destmem = change_address (destmem, QImode, *destptr);
23594 if (issetmem)
23595 emit_move_insn (destmem, gen_lowpart (QImode, value));
23596 else
23597 {
23598 srcmem = change_address (srcmem, QImode, *srcptr);
23599 emit_move_insn (destmem, srcmem);
23600 }
23601
23602 /* Handle sizes 2 and 3. */
23603 label = ix86_expand_aligntest (*count, 2, false);
23604 destmem = change_address (destmem, HImode, *destptr);
23605 destmem = offset_address (destmem, *count, 1);
23606 destmem = offset_address (destmem, GEN_INT (-2), 2);
23607 if (issetmem)
23608 emit_move_insn (destmem, gen_lowpart (HImode, value));
23609 else
23610 {
23611 srcmem = change_address (srcmem, HImode, *srcptr);
23612 srcmem = offset_address (srcmem, *count, 1);
23613 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23614 emit_move_insn (destmem, srcmem);
23615 }
23616
23617 emit_label (label);
23618 LABEL_NUSES (label) = 1;
23619 emit_jump_insn (gen_jump (*done_label));
23620 emit_barrier ();
23621 }
23622 else
23623 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23624 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23625
23626 /* Start memcpy for COUNT >= SIZE. */
23627 if (loop_label)
23628 {
23629 emit_label (loop_label);
23630 LABEL_NUSES (loop_label) = 1;
23631 }
23632
23633 /* Copy first desired_align bytes. */
23634 if (!issetmem)
23635 srcmem = change_address (srcmem, mode, *srcptr);
23636 destmem = change_address (destmem, mode, *destptr);
23637 modesize = GEN_INT (GET_MODE_SIZE (mode));
23638 for (n = 0; prolog_size < desired_align - align; n++)
23639 {
23640 if (issetmem)
23641 emit_move_insn (destmem, mode_value);
23642 else
23643 {
23644 emit_move_insn (destmem, srcmem);
23645 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23646 }
23647 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23648 prolog_size += GET_MODE_SIZE (mode);
23649 }
23650
23651
23652 /* Copy last SIZE bytes. */
23653 destmem = offset_address (destmem, *count, 1);
23654 destmem = offset_address (destmem,
23655 GEN_INT (-size - prolog_size),
23656 1);
23657 if (issetmem)
23658 emit_move_insn (destmem, mode_value);
23659 else
23660 {
23661 srcmem = offset_address (srcmem, *count, 1);
23662 srcmem = offset_address (srcmem,
23663 GEN_INT (-size - prolog_size),
23664 1);
23665 emit_move_insn (destmem, srcmem);
23666 }
23667 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23668 {
23669 destmem = offset_address (destmem, modesize, 1);
23670 if (issetmem)
23671 emit_move_insn (destmem, mode_value);
23672 else
23673 {
23674 srcmem = offset_address (srcmem, modesize, 1);
23675 emit_move_insn (destmem, srcmem);
23676 }
23677 }
23678
23679 /* Align destination. */
23680 if (desired_align > 1 && desired_align > align)
23681 {
23682 rtx saveddest = *destptr;
23683
23684 gcc_assert (desired_align <= size);
23685 /* Align destptr up, place it in a new register. */
23686 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23687 GEN_INT (prolog_size),
23688 NULL_RTX, 1, OPTAB_DIRECT);
23689 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23690 GEN_INT (-desired_align),
23691 *destptr, 1, OPTAB_DIRECT);
23692 /* See how many bytes we skipped. */
23693 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23694 *destptr,
23695 saveddest, 1, OPTAB_DIRECT);
23696 /* Adjust srcptr and count. */
23697 if (!issetmem)
23698 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23699 *srcptr, 1, OPTAB_DIRECT);
23700 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23701 saveddest, *count, 1, OPTAB_DIRECT);
23702 /* We copied at most size + prolog_size. */
23703 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23704 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23705 else
23706 *min_size = 0;
23707
23708 /* Our loops always round down the block size, but for dispatch to a library
23709 call we need the precise value. */
23710 if (dynamic_check)
23711 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23712 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23713 }
23714 else
23715 {
23716 gcc_assert (prolog_size == 0);
23717 /* Decrease count, so we won't end up copying last word twice. */
23718 if (!CONST_INT_P (*count))
23719 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23720 constm1_rtx, *count, 1, OPTAB_DIRECT);
23721 else
23722 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23723 if (*min_size)
23724 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23725 }
23726 }
23727
23728
23729 /* This function is like the previous one, except here we know how many bytes
23730 need to be copied. That allows us to update alignment not only of DST, which
23731 is returned, but also of SRC, which is passed as a pointer for that
23732 reason. */
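/* For example (illustrative): with DESIRED_ALIGN == 8 and ALIGN_BYTES == 5,
the loop below emits a 1-byte and then a 4-byte move (or store), following the
set bits of ALIGN_BYTES. */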
23733 static rtx
23734 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23735 rtx srcreg, rtx value, rtx vec_value,
23736 int desired_align, int align_bytes,
23737 bool issetmem)
23738 {
23739 rtx src = NULL;
23740 rtx orig_dst = dst;
23741 rtx orig_src = NULL;
23742 int piece_size = 1;
23743 int copied_bytes = 0;
23744
23745 if (!issetmem)
23746 {
23747 gcc_assert (srcp != NULL);
23748 src = *srcp;
23749 orig_src = src;
23750 }
23751
23752 for (piece_size = 1;
23753 piece_size <= desired_align && copied_bytes < align_bytes;
23754 piece_size <<= 1)
23755 {
23756 if (align_bytes & piece_size)
23757 {
23758 if (issetmem)
23759 {
23760 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23761 dst = emit_memset (dst, destreg, vec_value, piece_size);
23762 else
23763 dst = emit_memset (dst, destreg, value, piece_size);
23764 }
23765 else
23766 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23767 copied_bytes += piece_size;
23768 }
23769 }
23770 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23771 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23772 if (MEM_SIZE_KNOWN_P (orig_dst))
23773 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23774
23775 if (!issetmem)
23776 {
23777 int src_align_bytes = get_mem_align_offset (src, desired_align
23778 * BITS_PER_UNIT);
23779 if (src_align_bytes >= 0)
23780 src_align_bytes = desired_align - src_align_bytes;
23781 if (src_align_bytes >= 0)
23782 {
23783 unsigned int src_align;
23784 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23785 {
23786 if ((src_align_bytes & (src_align - 1))
23787 == (align_bytes & (src_align - 1)))
23788 break;
23789 }
23790 if (src_align > (unsigned int) desired_align)
23791 src_align = desired_align;
23792 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23793 set_mem_align (src, src_align * BITS_PER_UNIT);
23794 }
23795 if (MEM_SIZE_KNOWN_P (orig_src))
23796 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23797 *srcp = src;
23798 }
23799
23800 return dst;
23801 }
23802
23803 /* Return true if ALG can be used in current context.
23804 Assume we expand memset if MEMSET is true. */
23805 static bool
23806 alg_usable_p (enum stringop_alg alg, bool memset)
23807 {
23808 if (alg == no_stringop)
23809 return false;
23810 if (alg == vector_loop)
23811 return TARGET_SSE || TARGET_AVX;
23812 /* Algorithms using the rep prefix want at least edi and ecx;
23813 additionally, memset wants eax and memcpy wants esi. Don't
23814 consider such algorithms if the user has appropriated those
23815 registers for their own purposes. */
23816 if (alg == rep_prefix_1_byte
23817 || alg == rep_prefix_4_byte
23818 || alg == rep_prefix_8_byte)
23819 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23820 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23821 return true;
23822 }
23823
23824 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
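/* Roughly (summarizing the logic below): the per-CPU cost tables list
(max size, algorithm) pairs; the first usable non-libcall entry whose max
covers the expected size wins, with max == -1 meaning unbounded. */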
23825 static enum stringop_alg
23826 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23827 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23828 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23829 {
23830 const struct stringop_algs * algs;
23831 bool optimize_for_speed;
23832 int max = -1;
23833 const struct processor_costs *cost;
23834 int i;
23835 bool any_alg_usable_p = false;
23836
23837 *noalign = false;
23838 *dynamic_check = -1;
23839
23840 /* Even if the string operation call is cold, we still might spend a lot
23841 of time processing large blocks. */
23842 if (optimize_function_for_size_p (cfun)
23843 || (optimize_insn_for_size_p ()
23844 && (max_size < 256
23845 || (expected_size != -1 && expected_size < 256))))
23846 optimize_for_speed = false;
23847 else
23848 optimize_for_speed = true;
23849
23850 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23851 if (memset)
23852 algs = &cost->memset[TARGET_64BIT != 0];
23853 else
23854 algs = &cost->memcpy[TARGET_64BIT != 0];
23855
23856 /* Find the maximal size covered by a usable non-libcall algorithm. */
23857 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23858 {
23859 enum stringop_alg candidate = algs->size[i].alg;
23860 bool usable = alg_usable_p (candidate, memset);
23861 any_alg_usable_p |= usable;
23862
23863 if (candidate != libcall && candidate && usable)
23864 max = algs->size[i].max;
23865 }
23866
23867 /* If the expected size is not known but the max size is small enough
23868 that the inline version is a win, set the expected size into
23869 the range. */
23870 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23871 && expected_size == -1)
23872 expected_size = min_size / 2 + max_size / 2;
23873
23874 /* If the user specified the algorithm, honor it if possible. */
23875 if (ix86_stringop_alg != no_stringop
23876 && alg_usable_p (ix86_stringop_alg, memset))
23877 return ix86_stringop_alg;
23878 /* rep; movq or rep; movl is the smallest variant. */
23879 else if (!optimize_for_speed)
23880 {
23881 *noalign = true;
23882 if (!count || (count & 3) || (memset && !zero_memset))
23883 return alg_usable_p (rep_prefix_1_byte, memset)
23884 ? rep_prefix_1_byte : loop_1_byte;
23885 else
23886 return alg_usable_p (rep_prefix_4_byte, memset)
23887 ? rep_prefix_4_byte : loop;
23888 }
23889 /* Very tiny blocks are best handled via the loop; REP is expensive to
23890 set up. */
23891 else if (expected_size != -1 && expected_size < 4)
23892 return loop_1_byte;
23893 else if (expected_size != -1)
23894 {
23895 enum stringop_alg alg = libcall;
23896 bool alg_noalign = false;
23897 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23898 {
23899 /* We get here if the algorithms that were not libcall-based
23900 were rep-prefix based and we are unable to use rep prefixes
23901 based on global register usage. Break out of the loop and
23902 use the heuristic below. */
23903 if (algs->size[i].max == 0)
23904 break;
23905 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23906 {
23907 enum stringop_alg candidate = algs->size[i].alg;
23908
23909 if (candidate != libcall && alg_usable_p (candidate, memset))
23910 {
23911 alg = candidate;
23912 alg_noalign = algs->size[i].noalign;
23913 }
23914 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23915 last non-libcall inline algorithm. */
23916 if (TARGET_INLINE_ALL_STRINGOPS)
23917 {
23918 /* When the current size is best to be copied by a libcall,
23919 but we are still forced to inline, run the heuristic below
23920 that will pick code for medium sized blocks. */
23921 if (alg != libcall)
23922 {
23923 *noalign = alg_noalign;
23924 return alg;
23925 }
23926 break;
23927 }
23928 else if (alg_usable_p (candidate, memset))
23929 {
23930 *noalign = algs->size[i].noalign;
23931 return candidate;
23932 }
23933 }
23934 }
23935 }
23936 /* When asked to inline the call anyway, try to pick a meaningful choice.
23937 We look for the maximal size of block that is faster to copy by hand and
23938 take blocks of at most that size, guessing that the average size will
23939 be roughly half of the maximum.
23940
23941 If this turns out to be bad, we might simply specify the preferred
23942 choice in ix86_costs. */
23943 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23944 && (algs->unknown_size == libcall
23945 || !alg_usable_p (algs->unknown_size, memset)))
23946 {
23947 enum stringop_alg alg;
23948
23949 /* If there aren't any usable algorithms, then recursing on
23950 smaller sizes isn't going to find anything. Just return the
23951 simple byte-at-a-time copy loop. */
23952 if (!any_alg_usable_p)
23953 {
23954 /* Pick something reasonable. */
23955 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23956 *dynamic_check = 128;
23957 return loop_1_byte;
23958 }
23959 if (max == -1)
23960 max = 4096;
23961 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23962 zero_memset, dynamic_check, noalign);
23963 gcc_assert (*dynamic_check == -1);
23964 gcc_assert (alg != libcall);
23965 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23966 *dynamic_check = max;
23967 return alg;
23968 }
23969 return (alg_usable_p (algs->unknown_size, memset)
23970 ? algs->unknown_size : libcall);
23971 }
23972
23973 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23974 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
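/* E.g. (illustrative): for the vector_loop algorithm with a V16QImode move
mode the desired alignment below becomes 16 bytes, unless optimizing for size. */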
23975 static int
23976 decide_alignment (int align,
23977 enum stringop_alg alg,
23978 int expected_size,
23979 enum machine_mode move_mode)
23980 {
23981 int desired_align = 0;
23982
23983 gcc_assert (alg != no_stringop);
23984
23985 if (alg == libcall)
23986 return 0;
23987 if (move_mode == VOIDmode)
23988 return 0;
23989
23990 desired_align = GET_MODE_SIZE (move_mode);
23991 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23992 copying a whole cacheline at once. */
23993 if (TARGET_PENTIUMPRO
23994 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23995 desired_align = 8;
23996
23997 if (optimize_size)
23998 desired_align = 1;
23999 if (desired_align < align)
24000 desired_align = align;
24001 if (expected_size != -1 && expected_size < 4)
24002 desired_align = align;
24003
24004 return desired_align;
24005 }
24006
24007
24008 /* Helper function for memset. For QImode value 0xXY produce
24009 0xXYXYXYXY of the width specified by MODE. This is essentially
24010 a * 0x01010101, but we can do slightly better than
24011 synth_mult by unwinding the sequence by hand on CPUs with
24012 slow multiply. */
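/* For illustration: with VAL == 0x5A and MODE == SImode the constant path
below computes 0x5A -> 0x5A5A -> 0x5A5A5A5A; the register path either
multiplies by 0x01010101 or builds the value with shift/or (and insv) steps,
whichever the cost tables say is cheaper. */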
24013 static rtx
24014 promote_duplicated_reg (enum machine_mode mode, rtx val)
24015 {
24016 enum machine_mode valmode = GET_MODE (val);
24017 rtx tmp;
24018 int nops = mode == DImode ? 3 : 2;
24019
24020 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24021 if (val == const0_rtx)
24022 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24023 if (CONST_INT_P (val))
24024 {
24025 HOST_WIDE_INT v = INTVAL (val) & 255;
24026
24027 v |= v << 8;
24028 v |= v << 16;
24029 if (mode == DImode)
24030 v |= (v << 16) << 16;
24031 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24032 }
24033
24034 if (valmode == VOIDmode)
24035 valmode = QImode;
24036 if (valmode != QImode)
24037 val = gen_lowpart (QImode, val);
24038 if (mode == QImode)
24039 return val;
24040 if (!TARGET_PARTIAL_REG_STALL)
24041 nops--;
24042 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24043 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24044 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24045 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24046 {
24047 rtx reg = convert_modes (mode, QImode, val, true);
24048 tmp = promote_duplicated_reg (mode, const1_rtx);
24049 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24050 OPTAB_DIRECT);
24051 }
24052 else
24053 {
24054 rtx reg = convert_modes (mode, QImode, val, true);
24055
24056 if (!TARGET_PARTIAL_REG_STALL)
24057 if (mode == SImode)
24058 emit_insn (gen_movsi_insv_1 (reg, reg));
24059 else
24060 emit_insn (gen_movdi_insv_1 (reg, reg));
24061 else
24062 {
24063 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24064 NULL, 1, OPTAB_DIRECT);
24065 reg =
24066 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24067 }
24068 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24069 NULL, 1, OPTAB_DIRECT);
24070 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24071 if (mode == SImode)
24072 return reg;
24073 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24074 NULL, 1, OPTAB_DIRECT);
24075 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24076 return reg;
24077 }
24078 }
24079
24080 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24081 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24082 alignment from ALIGN to DESIRED_ALIGN. */
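/* E.g. (illustrative): on a 64-bit target with SIZE_NEEDED == 16 the value is
promoted to DImode, while a 2-byte-only epilogue would promote just to HImode. */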
24083 static rtx
24084 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24085 int align)
24086 {
24087 rtx promoted_val;
24088
24089 if (TARGET_64BIT
24090 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24091 promoted_val = promote_duplicated_reg (DImode, val);
24092 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24093 promoted_val = promote_duplicated_reg (SImode, val);
24094 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24095 promoted_val = promote_duplicated_reg (HImode, val);
24096 else
24097 promoted_val = val;
24098
24099 return promoted_val;
24100 }
24101
24102 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24103 operations when profitable. The code depends upon architecture, block size
24104 and alignment, but always has one of the following overall structures:
24105
24106 Aligned move sequence:
24107
24108 1) Prologue guard: Conditional that jumps up to epilogues for small
24109 blocks that can be handled by epilogue alone. This is faster
24110 but also needed for correctness, since the prologue assumes the block
24111 is larger than the desired alignment.
24112
24113 Optional dynamic check for size and libcall for large
24114 blocks is emitted here too, with -minline-stringops-dynamically.
24115
24116 2) Prologue: copy first few bytes in order to get destination
24117 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24118 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24119 copied. We emit either a jump tree on power of two sized
24120 blocks, or a byte loop.
24121
24122 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24123 with specified algorithm.
24124
24125 4) Epilogue: code copying tail of the block that is too small to be
24126 handled by main body (or up to size guarded by prologue guard).
24127
24128 Misaligned move sequence
24129
24130 1) Misaligned move prologue/epilogue containing:
24131 a) Prologue handling small memory blocks and jumping to done_label
24132 (skipped if blocks are known to be large enough)
24133 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24134 needed, done by a single possibly misaligned move
24135 (skipped if alignment is not needed)
24136 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24137
24138 2) Zero size guard dispatching to done_label, if needed
24139
24140 3) Dispatch to a library call, if needed
24141
24142 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24143 with the specified algorithm. */
24144 bool
24145 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24146 rtx align_exp, rtx expected_align_exp,
24147 rtx expected_size_exp, rtx min_size_exp,
24148 rtx max_size_exp, rtx probable_max_size_exp,
24149 bool issetmem)
24150 {
24151 rtx destreg;
24152 rtx srcreg = NULL;
24153 rtx label = NULL;
24154 rtx tmp;
24155 rtx jump_around_label = NULL;
24156 HOST_WIDE_INT align = 1;
24157 unsigned HOST_WIDE_INT count = 0;
24158 HOST_WIDE_INT expected_size = -1;
24159 int size_needed = 0, epilogue_size_needed;
24160 int desired_align = 0, align_bytes = 0;
24161 enum stringop_alg alg;
24162 rtx promoted_val = NULL;
24163 rtx vec_promoted_val = NULL;
24164 bool force_loopy_epilogue = false;
24165 int dynamic_check;
24166 bool need_zero_guard = false;
24167 bool noalign;
24168 enum machine_mode move_mode = VOIDmode;
24169 int unroll_factor = 1;
24170 /* TODO: Once value ranges are available, fill in proper data. */
24171 unsigned HOST_WIDE_INT min_size = 0;
24172 unsigned HOST_WIDE_INT max_size = -1;
24173 unsigned HOST_WIDE_INT probable_max_size = -1;
24174 bool misaligned_prologue_used = false;
24175
24176 if (CONST_INT_P (align_exp))
24177 align = INTVAL (align_exp);
24178 /* i386 can do misaligned access at a reasonably increased cost. */
24179 if (CONST_INT_P (expected_align_exp)
24180 && INTVAL (expected_align_exp) > align)
24181 align = INTVAL (expected_align_exp);
24182 /* ALIGN is the minimum of destination and source alignment, but we care here
24183 just about destination alignment. */
24184 else if (!issetmem
24185 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24186 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24187
24188 if (CONST_INT_P (count_exp))
24189 {
24190 min_size = max_size = probable_max_size = count = expected_size
24191 = INTVAL (count_exp);
24192 /* When COUNT is 0, there is nothing to do. */
24193 if (!count)
24194 return true;
24195 }
24196 else
24197 {
24198 if (min_size_exp)
24199 min_size = INTVAL (min_size_exp);
24200 if (max_size_exp)
24201 max_size = INTVAL (max_size_exp);
24202 if (probable_max_size_exp)
24203 probable_max_size = INTVAL (probable_max_size_exp);
24204 if (CONST_INT_P (expected_size_exp))
24205 expected_size = INTVAL (expected_size_exp);
24206 }
24207
24208 /* Make sure we don't need to care about overflow later on. */
24209 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24210 return false;
24211
24212 /* Step 0: Decide on preferred algorithm, desired alignment and
24213 size of chunks to be copied by main loop. */
24214 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24215 issetmem,
24216 issetmem && val_exp == const0_rtx,
24217 &dynamic_check, &noalign);
24218 if (alg == libcall)
24219 return false;
24220 gcc_assert (alg != no_stringop);
24221
24222 /* For now the vector version of memset is generated only for memory zeroing, as
24223 creating the promoted vector value is very cheap in this case. */
24224 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24225 alg = unrolled_loop;
24226
24227 if (!count)
24228 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24229 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24230 if (!issetmem)
24231 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24232
24233 unroll_factor = 1;
24234 move_mode = word_mode;
24235 switch (alg)
24236 {
24237 case libcall:
24238 case no_stringop:
24239 case last_alg:
24240 gcc_unreachable ();
24241 case loop_1_byte:
24242 need_zero_guard = true;
24243 move_mode = QImode;
24244 break;
24245 case loop:
24246 need_zero_guard = true;
24247 break;
24248 case unrolled_loop:
24249 need_zero_guard = true;
24250 unroll_factor = (TARGET_64BIT ? 4 : 2);
24251 break;
24252 case vector_loop:
24253 need_zero_guard = true;
24254 unroll_factor = 4;
24255 /* Find the widest supported mode. */
24256 move_mode = word_mode;
24257 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24258 != CODE_FOR_nothing)
24259 move_mode = GET_MODE_WIDER_MODE (move_mode);
24260
24261 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24262 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24263 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24264 {
24265 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24266 move_mode = mode_for_vector (word_mode, nunits);
24267 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24268 move_mode = word_mode;
24269 }
24270 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24271 break;
24272 case rep_prefix_8_byte:
24273 move_mode = DImode;
24274 break;
24275 case rep_prefix_4_byte:
24276 move_mode = SImode;
24277 break;
24278 case rep_prefix_1_byte:
24279 move_mode = QImode;
24280 break;
24281 }
24282 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24283 epilogue_size_needed = size_needed;
24284
24285 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24286 if (!TARGET_ALIGN_STRINGOPS || noalign)
24287 align = desired_align;
24288
24289 /* Step 1: Prologue guard. */
24290
24291 /* Alignment code needs count to be in register. */
24292 if (CONST_INT_P (count_exp) && desired_align > align)
24293 {
24294 if (INTVAL (count_exp) > desired_align
24295 && INTVAL (count_exp) > size_needed)
24296 {
24297 align_bytes
24298 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24299 if (align_bytes <= 0)
24300 align_bytes = 0;
24301 else
24302 align_bytes = desired_align - align_bytes;
24303 }
24304 if (align_bytes == 0)
24305 count_exp = force_reg (counter_mode (count_exp), count_exp);
24306 }
24307 gcc_assert (desired_align >= 1 && align >= 1);
24308
24309 /* Misaligned move sequences handle both prologue and epilogue at once.
24310 Default code generation results in smaller code for large alignments
24311 and also avoids redundant work when sizes are known precisely. */
24312 misaligned_prologue_used
24313 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24314 && MAX (desired_align, epilogue_size_needed) <= 32
24315 && desired_align <= epilogue_size_needed
24316 && ((desired_align > align && !align_bytes)
24317 || (!count && epilogue_size_needed > 1)));
24318
24319 /* Do the cheap promotion to allow better CSE across the
24320 main loop and epilogue (i.e. one load of the big constant in
24321 front of all code).
24322 For now the misaligned move sequences do not have a fast path
24323 without broadcasting. */
24324 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24325 {
24326 if (alg == vector_loop)
24327 {
24328 gcc_assert (val_exp == const0_rtx);
24329 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24330 promoted_val = promote_duplicated_reg_to_size (val_exp,
24331 GET_MODE_SIZE (word_mode),
24332 desired_align, align);
24333 }
24334 else
24335 {
24336 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24337 desired_align, align);
24338 }
24339 }
24340 /* Misaligned move sequences handle both prologues and epilogues at once.
24341 Default code generation results in smaller code for large alignments and
24342 also avoids redundant work when sizes are known precisely. */
24343 if (misaligned_prologue_used)
24344 {
24345 /* The misaligned move prologue handles small blocks by itself. */
24346 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24347 (dst, src, &destreg, &srcreg,
24348 move_mode, promoted_val, vec_promoted_val,
24349 &count_exp,
24350 &jump_around_label,
24351 desired_align < align
24352 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24353 desired_align, align, &min_size, dynamic_check, issetmem);
24354 if (!issetmem)
24355 src = change_address (src, BLKmode, srcreg);
24356 dst = change_address (dst, BLKmode, destreg);
24357 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24358 epilogue_size_needed = 0;
24359 if (need_zero_guard && !min_size)
24360 {
24361 /* It is possible that we copied enough so the main loop will not
24362 execute. */
24363 gcc_assert (size_needed > 1);
24364 if (jump_around_label == NULL_RTX)
24365 jump_around_label = gen_label_rtx ();
24366 emit_cmp_and_jump_insns (count_exp,
24367 GEN_INT (size_needed),
24368 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24369 if (expected_size == -1
24370 || expected_size < (desired_align - align) / 2 + size_needed)
24371 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24372 else
24373 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24374 }
24375 }
24376 /* Ensure that alignment prologue won't copy past end of block. */
24377 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24378 {
24379 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24380 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24381 Make sure it is power of 2. */
24382 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24383
24384 /* To improve performance of small blocks, we jump around the VAL
24385 promoting code. This means that if the promoted VAL is not constant,
24386 we might not use it in the epilogue and have to use the byte
24387 loop variant. */
24388 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24389 force_loopy_epilogue = true;
24390 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24391 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24392 {
24393 /* If main algorithm works on QImode, no epilogue is needed.
24394 For small sizes just don't align anything. */
24395 if (size_needed == 1)
24396 desired_align = align;
24397 else
24398 goto epilogue;
24399 }
24400 else if (!count
24401 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24402 {
24403 label = gen_label_rtx ();
24404 emit_cmp_and_jump_insns (count_exp,
24405 GEN_INT (epilogue_size_needed),
24406 LTU, 0, counter_mode (count_exp), 1, label);
24407 if (expected_size == -1 || expected_size < epilogue_size_needed)
24408 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24409 else
24410 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24411 }
24412 }
24413
24414 /* Emit code to decide at runtime whether a library call or the inline code
24415 should be used. */
24416 if (dynamic_check != -1)
24417 {
24418 if (!issetmem && CONST_INT_P (count_exp))
24419 {
24420 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24421 {
24422 emit_block_move_via_libcall (dst, src, count_exp, false);
24423 count_exp = const0_rtx;
24424 goto epilogue;
24425 }
24426 }
24427 else
24428 {
24429 rtx hot_label = gen_label_rtx ();
24430 if (jump_around_label == NULL_RTX)
24431 jump_around_label = gen_label_rtx ();
24432 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24433 LEU, 0, counter_mode (count_exp),
24434 1, hot_label);
24435 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24436 if (issetmem)
24437 set_storage_via_libcall (dst, count_exp, val_exp, false);
24438 else
24439 emit_block_move_via_libcall (dst, src, count_exp, false);
24440 emit_jump (jump_around_label);
24441 emit_label (hot_label);
24442 }
24443 }
24444
24445 /* Step 2: Alignment prologue. */
24446 /* Do the expensive promotion once we branched off the small blocks. */
24447 if (issetmem && !promoted_val)
24448 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24449 desired_align, align);
24450
24451 if (desired_align > align && !misaligned_prologue_used)
24452 {
24453 if (align_bytes == 0)
24454 {
24455 /* Except for the first move in the prologue, we no longer know
24456 the constant offset in aliasing info. It doesn't seem worth
24457 the pain to maintain it for the first move, so throw away
24458 the info early. */
24459 dst = change_address (dst, BLKmode, destreg);
24460 if (!issetmem)
24461 src = change_address (src, BLKmode, srcreg);
24462 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24463 promoted_val, vec_promoted_val,
24464 count_exp, align, desired_align,
24465 issetmem);
24466 /* At most desired_align - align bytes are copied. */
24467 if (min_size < (unsigned)(desired_align - align))
24468 min_size = 0;
24469 else
24470 min_size -= desired_align - align;
24471 }
24472 else
24473 {
24474 /* If we know how many bytes need to be stored before dst is
24475 sufficiently aligned, maintain aliasing info accurately. */
24476 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24477 srcreg,
24478 promoted_val,
24479 vec_promoted_val,
24480 desired_align,
24481 align_bytes,
24482 issetmem);
24483
24484 count_exp = plus_constant (counter_mode (count_exp),
24485 count_exp, -align_bytes);
24486 count -= align_bytes;
24487 min_size -= align_bytes;
24488 max_size -= align_bytes;
24489 }
24490 if (need_zero_guard
24491 && !min_size
24492 && (count < (unsigned HOST_WIDE_INT) size_needed
24493 || (align_bytes == 0
24494 && count < ((unsigned HOST_WIDE_INT) size_needed
24495 + desired_align - align))))
24496 {
24497 /* It is possible that we copied enough so the main loop will not
24498 execute. */
24499 gcc_assert (size_needed > 1);
24500 if (label == NULL_RTX)
24501 label = gen_label_rtx ();
24502 emit_cmp_and_jump_insns (count_exp,
24503 GEN_INT (size_needed),
24504 LTU, 0, counter_mode (count_exp), 1, label);
24505 if (expected_size == -1
24506 || expected_size < (desired_align - align) / 2 + size_needed)
24507 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24508 else
24509 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24510 }
24511 }
24512 if (label && size_needed == 1)
24513 {
24514 emit_label (label);
24515 LABEL_NUSES (label) = 1;
24516 label = NULL;
24517 epilogue_size_needed = 1;
24518 if (issetmem)
24519 promoted_val = val_exp;
24520 }
24521 else if (label == NULL_RTX && !misaligned_prologue_used)
24522 epilogue_size_needed = size_needed;
24523
24524 /* Step 3: Main loop. */
24525
24526 switch (alg)
24527 {
24528 case libcall:
24529 case no_stringop:
24530 case last_alg:
24531 gcc_unreachable ();
24532 case loop_1_byte:
24533 case loop:
24534 case unrolled_loop:
24535 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24536 count_exp, move_mode, unroll_factor,
24537 expected_size, issetmem);
24538 break;
24539 case vector_loop:
24540 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24541 vec_promoted_val, count_exp, move_mode,
24542 unroll_factor, expected_size, issetmem);
24543 break;
24544 case rep_prefix_8_byte:
24545 case rep_prefix_4_byte:
24546 case rep_prefix_1_byte:
24547 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24548 val_exp, count_exp, move_mode, issetmem);
24549 break;
24550 }
24551 /* Properly adjust the offset of src and dest memory for aliasing. */
24552 if (CONST_INT_P (count_exp))
24553 {
24554 if (!issetmem)
24555 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24556 (count / size_needed) * size_needed);
24557 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24558 (count / size_needed) * size_needed);
24559 }
24560 else
24561 {
24562 if (!issetmem)
24563 src = change_address (src, BLKmode, srcreg);
24564 dst = change_address (dst, BLKmode, destreg);
24565 }
24566
24567 /* Step 4: Epilogue to copy the remaining bytes. */
24568 epilogue:
24569 if (label)
24570 {
24571 /* When the main loop is done, COUNT_EXP might hold the original count,
24572 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24573 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24574 bytes. Compensate if needed. */
24575
24576 if (size_needed < epilogue_size_needed)
24577 {
24578 tmp =
24579 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24580 GEN_INT (size_needed - 1), count_exp, 1,
24581 OPTAB_DIRECT);
24582 if (tmp != count_exp)
24583 emit_move_insn (count_exp, tmp);
24584 }
24585 emit_label (label);
24586 LABEL_NUSES (label) = 1;
24587 }
24588
24589 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24590 {
24591 if (force_loopy_epilogue)
24592 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24593 epilogue_size_needed);
24594 else
24595 {
24596 if (issetmem)
24597 expand_setmem_epilogue (dst, destreg, promoted_val,
24598 vec_promoted_val, count_exp,
24599 epilogue_size_needed);
24600 else
24601 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24602 epilogue_size_needed);
24603 }
24604 }
24605 if (jump_around_label)
24606 emit_label (jump_around_label);
24607 return true;
24608 }
24609
24610
24611 /* Expand the appropriate insns for doing strlen if not just doing
24612 repnz; scasb
24613
24614 out = result, initialized with the start address
24615 align_rtx = alignment of the address.
24616 scratch = scratch register, initialized with the start address when
24617 not aligned, otherwise undefined
24618
24619 This is just the body. It needs the initializations mentioned above and
24620 some address computing at the end. These things are done in i386.md. */
24621
24622 static void
24623 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24624 {
24625 int align;
24626 rtx tmp;
24627 rtx align_2_label = NULL_RTX;
24628 rtx align_3_label = NULL_RTX;
24629 rtx align_4_label = gen_label_rtx ();
24630 rtx end_0_label = gen_label_rtx ();
24631 rtx mem;
24632 rtx tmpreg = gen_reg_rtx (SImode);
24633 rtx scratch = gen_reg_rtx (SImode);
24634 rtx cmp;
24635
24636 align = 0;
24637 if (CONST_INT_P (align_rtx))
24638 align = INTVAL (align_rtx);
24639
24640 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24641
24642 /* Is there a known alignment and is it less than 4? */
24643 if (align < 4)
24644 {
24645 rtx scratch1 = gen_reg_rtx (Pmode);
24646 emit_move_insn (scratch1, out);
24647 /* Is there a known alignment and is it not 2? */
24648 if (align != 2)
24649 {
24650 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24651 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24652
24653 /* Leave just the 3 lower bits. */
24654 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24655 NULL_RTX, 0, OPTAB_WIDEN);
24656
24657 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24658 Pmode, 1, align_4_label);
24659 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24660 Pmode, 1, align_2_label);
24661 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24662 Pmode, 1, align_3_label);
24663 }
24664 else
24665 {
24666 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24667 check whether it is aligned to a 4-byte boundary. */
24668
24669 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24670 NULL_RTX, 0, OPTAB_WIDEN);
24671
24672 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24673 Pmode, 1, align_4_label);
24674 }
24675
24676 mem = change_address (src, QImode, out);
24677
24678 /* Now compare the bytes. */
24679
24680 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24682 QImode, 1, end_0_label);
24683
24684 /* Increment the address. */
24685 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24686
24687 /* Not needed with an alignment of 2 */
24688 if (align != 2)
24689 {
24690 emit_label (align_2_label);
24691
24692 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24693 end_0_label);
24694
24695 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24696
24697 emit_label (align_3_label);
24698 }
24699
24700 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24701 end_0_label);
24702
24703 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24704 }
24705
24706 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24707 align this loop: it only makes the program larger and does not help to
24708 speed it up. */
24709 emit_label (align_4_label);
24710
24711 mem = change_address (src, SImode, out);
24712 emit_move_insn (scratch, mem);
24713 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24714
24715 /* This formula yields a nonzero result iff one of the bytes is zero.
24716 This saves three branches inside the loop and many cycles. */
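/* The computation below is the classic "has a zero byte" test:
(X - 0x01010101) & ~X & 0x80808080 is nonzero exactly when some byte of X is
zero; the mask picks up the borrow that the subtraction produces through a
zero byte. */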
24717
24718 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24719 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24720 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24721 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24722 gen_int_mode (0x80808080, SImode)));
24723 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24724 align_4_label);
24725
24726 if (TARGET_CMOVE)
24727 {
24728 rtx reg = gen_reg_rtx (SImode);
24729 rtx reg2 = gen_reg_rtx (Pmode);
24730 emit_move_insn (reg, tmpreg);
24731 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24732
24733 /* If zero is not in the first two bytes, move two bytes forward. */
24734 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24735 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24736 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24737 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24738 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24739 reg,
24740 tmpreg)));
24741 /* Emit lea manually to avoid clobbering of flags. */
24742 emit_insn (gen_rtx_SET (SImode, reg2,
24743 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24744
24745 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24746 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24747 emit_insn (gen_rtx_SET (VOIDmode, out,
24748 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24749 reg2,
24750 out)));
24751 }
24752 else
24753 {
24754 rtx end_2_label = gen_label_rtx ();
24755 /* Is zero in the first two bytes? */
24756
24757 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24758 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24759 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24760 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24761 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24762 pc_rtx);
24763 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24764 JUMP_LABEL (tmp) = end_2_label;
24765
24766 /* Not in the first two. Move two bytes forward. */
24767 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24768 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24769
24770 emit_label (end_2_label);
24771
24772 }
24773
24774 /* Avoid branch in fixing the byte. */
24775 tmpreg = gen_lowpart (QImode, tmpreg);
24776 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24777 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24778 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24779 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24780
24781 emit_label (end_0_label);
24782 }
24783
24784 /* Expand strlen. */
24785
24786 bool
24787 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24788 {
24789 rtx addr, scratch1, scratch2, scratch3, scratch4;
24790
24791 /* The generic case of the strlen expander is long. Avoid expanding
24792 it unless TARGET_INLINE_ALL_STRINGOPS. */
24793
24794 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24795 && !TARGET_INLINE_ALL_STRINGOPS
24796 && !optimize_insn_for_size_p ()
24797 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24798 return false;
24799
24800 addr = force_reg (Pmode, XEXP (src, 0));
24801 scratch1 = gen_reg_rtx (Pmode);
24802
24803 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24804 && !optimize_insn_for_size_p ())
24805 {
24806 /* Well, it seems that some optimizer does not combine a call like
24807 foo(strlen(bar), strlen(bar));
24808 when the move and the subtraction are done here. It does calculate
24809 the length just once when these instructions are done inside of
24810 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24811 often used and I use one fewer register for the lifetime of
24812 output_strlen_unroll(), this is better. */
24813
24814 emit_move_insn (out, addr);
24815
24816 ix86_expand_strlensi_unroll_1 (out, src, align);
24817
24818 /* strlensi_unroll_1 returns the address of the zero at the end of
24819 the string, like memchr(), so compute the length by subtracting
24820 the start address. */
24821 emit_insn (ix86_gen_sub3 (out, out, addr));
24822 }
24823 else
24824 {
24825 rtx unspec;
24826
24827 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24828 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24829 return false;
24830
24831 scratch2 = gen_reg_rtx (Pmode);
24832 scratch3 = gen_reg_rtx (Pmode);
24833 scratch4 = force_reg (Pmode, constm1_rtx);
24834
24835 emit_move_insn (scratch3, addr);
24836 eoschar = force_reg (QImode, eoschar);
24837
24838 src = replace_equiv_address_nv (src, scratch3);
24839
24840 /* If .md starts supporting :P, this can be done in .md. */
24841 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24842 scratch4), UNSPEC_SCAS);
24843 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24844 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24845 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24846 }
24847 return true;
24848 }
24849
24850 /* For a given symbol (function), construct code to compute the address of its PLT
24851 entry in the large x86-64 PIC model. */
24852 static rtx
24853 construct_plt_address (rtx symbol)
24854 {
24855 rtx tmp, unspec;
24856
24857 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24858 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24859 gcc_assert (Pmode == DImode);
24860
24861 tmp = gen_reg_rtx (Pmode);
24862 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24863
24864 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24865 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24866 return tmp;
24867 }
24868
24869 rtx
24870 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24871 rtx callarg2,
24872 rtx pop, bool sibcall)
24873 {
24874 unsigned int const cregs_size
24875 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24876 rtx vec[3 + cregs_size];
24877 rtx use = NULL, call;
24878 unsigned int vec_len = 0;
24879
24880 if (pop == const0_rtx)
24881 pop = NULL;
24882 gcc_assert (!TARGET_64BIT || !pop);
24883
24884 if (TARGET_MACHO && !TARGET_64BIT)
24885 {
24886 #if TARGET_MACHO
24887 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24888 fnaddr = machopic_indirect_call_target (fnaddr);
24889 #endif
24890 }
24891 else
24892 {
24893 /* Static functions and indirect calls don't need the pic register. */
24894 if (flag_pic
24895 && (!TARGET_64BIT
24896 || (ix86_cmodel == CM_LARGE_PIC
24897 && DEFAULT_ABI != MS_ABI))
24898 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24899 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24900 use_reg (&use, pic_offset_table_rtx);
24901 }
24902
24903 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24904 {
24905 rtx al = gen_rtx_REG (QImode, AX_REG);
24906 emit_move_insn (al, callarg2);
24907 use_reg (&use, al);
24908 }
24909
24910 if (ix86_cmodel == CM_LARGE_PIC
24911 && !TARGET_PECOFF
24912 && MEM_P (fnaddr)
24913 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24914 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24915 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24916 else if (sibcall
24917 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24918 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24919 {
24920 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24921 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24922 }
24923
24924 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24925 if (retval)
24926 call = gen_rtx_SET (VOIDmode, retval, call);
24927 vec[vec_len++] = call;
24928
24929 if (pop)
24930 {
24931 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24932 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24933 vec[vec_len++] = pop;
24934 }
24935
24936 if (TARGET_64BIT_MS_ABI
24937 && (!callarg2 || INTVAL (callarg2) != -2))
24938 {
24939 unsigned i;
24940
24941 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24942 UNSPEC_MS_TO_SYSV_CALL);
24943
24944 for (i = 0; i < cregs_size; i++)
24945 {
24946 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24947 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24948
24949 vec[vec_len++]
24950 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24951 }
24952 }
24953
24954 if (vec_len > 1)
24955 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24956 call = emit_call_insn (call);
24957 if (use)
24958 CALL_INSN_FUNCTION_USAGE (call) = use;
24959
24960 return call;
24961 }
24962
24963 /* Output the assembly for a call instruction. */
24964
24965 const char *
24966 ix86_output_call_insn (rtx insn, rtx call_op)
24967 {
24968 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24969 bool seh_nop_p = false;
24970 const char *xasm;
24971
24972 if (SIBLING_CALL_P (insn))
24973 {
24974 if (direct_p)
24975 xasm = "jmp\t%P0";
24976 /* SEH epilogue detection requires the indirect branch case
24977 to include REX.W. */
24978 else if (TARGET_SEH)
24979 xasm = "rex.W jmp %A0";
24980 else
24981 xasm = "jmp\t%A0";
24982
24983 output_asm_insn (xasm, &call_op);
24984 return "";
24985 }
24986
24987 /* SEH unwinding can require an extra nop to be emitted in several
24988 circumstances. Determine if we have one of those. */
24989 if (TARGET_SEH)
24990 {
24991 rtx i;
24992
24993 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24994 {
24995 /* If we get to another real insn, we don't need the nop. */
24996 if (INSN_P (i))
24997 break;
24998
24999 /* If we get to the epilogue note, prevent a catch region from
25000 being adjacent to the standard epilogue sequence. If non-
25001 call-exceptions, we'll have done this during epilogue emission. */
25002 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25003 && !flag_non_call_exceptions
25004 && !can_throw_internal (insn))
25005 {
25006 seh_nop_p = true;
25007 break;
25008 }
25009 }
25010
25011 /* If we didn't find a real insn following the call, prevent the
25012 unwinder from looking into the next function. */
25013 if (i == NULL)
25014 seh_nop_p = true;
25015 }
25016
25017 if (direct_p)
25018 xasm = "call\t%P0";
25019 else
25020 xasm = "call\t%A0";
25021
25022 output_asm_insn (xasm, &call_op);
25023
25024 if (seh_nop_p)
25025 return "nop";
25026
25027 return "";
25028 }
25029 \f
25030 /* Clear stack slot assignments remembered from previous functions.
25031 This is called from INIT_EXPANDERS once before RTL is emitted for each
25032 function. */
25033
25034 static struct machine_function *
25035 ix86_init_machine_status (void)
25036 {
25037 struct machine_function *f;
25038
25039 f = ggc_cleared_alloc<machine_function> ();
25040 f->use_fast_prologue_epilogue_nregs = -1;
25041 f->call_abi = ix86_abi;
25042
25043 return f;
25044 }
25045
25046 /* Return a MEM corresponding to a stack slot with mode MODE.
25047 Allocate a new slot if necessary.
25048
25049 The RTL for a function can have several slots available: N is
25050 which slot to use. */
25051
25052 rtx
25053 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25054 {
25055 struct stack_local_entry *s;
25056
25057 gcc_assert (n < MAX_386_STACK_LOCALS);
25058
25059 for (s = ix86_stack_locals; s; s = s->next)
25060 if (s->mode == mode && s->n == n)
25061 return validize_mem (copy_rtx (s->rtl));
25062
25063 s = ggc_alloc<stack_local_entry> ();
25064 s->n = n;
25065 s->mode = mode;
25066 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25067
25068 s->next = ix86_stack_locals;
25069 ix86_stack_locals = s;
25070 return validize_mem (s->rtl);
25071 }
25072
25073 static void
25074 ix86_instantiate_decls (void)
25075 {
25076 struct stack_local_entry *s;
25077
25078 for (s = ix86_stack_locals; s; s = s->next)
25079 if (s->rtl != NULL_RTX)
25080 instantiate_decl_rtl (s->rtl);
25081 }
25082 \f
25083 /* Check whether x86 address PARTS is a pc-relative address. */
25084
25085 static bool
25086 rip_relative_addr_p (struct ix86_address *parts)
25087 {
25088 rtx base, index, disp;
25089
25090 base = parts->base;
25091 index = parts->index;
25092 disp = parts->disp;
25093
25094 if (disp && !base && !index)
25095 {
25096 if (TARGET_64BIT)
25097 {
25098 rtx symbol = disp;
25099
25100 if (GET_CODE (disp) == CONST)
25101 symbol = XEXP (disp, 0);
25102 if (GET_CODE (symbol) == PLUS
25103 && CONST_INT_P (XEXP (symbol, 1)))
25104 symbol = XEXP (symbol, 0);
25105
25106 if (GET_CODE (symbol) == LABEL_REF
25107 || (GET_CODE (symbol) == SYMBOL_REF
25108 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25109 || (GET_CODE (symbol) == UNSPEC
25110 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25111 || XINT (symbol, 1) == UNSPEC_PCREL
25112 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25113 return true;
25114 }
25115 }
25116 return false;
25117 }
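/* Hedged example (added for illustration): in 64-bit code a displacement-only
   address built from a SYMBOL_REF or LABEL_REF, e.g. the memory operand of

       movl  foo(%rip), %eax

   is classified as RIP-relative by the function above, whereas any address
   that uses a base or index register is not.  */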
25118
25119 /* Calculate the length of the memory address in the instruction encoding.
25120 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25121 or other prefixes. We never generate addr32 prefix for LEA insn. */
25122
25123 int
25124 memory_address_length (rtx addr, bool lea)
25125 {
25126 struct ix86_address parts;
25127 rtx base, index, disp;
25128 int len;
25129 int ok;
25130
25131 if (GET_CODE (addr) == PRE_DEC
25132 || GET_CODE (addr) == POST_INC
25133 || GET_CODE (addr) == PRE_MODIFY
25134 || GET_CODE (addr) == POST_MODIFY)
25135 return 0;
25136
25137 ok = ix86_decompose_address (addr, &parts);
25138 gcc_assert (ok);
25139
25140 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25141
25142 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25143 if (TARGET_64BIT && !lea
25144 && (SImode_address_operand (addr, VOIDmode)
25145 || (parts.base && GET_MODE (parts.base) == SImode)
25146 || (parts.index && GET_MODE (parts.index) == SImode)))
25147 len++;
25148
25149 base = parts.base;
25150 index = parts.index;
25151 disp = parts.disp;
25152
25153 if (base && GET_CODE (base) == SUBREG)
25154 base = SUBREG_REG (base);
25155 if (index && GET_CODE (index) == SUBREG)
25156 index = SUBREG_REG (index);
25157
25158 gcc_assert (base == NULL_RTX || REG_P (base));
25159 gcc_assert (index == NULL_RTX || REG_P (index));
25160
25161 /* Rule of thumb:
25162 - esp as the base always wants an index,
25163 - ebp as the base always wants a displacement,
25164 - r12 as the base always wants an index,
25165 - r13 as the base always wants a displacement. */
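   /* Worked examples of the rule of thumb (standard x86 encoding facts, added
      here for illustration; not part of the original comment):
	 (%eax)        mod=00 modrm only, no extra bytes
	 (%esp)        needs a SIB byte           -> len++ below
	 (%ebp)        needs a zero disp8         -> len++ below
	 4(%eax,%ebx)  SIB plus disp8             -> index and disp each add one  */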
25166
25167 /* Register Indirect. */
25168 if (base && !index && !disp)
25169 {
25170 /* esp (for its index) and ebp (for its displacement) need
25171 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25172 code. */
25173 if (base == arg_pointer_rtx
25174 || base == frame_pointer_rtx
25175 || REGNO (base) == SP_REG
25176 || REGNO (base) == BP_REG
25177 || REGNO (base) == R12_REG
25178 || REGNO (base) == R13_REG)
25179 len++;
25180 }
25181
25182 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25183 is not disp32, but disp32(%rip), so for disp32
25184 a SIB byte is needed, unless print_operand_address
25185 optimizes it into disp32(%rip) or (%rip) is implied
25186 by UNSPEC. */
25187 else if (disp && !base && !index)
25188 {
25189 len += 4;
25190 if (rip_relative_addr_p (&parts))
25191 len++;
25192 }
25193 else
25194 {
25195 /* Find the length of the displacement constant. */
25196 if (disp)
25197 {
25198 if (base && satisfies_constraint_K (disp))
25199 len += 1;
25200 else
25201 len += 4;
25202 }
25203 /* ebp always wants a displacement. Similarly r13. */
25204 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25205 len++;
25206
25207 /* An index requires the two-byte modrm form.... */
25208 if (index
25209 /* ...like esp (or r12), which always wants an index. */
25210 || base == arg_pointer_rtx
25211 || base == frame_pointer_rtx
25212 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25213 len++;
25214 }
25215
25216 return len;
25217 }
25218
25219 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25220 is set, expect that the insn has an 8-bit immediate alternative. */
25221 int
25222 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25223 {
25224 int len = 0;
25225 int i;
25226 extract_insn_cached (insn);
25227 for (i = recog_data.n_operands - 1; i >= 0; --i)
25228 if (CONSTANT_P (recog_data.operand[i]))
25229 {
25230 enum attr_mode mode = get_attr_mode (insn);
25231
25232 gcc_assert (!len);
25233 if (shortform && CONST_INT_P (recog_data.operand[i]))
25234 {
25235 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25236 switch (mode)
25237 {
25238 case MODE_QI:
25239 len = 1;
25240 continue;
25241 case MODE_HI:
25242 ival = trunc_int_for_mode (ival, HImode);
25243 break;
25244 case MODE_SI:
25245 ival = trunc_int_for_mode (ival, SImode);
25246 break;
25247 default:
25248 break;
25249 }
25250 if (IN_RANGE (ival, -128, 127))
25251 {
25252 len = 1;
25253 continue;
25254 }
25255 }
25256 switch (mode)
25257 {
25258 case MODE_QI:
25259 len = 1;
25260 break;
25261 case MODE_HI:
25262 len = 2;
25263 break;
25264 case MODE_SI:
25265 len = 4;
25266 break;
25267 /* Immediates for DImode instructions are encoded
25268 as 32-bit sign-extended values. */
25269 case MODE_DI:
25270 len = 4;
25271 break;
25272 default:
25273 fatal_insn ("unknown insn mode", insn);
25274 }
25275 }
25276 return len;
25277 }
25278
25279 /* Compute default value for "length_address" attribute. */
25280 int
25281 ix86_attr_length_address_default (rtx insn)
25282 {
25283 int i;
25284
25285 if (get_attr_type (insn) == TYPE_LEA)
25286 {
25287 rtx set = PATTERN (insn), addr;
25288
25289 if (GET_CODE (set) == PARALLEL)
25290 set = XVECEXP (set, 0, 0);
25291
25292 gcc_assert (GET_CODE (set) == SET);
25293
25294 addr = SET_SRC (set);
25295
25296 return memory_address_length (addr, true);
25297 }
25298
25299 extract_insn_cached (insn);
25300 for (i = recog_data.n_operands - 1; i >= 0; --i)
25301 if (MEM_P (recog_data.operand[i]))
25302 {
25303 constrain_operands_cached (reload_completed);
25304 if (which_alternative != -1)
25305 {
25306 const char *constraints = recog_data.constraints[i];
25307 int alt = which_alternative;
25308
25309 while (*constraints == '=' || *constraints == '+')
25310 constraints++;
25311 while (alt-- > 0)
25312 while (*constraints++ != ',')
25313 ;
25314 /* Skip ignored operands. */
25315 if (*constraints == 'X')
25316 continue;
25317 }
25318 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25319 }
25320 return 0;
25321 }
25322
25323 /* Compute default value for "length_vex" attribute. It includes
25324 2 or 3 byte VEX prefix and 1 opcode byte. */
25325
25326 int
25327 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25328 {
25329 int i;
25330
25331 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
25332 requires the 3-byte VEX prefix. */
25333 if (!has_0f_opcode || has_vex_w)
25334 return 3 + 1;
25335
25336 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25337 if (!TARGET_64BIT)
25338 return 2 + 1;
25339
25340 extract_insn_cached (insn);
25341
25342 for (i = recog_data.n_operands - 1; i >= 0; --i)
25343 if (REG_P (recog_data.operand[i]))
25344 {
25345 /* REX.W bit uses 3 byte VEX prefix. */
25346 if (GET_MODE (recog_data.operand[i]) == DImode
25347 && GENERAL_REG_P (recog_data.operand[i]))
25348 return 3 + 1;
25349 }
25350 else
25351 {
25352 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25353 if (MEM_P (recog_data.operand[i])
25354 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25355 return 3 + 1;
25356 }
25357
25358 return 2 + 1;
25359 }
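/* Illustration (general AVX encoding facts, added here; not from the original
   source): the 2-byte VEX prefix (0xC5) can express only the implied 0F escape
   and the REX.R bit, so for example

       vaddps %xmm1, %xmm2, %xmm3    # 2-byte VEX, length_vex = 2 + 1
       vaddps (%r8), %xmm2, %xmm3    # needs REX.B -> 3-byte VEX (0xC4), 3 + 1

   which matches the 2 + 1 and 3 + 1 values returned above.  */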
25360 \f
25361 /* Return the maximum number of instructions a cpu can issue. */
25362
25363 static int
25364 ix86_issue_rate (void)
25365 {
25366 switch (ix86_tune)
25367 {
25368 case PROCESSOR_PENTIUM:
25369 case PROCESSOR_BONNELL:
25370 case PROCESSOR_SILVERMONT:
25371 case PROCESSOR_INTEL:
25372 case PROCESSOR_K6:
25373 case PROCESSOR_BTVER2:
25374 case PROCESSOR_PENTIUM4:
25375 case PROCESSOR_NOCONA:
25376 return 2;
25377
25378 case PROCESSOR_PENTIUMPRO:
25379 case PROCESSOR_ATHLON:
25380 case PROCESSOR_K8:
25381 case PROCESSOR_AMDFAM10:
25382 case PROCESSOR_GENERIC:
25383 case PROCESSOR_BTVER1:
25384 return 3;
25385
25386 case PROCESSOR_BDVER1:
25387 case PROCESSOR_BDVER2:
25388 case PROCESSOR_BDVER3:
25389 case PROCESSOR_BDVER4:
25390 case PROCESSOR_CORE2:
25391 case PROCESSOR_NEHALEM:
25392 case PROCESSOR_SANDYBRIDGE:
25393 case PROCESSOR_HASWELL:
25394 return 4;
25395
25396 default:
25397 return 1;
25398 }
25399 }
25400
25401 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25402 by DEP_INSN and nothing else set by DEP_INSN. */
25403
25404 static bool
25405 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25406 {
25407 rtx set, set2;
25408
25409 /* Simplify the test for uninteresting insns. */
25410 if (insn_type != TYPE_SETCC
25411 && insn_type != TYPE_ICMOV
25412 && insn_type != TYPE_FCMOV
25413 && insn_type != TYPE_IBR)
25414 return false;
25415
25416 if ((set = single_set (dep_insn)) != 0)
25417 {
25418 set = SET_DEST (set);
25419 set2 = NULL_RTX;
25420 }
25421 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25422 && XVECLEN (PATTERN (dep_insn), 0) == 2
25423 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25424 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25425 {
25426 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25427 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25428 }
25429 else
25430 return false;
25431
25432 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25433 return false;
25434
25435 /* This test is true if the dependent insn reads the flags but
25436 not any other potentially set register. */
25437 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25438 return false;
25439
25440 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25441 return false;
25442
25443 return true;
25444 }
25445
25446 /* Return true iff USE_INSN has a memory address with operands set by
25447 SET_INSN. */
25448
25449 bool
25450 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25451 {
25452 int i;
25453 extract_insn_cached (use_insn);
25454 for (i = recog_data.n_operands - 1; i >= 0; --i)
25455 if (MEM_P (recog_data.operand[i]))
25456 {
25457 rtx addr = XEXP (recog_data.operand[i], 0);
25458 return modified_in_p (addr, set_insn) != 0;
25459 }
25460 return false;
25461 }
25462
25463 /* Helper function for exact_store_load_dependency.
25464 Return true if addr is found in insn. */
25465 static bool
25466 exact_dependency_1 (rtx addr, rtx insn)
25467 {
25468 enum rtx_code code;
25469 const char *format_ptr;
25470 int i, j;
25471
25472 code = GET_CODE (insn);
25473 switch (code)
25474 {
25475 case MEM:
25476 if (rtx_equal_p (addr, insn))
25477 return true;
25478 break;
25479 case REG:
25480 CASE_CONST_ANY:
25481 case SYMBOL_REF:
25482 case CODE_LABEL:
25483 case PC:
25484 case CC0:
25485 case EXPR_LIST:
25486 return false;
25487 default:
25488 break;
25489 }
25490
25491 format_ptr = GET_RTX_FORMAT (code);
25492 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25493 {
25494 switch (*format_ptr++)
25495 {
25496 case 'e':
25497 if (exact_dependency_1 (addr, XEXP (insn, i)))
25498 return true;
25499 break;
25500 case 'E':
25501 for (j = 0; j < XVECLEN (insn, i); j++)
25502 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25503 return true;
25504 break;
25505 }
25506 }
25507 return false;
25508 }
25509
25510 /* Return true if there exists exact dependency for store & load, i.e.
25511 the same memory address is used in them. */
25512 static bool
25513 exact_store_load_dependency (rtx store, rtx load)
25514 {
25515 rtx set1, set2;
25516
25517 set1 = single_set (store);
25518 if (!set1)
25519 return false;
25520 if (!MEM_P (SET_DEST (set1)))
25521 return false;
25522 set2 = single_set (load);
25523 if (!set2)
25524 return false;
25525 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25526 return true;
25527 return false;
25528 }
25529
25530 static int
25531 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25532 {
25533 enum attr_type insn_type, dep_insn_type;
25534 enum attr_memory memory;
25535 rtx set, set2;
25536 int dep_insn_code_number;
25537
25538 /* Anti and output dependencies have zero cost on all CPUs. */
25539 if (REG_NOTE_KIND (link) != 0)
25540 return 0;
25541
25542 dep_insn_code_number = recog_memoized (dep_insn);
25543
25544 /* If we can't recognize the insns, we can't really do anything. */
25545 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25546 return cost;
25547
25548 insn_type = get_attr_type (insn);
25549 dep_insn_type = get_attr_type (dep_insn);
25550
25551 switch (ix86_tune)
25552 {
25553 case PROCESSOR_PENTIUM:
25554 /* Address Generation Interlock adds a cycle of latency. */
25555 if (insn_type == TYPE_LEA)
25556 {
25557 rtx addr = PATTERN (insn);
25558
25559 if (GET_CODE (addr) == PARALLEL)
25560 addr = XVECEXP (addr, 0, 0);
25561
25562 gcc_assert (GET_CODE (addr) == SET);
25563
25564 addr = SET_SRC (addr);
25565 if (modified_in_p (addr, dep_insn))
25566 cost += 1;
25567 }
25568 else if (ix86_agi_dependent (dep_insn, insn))
25569 cost += 1;
25570
25571 /* ??? Compares pair with jump/setcc. */
25572 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25573 cost = 0;
25574
25575 /* Floating point stores require value to be ready one cycle earlier. */
25576 if (insn_type == TYPE_FMOV
25577 && get_attr_memory (insn) == MEMORY_STORE
25578 && !ix86_agi_dependent (dep_insn, insn))
25579 cost += 1;
25580 break;
25581
25582 case PROCESSOR_PENTIUMPRO:
25583 /* INT->FP conversion is expensive. */
25584 if (get_attr_fp_int_src (dep_insn))
25585 cost += 5;
25586
25587 /* There is one cycle extra latency between an FP op and a store. */
25588 if (insn_type == TYPE_FMOV
25589 && (set = single_set (dep_insn)) != NULL_RTX
25590 && (set2 = single_set (insn)) != NULL_RTX
25591 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25592 && MEM_P (SET_DEST (set2)))
25593 cost += 1;
25594
25595 memory = get_attr_memory (insn);
25596
25597 /* Show the ability of the reorder buffer to hide the latency of a load
25598 by executing it in parallel with the previous instruction when the
25599 previous instruction is not needed to compute the address. */
25600 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25601 && !ix86_agi_dependent (dep_insn, insn))
25602 {
25603 /* Claim moves take one cycle, as the core can issue one load
25604 at a time and the next load can start a cycle later. */
25605 if (dep_insn_type == TYPE_IMOV
25606 || dep_insn_type == TYPE_FMOV)
25607 cost = 1;
25608 else if (cost > 1)
25609 cost--;
25610 }
25611 break;
25612
25613 case PROCESSOR_K6:
25614 /* The esp dependency is resolved before
25615 the instruction is really finished. */
25616 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25617 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25618 return 1;
25619
25620 /* INT->FP conversion is expensive. */
25621 if (get_attr_fp_int_src (dep_insn))
25622 cost += 5;
25623
25624 memory = get_attr_memory (insn);
25625
25626 /* Show the ability of the reorder buffer to hide the latency of a load
25627 by executing it in parallel with the previous instruction when the
25628 previous instruction is not needed to compute the address. */
25629 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25630 && !ix86_agi_dependent (dep_insn, insn))
25631 {
25632 /* Claim moves take one cycle, as the core can issue one load
25633 at a time and the next load can start a cycle later. */
25634 if (dep_insn_type == TYPE_IMOV
25635 || dep_insn_type == TYPE_FMOV)
25636 cost = 1;
25637 else if (cost > 2)
25638 cost -= 2;
25639 else
25640 cost = 1;
25641 }
25642 break;
25643
25644 case PROCESSOR_AMDFAM10:
25645 case PROCESSOR_BDVER1:
25646 case PROCESSOR_BDVER2:
25647 case PROCESSOR_BDVER3:
25648 case PROCESSOR_BDVER4:
25649 case PROCESSOR_BTVER1:
25650 case PROCESSOR_BTVER2:
25651 case PROCESSOR_GENERIC:
25652 /* The stack engine allows push and pop instructions to execute in parallel. */
25653 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25654 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25655 return 0;
25656 /* FALLTHRU */
25657
25658 case PROCESSOR_ATHLON:
25659 case PROCESSOR_K8:
25660 memory = get_attr_memory (insn);
25661
25662 /* Show the ability of the reorder buffer to hide the latency of a load
25663 by executing it in parallel with the previous instruction when the
25664 previous instruction is not needed to compute the address. */
25665 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25666 && !ix86_agi_dependent (dep_insn, insn))
25667 {
25668 enum attr_unit unit = get_attr_unit (insn);
25669 int loadcost = 3;
25670
25671 /* Because of the difference between the length of integer and
25672 floating unit pipeline preparation stages, the memory operands
25673 for floating point are cheaper.
25674
25675 ??? For Athlon the difference is most probably 2. */
25676 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25677 loadcost = 3;
25678 else
25679 loadcost = TARGET_ATHLON ? 2 : 0;
25680
25681 if (cost >= loadcost)
25682 cost -= loadcost;
25683 else
25684 cost = 0;
25685 }
25686 break;
25687
25688 case PROCESSOR_CORE2:
25689 case PROCESSOR_NEHALEM:
25690 case PROCESSOR_SANDYBRIDGE:
25691 case PROCESSOR_HASWELL:
25692 /* The stack engine allows push and pop instructions to execute in parallel. */
25693 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25694 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25695 return 0;
25696
25697 memory = get_attr_memory (insn);
25698
25699 /* Show the ability of the reorder buffer to hide the latency of a load
25700 by executing it in parallel with the previous instruction when the
25701 previous instruction is not needed to compute the address. */
25702 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25703 && !ix86_agi_dependent (dep_insn, insn))
25704 {
25705 if (cost >= 4)
25706 cost -= 4;
25707 else
25708 cost = 0;
25709 }
25710 break;
25711
25712 case PROCESSOR_SILVERMONT:
25713 case PROCESSOR_INTEL:
25714 if (!reload_completed)
25715 return cost;
25716
25717 /* Increase cost of integer loads. */
25718 memory = get_attr_memory (dep_insn);
25719 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25720 {
25721 enum attr_unit unit = get_attr_unit (dep_insn);
25722 if (unit == UNIT_INTEGER && cost == 1)
25723 {
25724 if (memory == MEMORY_LOAD)
25725 cost = 3;
25726 else
25727 {
25728 /* Increase cost of ld/st for short int types only
25729 because of store forwarding issue. */
25730 rtx set = single_set (dep_insn);
25731 if (set && (GET_MODE (SET_DEST (set)) == QImode
25732 || GET_MODE (SET_DEST (set)) == HImode))
25733 {
25734 /* Increase cost of store/load insn if exact
25735 dependence exists and it is load insn. */
25736 enum attr_memory insn_memory = get_attr_memory (insn);
25737 if (insn_memory == MEMORY_LOAD
25738 && exact_store_load_dependency (dep_insn, insn))
25739 cost = 3;
25740 }
25741 }
25742 }
25743 }
25744
25745 default:
25746 break;
25747 }
25748
25749 return cost;
25750 }
25751
25752 /* How many alternative schedules to try. This should be as wide as the
25753 scheduling freedom in the DFA, but no wider. Making this value too
25754 large results in extra work for the scheduler. */
25755
25756 static int
25757 ia32_multipass_dfa_lookahead (void)
25758 {
25759 switch (ix86_tune)
25760 {
25761 case PROCESSOR_PENTIUM:
25762 return 2;
25763
25764 case PROCESSOR_PENTIUMPRO:
25765 case PROCESSOR_K6:
25766 return 1;
25767
25768 case PROCESSOR_BDVER1:
25769 case PROCESSOR_BDVER2:
25770 case PROCESSOR_BDVER3:
25771 case PROCESSOR_BDVER4:
25772 /* We use a lookahead value of 4 for BD both before and after reload
25773 scheduling. The plan is to use value 8 for -O3. */
25774 return 4;
25775
25776 case PROCESSOR_CORE2:
25777 case PROCESSOR_NEHALEM:
25778 case PROCESSOR_SANDYBRIDGE:
25779 case PROCESSOR_HASWELL:
25780 case PROCESSOR_BONNELL:
25781 case PROCESSOR_SILVERMONT:
25782 case PROCESSOR_INTEL:
25783 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25784 the number of instructions that can be executed in a cycle, i.e.,
25785 issue_rate. I wonder why tuning for many CPUs does not do this. */
25786 if (reload_completed)
25787 return ix86_issue_rate ();
25788 /* Don't use lookahead for pre-reload schedule to save compile time. */
25789 return 0;
25790
25791 default:
25792 return 0;
25793 }
25794 }
25795
25796 /* Return true if target platform supports macro-fusion. */
25797
25798 static bool
25799 ix86_macro_fusion_p ()
25800 {
25801 return TARGET_FUSE_CMP_AND_BRANCH;
25802 }
25803
25804 /* Check whether the current microarchitecture supports macro fusion
25805 for insn pair "CONDGEN + CONDJMP". Refer to
25806 "Intel Architectures Optimization Reference Manual". */
25807
25808 static bool
25809 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25810 {
25811 rtx src, dest;
25812 rtx single_set = single_set (condgen);
25813 enum rtx_code ccode;
25814 rtx compare_set = NULL_RTX, test_if, cond;
25815 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25816
25817 if (get_attr_type (condgen) != TYPE_TEST
25818 && get_attr_type (condgen) != TYPE_ICMP
25819 && get_attr_type (condgen) != TYPE_INCDEC
25820 && get_attr_type (condgen) != TYPE_ALU)
25821 return false;
25822
25823 if (single_set == NULL_RTX
25824 && !TARGET_FUSE_ALU_AND_BRANCH)
25825 return false;
25826
25827 if (single_set != NULL_RTX)
25828 compare_set = single_set;
25829 else
25830 {
25831 int i;
25832 rtx pat = PATTERN (condgen);
25833 for (i = 0; i < XVECLEN (pat, 0); i++)
25834 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25835 {
25836 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25837 if (GET_CODE (set_src) == COMPARE)
25838 compare_set = XVECEXP (pat, 0, i);
25839 else
25840 alu_set = XVECEXP (pat, 0, i);
25841 }
25842 }
25843 if (compare_set == NULL_RTX)
25844 return false;
25845 src = SET_SRC (compare_set);
25846 if (GET_CODE (src) != COMPARE)
25847 return false;
25848
25849 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25850 supported. */
25851 if ((MEM_P (XEXP (src, 0))
25852 && CONST_INT_P (XEXP (src, 1)))
25853 || (MEM_P (XEXP (src, 1))
25854 && CONST_INT_P (XEXP (src, 0))))
25855 return false;
25856
25857 /* No fusion for RIP-relative address. */
25858 if (MEM_P (XEXP (src, 0)))
25859 addr = XEXP (XEXP (src, 0), 0);
25860 else if (MEM_P (XEXP (src, 1)))
25861 addr = XEXP (XEXP (src, 1), 0);
25862
25863 if (addr)
    {
25864 ix86_address parts;
25865 int ok = ix86_decompose_address (addr, &parts);
25866 gcc_assert (ok);
25867
25868 if (rip_relative_addr_p (&parts))
25869 return false;
25870 }
25871
25872 test_if = SET_SRC (pc_set (condjmp));
25873 cond = XEXP (test_if, 0);
25874 ccode = GET_CODE (cond);
25875 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25876 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25877 && (ccode == GE
25878 || ccode == GT
25879 || ccode == LE
25880 || ccode == LT))
25881 return false;
25882
25883 /* Return true for TYPE_TEST and TYPE_ICMP. */
25884 if (get_attr_type (condgen) == TYPE_TEST
25885 || get_attr_type (condgen) == TYPE_ICMP)
25886 return true;
25887
25888 /* The following handles the case of macro-fusion for alu + jmp. */
25889 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25890 return false;
25891
25892 /* No fusion for alu op with memory destination operand. */
25893 dest = SET_DEST (alu_set);
25894 if (MEM_P (dest))
25895 return false;
25896
25897 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25898 supported. */
25899 if (get_attr_type (condgen) == TYPE_INCDEC
25900 && (ccode == GEU
25901 || ccode == GTU
25902 || ccode == LEU
25903 || ccode == LTU))
25904 return false;
25905
25906 return true;
25907 }
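/* Examples of the rules above (added for illustration):

       cmp  %rax, %rbx ; je .L1       -- fuses (TYPE_ICMP + jcc)
       cmpl $1, var(%rip) ; je .L1    -- no fusion (MEM-IMM compare, and
                                         RIP-relative address)
       dec  %ecx ; ja .L1             -- no fusion (inc/dec + unsigned jcc)

   These follow directly from the checks performed in
   ix86_macro_fusion_pair_p; whether fusion actually happens also depends on
   the TARGET_FUSE_* tuning flags tested above.  */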
25908
25909 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25910 execution. It is applied if
25911 (1) IMUL instruction is on the top of list;
25912 (2) there is exactly one producer of an independent IMUL instruction in
25913 the ready list.
25914 Return index of IMUL producer if it was found and -1 otherwise. */
25915 static int
25916 do_reorder_for_imul (rtx *ready, int n_ready)
25917 {
25918 rtx insn, set, insn1, insn2;
25919 sd_iterator_def sd_it;
25920 dep_t dep;
25921 int index = -1;
25922 int i;
25923
25924 if (!TARGET_BONNELL)
25925 return index;
25926
25927 /* Check that IMUL instruction is on the top of ready list. */
25928 insn = ready[n_ready - 1];
25929 set = single_set (insn);
25930 if (!set)
25931 return index;
25932 if (!(GET_CODE (SET_SRC (set)) == MULT
25933 && GET_MODE (SET_SRC (set)) == SImode))
25934 return index;
25935
25936 /* Search for producer of independent IMUL instruction. */
25937 for (i = n_ready - 2; i >= 0; i--)
25938 {
25939 insn = ready[i];
25940 if (!NONDEBUG_INSN_P (insn))
25941 continue;
25942 /* Skip IMUL instruction. */
25943 insn2 = PATTERN (insn);
25944 if (GET_CODE (insn2) == PARALLEL)
25945 insn2 = XVECEXP (insn2, 0, 0);
25946 if (GET_CODE (insn2) == SET
25947 && GET_CODE (SET_SRC (insn2)) == MULT
25948 && GET_MODE (SET_SRC (insn2)) == SImode)
25949 continue;
25950
25951 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25952 {
25953 rtx con;
25954 con = DEP_CON (dep);
25955 if (!NONDEBUG_INSN_P (con))
25956 continue;
25957 insn1 = PATTERN (con);
25958 if (GET_CODE (insn1) == PARALLEL)
25959 insn1 = XVECEXP (insn1, 0, 0);
25960
25961 if (GET_CODE (insn1) == SET
25962 && GET_CODE (SET_SRC (insn1)) == MULT
25963 && GET_MODE (SET_SRC (insn1)) == SImode)
25964 {
25965 sd_iterator_def sd_it1;
25966 dep_t dep1;
25967 /* Check if there is no other dependee for IMUL. */
25968 index = i;
25969 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25970 {
25971 rtx pro;
25972 pro = DEP_PRO (dep1);
25973 if (!NONDEBUG_INSN_P (pro))
25974 continue;
25975 if (pro != insn)
25976 index = -1;
25977 }
25978 if (index >= 0)
25979 break;
25980 }
25981 }
25982 if (index >= 0)
25983 break;
25984 }
25985 return index;
25986 }
25987
25988 /* Try to find the best candidate on the top of ready list if two insns
25989 have the same priority - candidate is best if its dependees were
25990 scheduled earlier. Applied for Silvermont and Intel tuning only.
25991 Return true if top 2 insns must be interchanged. */
25992 static bool
25993 swap_top_of_ready_list (rtx *ready, int n_ready)
25994 {
25995 rtx top = ready[n_ready - 1];
25996 rtx next = ready[n_ready - 2];
25997 rtx set;
25998 sd_iterator_def sd_it;
25999 dep_t dep;
26000 int clock1 = -1;
26001 int clock2 = -1;
26002 #define INSN_TICK(INSN) (HID (INSN)->tick)
26003
26004 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26005 return false;
26006
26007 if (!NONDEBUG_INSN_P (top))
26008 return false;
26009 if (!NONJUMP_INSN_P (top))
26010 return false;
26011 if (!NONDEBUG_INSN_P (next))
26012 return false;
26013 if (!NONJUMP_INSN_P (next))
26014 return false;
26015 set = single_set (top);
26016 if (!set)
26017 return false;
26018 set = single_set (next);
26019 if (!set)
26020 return false;
26021
26022 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26023 {
26024 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26025 return false;
26026 /* Determine the winner more precisely. */
26027 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26028 {
26029 rtx pro;
26030 pro = DEP_PRO (dep);
26031 if (!NONDEBUG_INSN_P (pro))
26032 continue;
26033 if (INSN_TICK (pro) > clock1)
26034 clock1 = INSN_TICK (pro);
26035 }
26036 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26037 {
26038 rtx pro;
26039 pro = DEP_PRO (dep);
26040 if (!NONDEBUG_INSN_P (pro))
26041 continue;
26042 if (INSN_TICK (pro) > clock2)
26043 clock2 = INSN_TICK (pro);
26044 }
26045
26046 if (clock1 == clock2)
26047 {
26048 /* Determine winner - load must win. */
26049 enum attr_memory memory1, memory2;
26050 memory1 = get_attr_memory (top);
26051 memory2 = get_attr_memory (next);
26052 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26053 return true;
26054 }
26055 return (bool) (clock2 < clock1);
26056 }
26057 return false;
26058 #undef INSN_TICK
26059 }
26060
26061 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26062 Return issue rate. */
26063 static int
26064 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26065 int clock_var)
26066 {
26067 int issue_rate = -1;
26068 int n_ready = *pn_ready;
26069 int i;
26070 rtx insn;
26071 int index = -1;
26072
26073 /* Set up issue rate. */
26074 issue_rate = ix86_issue_rate ();
26075
26076 /* Do reordering for BONNELL/SILVERMONT only. */
26077 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26078 return issue_rate;
26079
26080 /* Nothing to do if ready list contains only 1 instruction. */
26081 if (n_ready <= 1)
26082 return issue_rate;
26083
26084 /* Do reordering for the post-reload scheduler only. */
26085 if (!reload_completed)
26086 return issue_rate;
26087
26088 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26089 {
26090 if (sched_verbose > 1)
26091 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26092 INSN_UID (ready[index]));
26093
26094 /* Put IMUL producer (ready[index]) at the top of ready list. */
26095 insn = ready[index];
26096 for (i = index; i < n_ready - 1; i++)
26097 ready[i] = ready[i + 1];
26098 ready[n_ready - 1] = insn;
26099 return issue_rate;
26100 }
26101 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26102 {
26103 if (sched_verbose > 1)
26104 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26105 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26106 /* Swap 2 top elements of ready list. */
26107 insn = ready[n_ready - 1];
26108 ready[n_ready - 1] = ready[n_ready - 2];
26109 ready[n_ready - 2] = insn;
26110 }
26111 return issue_rate;
26112 }
26113
26114 static bool
26115 ix86_class_likely_spilled_p (reg_class_t);
26116
26117 /* Return true if the lhs of INSN is a HW function argument register and set
26118 IS_SPILLED to true if it is a likely-spilled HW register. */
26119 static bool
26120 insn_is_function_arg (rtx insn, bool* is_spilled)
26121 {
26122 rtx dst;
26123
26124 if (!NONDEBUG_INSN_P (insn))
26125 return false;
26126 /* Call instructions are not movable; ignore them. */
26127 if (CALL_P (insn))
26128 return false;
26129 insn = PATTERN (insn);
26130 if (GET_CODE (insn) == PARALLEL)
26131 insn = XVECEXP (insn, 0, 0);
26132 if (GET_CODE (insn) != SET)
26133 return false;
26134 dst = SET_DEST (insn);
26135 if (REG_P (dst) && HARD_REGISTER_P (dst)
26136 && ix86_function_arg_regno_p (REGNO (dst)))
26137 {
26138 /* Is it likely spilled HW register? */
26139 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26140 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26141 *is_spilled = true;
26142 return true;
26143 }
26144 return false;
26145 }
26146
26147 /* Add output dependencies for a chain of adjacent function arguments, but only
26148 if there is a move to a likely-spilled HW register. Return the first argument
26149 if at least one dependence was added, or NULL otherwise. */
26150 static rtx
26151 add_parameter_dependencies (rtx call, rtx head)
26152 {
26153 rtx insn;
26154 rtx last = call;
26155 rtx first_arg = NULL;
26156 bool is_spilled = false;
26157
26158 head = PREV_INSN (head);
26159
26160 /* Find the argument-passing instruction nearest to the call. */
26161 while (true)
26162 {
26163 last = PREV_INSN (last);
26164 if (last == head)
26165 return NULL;
26166 if (!NONDEBUG_INSN_P (last))
26167 continue;
26168 if (insn_is_function_arg (last, &is_spilled))
26169 break;
26170 return NULL;
26171 }
26172
26173 first_arg = last;
26174 while (true)
26175 {
26176 insn = PREV_INSN (last);
26177 if (!INSN_P (insn))
26178 break;
26179 if (insn == head)
26180 break;
26181 if (!NONDEBUG_INSN_P (insn))
26182 {
26183 last = insn;
26184 continue;
26185 }
26186 if (insn_is_function_arg (insn, &is_spilled))
26187 {
26188 /* Add an output dependence between two function arguments if the chain
26189 of output arguments contains likely-spilled HW registers. */
26190 if (is_spilled)
26191 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26192 first_arg = last = insn;
26193 }
26194 else
26195 break;
26196 }
26197 if (!is_spilled)
26198 return NULL;
26199 return first_arg;
26200 }
26201
26202 /* Add output or anti dependency from insn to first_arg to restrict its code
26203 motion. */
26204 static void
26205 avoid_func_arg_motion (rtx first_arg, rtx insn)
26206 {
26207 rtx set;
26208 rtx tmp;
26209
26210 set = single_set (insn);
26211 if (!set)
26212 return;
26213 tmp = SET_DEST (set);
26214 if (REG_P (tmp))
26215 {
26216 /* Add output dependency to the first function argument. */
26217 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26218 return;
26219 }
26220 /* Add anti dependency. */
26221 add_dependence (first_arg, insn, REG_DEP_ANTI);
26222 }
26223
26224 /* Avoid cross-block motion of a function argument by adding a dependency
26225 from the first non-jump instruction in BB. */
26226 static void
26227 add_dependee_for_func_arg (rtx arg, basic_block bb)
26228 {
26229 rtx insn = BB_END (bb);
26230
26231 while (insn)
26232 {
26233 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26234 {
26235 rtx set = single_set (insn);
26236 if (set)
26237 {
26238 avoid_func_arg_motion (arg, insn);
26239 return;
26240 }
26241 }
26242 if (insn == BB_HEAD (bb))
26243 return;
26244 insn = PREV_INSN (insn);
26245 }
26246 }
26247
26248 /* Hook for pre-reload schedule - avoid motion of function arguments
26249 passed in likely spilled HW registers. */
26250 static void
26251 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26252 {
26253 rtx insn;
26254 rtx first_arg = NULL;
26255 if (reload_completed)
26256 return;
26257 while (head != tail && DEBUG_INSN_P (head))
26258 head = NEXT_INSN (head);
26259 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26260 if (INSN_P (insn) && CALL_P (insn))
26261 {
26262 first_arg = add_parameter_dependencies (insn, head);
26263 if (first_arg)
26264 {
26265 /* Add a dependee for the first argument to predecessors, but only
26266 if the region contains more than one block. */
26267 basic_block bb = BLOCK_FOR_INSN (insn);
26268 int rgn = CONTAINING_RGN (bb->index);
26269 int nr_blks = RGN_NR_BLOCKS (rgn);
26270 /* Skip trivial regions and region head blocks that can have
26271 predecessors outside of region. */
26272 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26273 {
26274 edge e;
26275 edge_iterator ei;
26276
26277 /* Regions are SCCs with the exception of selective
26278 scheduling with pipelining of outer blocks enabled.
26279 So also check that immediate predecessors of a non-head
26280 block are in the same region. */
26281 FOR_EACH_EDGE (e, ei, bb->preds)
26282 {
26283 /* Avoid creating loop-carried dependencies by using
26284 the topological ordering in the region. */
26285 if (rgn == CONTAINING_RGN (e->src->index)
26286 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26287 add_dependee_for_func_arg (first_arg, e->src);
26288 }
26289 }
26290 insn = first_arg;
26291 if (insn == head)
26292 break;
26293 }
26294 }
26295 else if (first_arg)
26296 avoid_func_arg_motion (first_arg, insn);
26297 }
26298
26299 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26300 HW registers to the maximum, to schedule them as soon as possible. These are
26301 moves from function argument registers at the top of the function entry
26302 and moves from function return value registers after call. */
26303 static int
26304 ix86_adjust_priority (rtx insn, int priority)
26305 {
26306 rtx set;
26307
26308 if (reload_completed)
26309 return priority;
26310
26311 if (!NONDEBUG_INSN_P (insn))
26312 return priority;
26313
26314 set = single_set (insn);
26315 if (set)
26316 {
26317 rtx tmp = SET_SRC (set);
26318 if (REG_P (tmp)
26319 && HARD_REGISTER_P (tmp)
26320 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26321 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26322 return current_sched_info->sched_max_insns_priority;
26323 }
26324
26325 return priority;
26326 }
26327
26328 /* Model decoder of Core 2/i7.
26329 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26330 track the instruction fetch block boundaries and make sure that long
26331 (9+ bytes) instructions are assigned to D0. */
26332
26333 /* Maximum length of an insn that can be handled by
26334 a secondary decoder unit. '8' for Core 2/i7. */
26335 static int core2i7_secondary_decoder_max_insn_size;
26336
26337 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26338 '16' for Core 2/i7. */
26339 static int core2i7_ifetch_block_size;
26340
26341 /* Maximum number of instructions decoder can handle per cycle.
26342 '6' for Core 2/i7. */
26343 static int core2i7_ifetch_block_max_insns;
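/* Illustrative consequence of the parameters above (added comment; not in the
   original source): with a 16-byte ifetch block and an 8-byte limit for the
   secondary decoders, an insn of 9 or more bytes can only be taken as the
   first insn of a cycle (decoder D0), and e.g. three insns of 7, 7 and 3
   bytes cannot all be issued in one cycle because 7 + 7 + 3 > 16.  */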
26344
26345 typedef struct ix86_first_cycle_multipass_data_ *
26346 ix86_first_cycle_multipass_data_t;
26347 typedef const struct ix86_first_cycle_multipass_data_ *
26348 const_ix86_first_cycle_multipass_data_t;
26349
26350 /* A variable to store target state across calls to max_issue within
26351 one cycle. */
26352 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26353 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26354
26355 /* Initialize DATA. */
26356 static void
26357 core2i7_first_cycle_multipass_init (void *_data)
26358 {
26359 ix86_first_cycle_multipass_data_t data
26360 = (ix86_first_cycle_multipass_data_t) _data;
26361
26362 data->ifetch_block_len = 0;
26363 data->ifetch_block_n_insns = 0;
26364 data->ready_try_change = NULL;
26365 data->ready_try_change_size = 0;
26366 }
26367
26368 /* Advancing the cycle; reset ifetch block counts. */
26369 static void
26370 core2i7_dfa_post_advance_cycle (void)
26371 {
26372 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26373
26374 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26375
26376 data->ifetch_block_len = 0;
26377 data->ifetch_block_n_insns = 0;
26378 }
26379
26380 static int min_insn_size (rtx);
26381
26382 /* Filter out insns from ready_try that the core will not be able to issue
26383 on current cycle due to decoder. */
26384 static void
26385 core2i7_first_cycle_multipass_filter_ready_try
26386 (const_ix86_first_cycle_multipass_data_t data,
26387 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26388 {
26389 while (n_ready--)
26390 {
26391 rtx insn;
26392 int insn_size;
26393
26394 if (ready_try[n_ready])
26395 continue;
26396
26397 insn = get_ready_element (n_ready);
26398 insn_size = min_insn_size (insn);
26399
26400 if (/* If this insn is too long for a secondary decoder ... */
26401 (!first_cycle_insn_p
26402 && insn_size > core2i7_secondary_decoder_max_insn_size)
26403 /* ... or it would not fit into the ifetch block ... */
26404 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26405 /* ... or the decoder is full already ... */
26406 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26407 /* ... mask the insn out. */
26408 {
26409 ready_try[n_ready] = 1;
26410
26411 if (data->ready_try_change)
26412 bitmap_set_bit (data->ready_try_change, n_ready);
26413 }
26414 }
26415 }
26416
26417 /* Prepare for a new round of multipass lookahead scheduling. */
26418 static void
26419 core2i7_first_cycle_multipass_begin (void *_data,
26420 signed char *ready_try, int n_ready,
26421 bool first_cycle_insn_p)
26422 {
26423 ix86_first_cycle_multipass_data_t data
26424 = (ix86_first_cycle_multipass_data_t) _data;
26425 const_ix86_first_cycle_multipass_data_t prev_data
26426 = ix86_first_cycle_multipass_data;
26427
26428 /* Restore the state from the end of the previous round. */
26429 data->ifetch_block_len = prev_data->ifetch_block_len;
26430 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26431
26432 /* Filter instructions that cannot be issued on current cycle due to
26433 decoder restrictions. */
26434 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26435 first_cycle_insn_p);
26436 }
26437
26438 /* INSN is being issued in current solution. Account for its impact on
26439 the decoder model. */
26440 static void
26441 core2i7_first_cycle_multipass_issue (void *_data,
26442 signed char *ready_try, int n_ready,
26443 rtx insn, const void *_prev_data)
26444 {
26445 ix86_first_cycle_multipass_data_t data
26446 = (ix86_first_cycle_multipass_data_t) _data;
26447 const_ix86_first_cycle_multipass_data_t prev_data
26448 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26449
26450 int insn_size = min_insn_size (insn);
26451
26452 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26453 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26454 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26455 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26456
26457 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26458 if (!data->ready_try_change)
26459 {
26460 data->ready_try_change = sbitmap_alloc (n_ready);
26461 data->ready_try_change_size = n_ready;
26462 }
26463 else if (data->ready_try_change_size < n_ready)
26464 {
26465 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26466 n_ready, 0);
26467 data->ready_try_change_size = n_ready;
26468 }
26469 bitmap_clear (data->ready_try_change);
26470
26471 /* Filter out insns from ready_try that the core will not be able to issue
26472 on current cycle due to decoder. */
26473 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26474 false);
26475 }
26476
26477 /* Revert the effect on ready_try. */
26478 static void
26479 core2i7_first_cycle_multipass_backtrack (const void *_data,
26480 signed char *ready_try,
26481 int n_ready ATTRIBUTE_UNUSED)
26482 {
26483 const_ix86_first_cycle_multipass_data_t data
26484 = (const_ix86_first_cycle_multipass_data_t) _data;
26485 unsigned int i = 0;
26486 sbitmap_iterator sbi;
26487
26488 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26489 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26490 {
26491 ready_try[i] = 0;
26492 }
26493 }
26494
26495 /* Save the result of multipass lookahead scheduling for the next round. */
26496 static void
26497 core2i7_first_cycle_multipass_end (const void *_data)
26498 {
26499 const_ix86_first_cycle_multipass_data_t data
26500 = (const_ix86_first_cycle_multipass_data_t) _data;
26501 ix86_first_cycle_multipass_data_t next_data
26502 = ix86_first_cycle_multipass_data;
26503
26504 if (data != NULL)
26505 {
26506 next_data->ifetch_block_len = data->ifetch_block_len;
26507 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26508 }
26509 }
26510
26511 /* Deallocate target data. */
26512 static void
26513 core2i7_first_cycle_multipass_fini (void *_data)
26514 {
26515 ix86_first_cycle_multipass_data_t data
26516 = (ix86_first_cycle_multipass_data_t) _data;
26517
26518 if (data->ready_try_change)
26519 {
26520 sbitmap_free (data->ready_try_change);
26521 data->ready_try_change = NULL;
26522 data->ready_try_change_size = 0;
26523 }
26524 }
26525
26526 /* Prepare for scheduling pass. */
26527 static void
26528 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26529 int verbose ATTRIBUTE_UNUSED,
26530 int max_uid ATTRIBUTE_UNUSED)
26531 {
26532 /* Install scheduling hooks for current CPU. Some of these hooks are used
26533 in time-critical parts of the scheduler, so we only set them up when
26534 they are actually used. */
26535 switch (ix86_tune)
26536 {
26537 case PROCESSOR_CORE2:
26538 case PROCESSOR_NEHALEM:
26539 case PROCESSOR_SANDYBRIDGE:
26540 case PROCESSOR_HASWELL:
26541 /* Do not perform multipass scheduling for pre-reload schedule
26542 to save compile time. */
26543 if (reload_completed)
26544 {
26545 targetm.sched.dfa_post_advance_cycle
26546 = core2i7_dfa_post_advance_cycle;
26547 targetm.sched.first_cycle_multipass_init
26548 = core2i7_first_cycle_multipass_init;
26549 targetm.sched.first_cycle_multipass_begin
26550 = core2i7_first_cycle_multipass_begin;
26551 targetm.sched.first_cycle_multipass_issue
26552 = core2i7_first_cycle_multipass_issue;
26553 targetm.sched.first_cycle_multipass_backtrack
26554 = core2i7_first_cycle_multipass_backtrack;
26555 targetm.sched.first_cycle_multipass_end
26556 = core2i7_first_cycle_multipass_end;
26557 targetm.sched.first_cycle_multipass_fini
26558 = core2i7_first_cycle_multipass_fini;
26559
26560 /* Set decoder parameters. */
26561 core2i7_secondary_decoder_max_insn_size = 8;
26562 core2i7_ifetch_block_size = 16;
26563 core2i7_ifetch_block_max_insns = 6;
26564 break;
26565 }
26566 /* ... Fall through ... */
26567 default:
26568 targetm.sched.dfa_post_advance_cycle = NULL;
26569 targetm.sched.first_cycle_multipass_init = NULL;
26570 targetm.sched.first_cycle_multipass_begin = NULL;
26571 targetm.sched.first_cycle_multipass_issue = NULL;
26572 targetm.sched.first_cycle_multipass_backtrack = NULL;
26573 targetm.sched.first_cycle_multipass_end = NULL;
26574 targetm.sched.first_cycle_multipass_fini = NULL;
26575 break;
26576 }
26577 }
26578
26579 \f
26580 /* Compute the alignment given to a constant that is being placed in memory.
26581 EXP is the constant and ALIGN is the alignment that the object would
26582 ordinarily have.
26583 The value of this function is used instead of that alignment to align
26584 the object. */
26585
26586 int
26587 ix86_constant_alignment (tree exp, int align)
26588 {
26589 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26590 || TREE_CODE (exp) == INTEGER_CST)
26591 {
26592 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26593 return 64;
26594 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26595 return 128;
26596 }
26597 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26598 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26599 return BITS_PER_WORD;
26600
26601 return align;
26602 }
26603
26604 /* Compute the alignment for a static variable.
26605 TYPE is the data type, and ALIGN is the alignment that
26606 the object would ordinarily have. The value of this function is used
26607 instead of that alignment to align the object. */
26608
26609 int
26610 ix86_data_alignment (tree type, int align, bool opt)
26611 {
26612 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26613 for symbols from other compilation units or symbols that don't need
26614 to bind locally. In order to preserve some ABI compatibility with
26615 those compilers, ensure we don't decrease alignment from what we
26616 used to assume. */
26617
26618 int max_align_compat
26619 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26620
26621 /* A data structure equal to or greater than the size of a cache line
26622 (64 bytes in the Pentium 4 and other recent Intel processors, including
26623 processors based on the Intel Core microarchitecture) should be aligned
26624 so that its base address is a multiple of the cache line size. */
26625
26626 int max_align
26627 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26628
26629 if (max_align < BITS_PER_WORD)
26630 max_align = BITS_PER_WORD;
26631
26632 if (opt
26633 && AGGREGATE_TYPE_P (type)
26634 && TYPE_SIZE (type)
26635 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26636 {
26637 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26638 && align < max_align_compat)
26639 align = max_align_compat;
26640 if (wi::geu_p (TYPE_SIZE (type), max_align)
26641 && align < max_align)
26642 align = max_align;
26643 }
26644
26645 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26646 to a 16-byte boundary. */
26647 if (TARGET_64BIT)
26648 {
26649 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26650 && TYPE_SIZE (type)
26651 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26652 && wi::geu_p (TYPE_SIZE (type), 128)
26653 && align < 128)
26654 return 128;
26655 }
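  /* Example (follows from the check above; added for illustration): a file
     scope "char buf[32]" compiled for x86-64 is given at least 128-bit
     (16-byte) alignment, while "char buf[8]" keeps its natural alignment.  */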
26656
26657 if (!opt)
26658 return align;
26659
26660 if (TREE_CODE (type) == ARRAY_TYPE)
26661 {
26662 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26663 return 64;
26664 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26665 return 128;
26666 }
26667 else if (TREE_CODE (type) == COMPLEX_TYPE)
26668 {
26669
26670 if (TYPE_MODE (type) == DCmode && align < 64)
26671 return 64;
26672 if ((TYPE_MODE (type) == XCmode
26673 || TYPE_MODE (type) == TCmode) && align < 128)
26674 return 128;
26675 }
26676 else if ((TREE_CODE (type) == RECORD_TYPE
26677 || TREE_CODE (type) == UNION_TYPE
26678 || TREE_CODE (type) == QUAL_UNION_TYPE)
26679 && TYPE_FIELDS (type))
26680 {
26681 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26682 return 64;
26683 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26684 return 128;
26685 }
26686 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26687 || TREE_CODE (type) == INTEGER_TYPE)
26688 {
26689 if (TYPE_MODE (type) == DFmode && align < 64)
26690 return 64;
26691 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26692 return 128;
26693 }
26694
26695 return align;
26696 }
26697
26698 /* Compute the alignment for a local variable or a stack slot. EXP is
26699 the data type or decl itself, MODE is the widest mode available and
26700 ALIGN is the alignment that the object would ordinarily have. The
26701 value of this macro is used instead of that alignment to align the
26702 object. */
26703
26704 unsigned int
26705 ix86_local_alignment (tree exp, enum machine_mode mode,
26706 unsigned int align)
26707 {
26708 tree type, decl;
26709
26710 if (exp && DECL_P (exp))
26711 {
26712 type = TREE_TYPE (exp);
26713 decl = exp;
26714 }
26715 else
26716 {
26717 type = exp;
26718 decl = NULL;
26719 }
26720
26721 /* Don't do dynamic stack realignment for long long objects with
26722 -mpreferred-stack-boundary=2. */
26723 if (!TARGET_64BIT
26724 && align == 64
26725 && ix86_preferred_stack_boundary < 64
26726 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26727 && (!type || !TYPE_USER_ALIGN (type))
26728 && (!decl || !DECL_USER_ALIGN (decl)))
26729 align = 32;
26730
26731 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26732 register in MODE. We will return the largest alignment of XF
26733 and DF. */
26734 if (!type)
26735 {
26736 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26737 align = GET_MODE_ALIGNMENT (DFmode);
26738 return align;
26739 }
26740
26741 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26742 to a 16-byte boundary. The exact wording is:
26743
26744 An array uses the same alignment as its elements, except that a local or
26745 global array variable of length at least 16 bytes or
26746 a C99 variable-length array variable always has alignment of at least 16 bytes.
26747
26748 This was added to allow use of aligned SSE instructions on arrays. This
26749 rule is meant for static storage (where the compiler cannot do the analysis
26750 by itself). We follow it for automatic variables only when convenient.
26751 We fully control everything in the function being compiled, and functions
26752 from other units cannot rely on the alignment.
26753
26754 Exclude the va_list type. It is the common case of a local array where
26755 we cannot benefit from the alignment.
26756
26757 TODO: Probably one should optimize for size only when var is not escaping. */
26758 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26759 && TARGET_SSE)
26760 {
26761 if (AGGREGATE_TYPE_P (type)
26762 && (va_list_type_node == NULL_TREE
26763 || (TYPE_MAIN_VARIANT (type)
26764 != TYPE_MAIN_VARIANT (va_list_type_node)))
26765 && TYPE_SIZE (type)
26766 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26767 && wi::geu_p (TYPE_SIZE (type), 16)
26768 && align < 128)
26769 return 128;
26770 }
26771 if (TREE_CODE (type) == ARRAY_TYPE)
26772 {
26773 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26774 return 64;
26775 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26776 return 128;
26777 }
26778 else if (TREE_CODE (type) == COMPLEX_TYPE)
26779 {
26780 if (TYPE_MODE (type) == DCmode && align < 64)
26781 return 64;
26782 if ((TYPE_MODE (type) == XCmode
26783 || TYPE_MODE (type) == TCmode) && align < 128)
26784 return 128;
26785 }
26786 else if ((TREE_CODE (type) == RECORD_TYPE
26787 || TREE_CODE (type) == UNION_TYPE
26788 || TREE_CODE (type) == QUAL_UNION_TYPE)
26789 && TYPE_FIELDS (type))
26790 {
26791 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26792 return 64;
26793 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26794 return 128;
26795 }
26796 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26797 || TREE_CODE (type) == INTEGER_TYPE)
26798 {
26799
26800 if (TYPE_MODE (type) == DFmode && align < 64)
26801 return 64;
26802 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26803 return 128;
26804 }
26805 return align;
26806 }
26807
26808 /* Compute the minimum required alignment for dynamic stack realignment
26809 purposes for a local variable, parameter or a stack slot. EXP is
26810 the data type or decl itself, MODE is its mode and ALIGN is the
26811 alignment that the object would ordinarily have. */
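
/* Illustrative example: with -m32 -mpreferred-stack-boundary=2, a local
   "long long" would ordinarily want 64-bit alignment, but the code below
   lowers the requirement to 32 bits so that no dynamic stack realignment
   is forced on its account.  */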
26812
26813 unsigned int
26814 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26815 unsigned int align)
26816 {
26817 tree type, decl;
26818
26819 if (exp && DECL_P (exp))
26820 {
26821 type = TREE_TYPE (exp);
26822 decl = exp;
26823 }
26824 else
26825 {
26826 type = exp;
26827 decl = NULL;
26828 }
26829
26830 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26831 return align;
26832
26833 /* Don't do dynamic stack realignment for long long objects with
26834 -mpreferred-stack-boundary=2. */
26835 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26836 && (!type || !TYPE_USER_ALIGN (type))
26837 && (!decl || !DECL_USER_ALIGN (decl)))
26838 return 32;
26839
26840 return align;
26841 }
26842 \f
26843 /* Find a location for the static chain incoming to a nested function.
26844 This is a register, unless all free registers are used by arguments. */
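
/* Illustrative example (GNU C nested functions):

     void outer (int n)
     {
       int x = n;
       void inner (void) { x++; }    -- needs outer's frame: the static chain
       callback (inner);             -- taking its address forces a trampoline
     }

   The trampoline built for "inner" loads the static chain into the register
   chosen below (ECX by default in 32-bit mode, R10 in 64-bit mode) before
   jumping to "inner".  "callback" is a hypothetical callee used only for
   this sketch.  */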
26845
26846 static rtx
26847 ix86_static_chain (const_tree fndecl, bool incoming_p)
26848 {
26849 unsigned regno;
26850
26851 if (!DECL_STATIC_CHAIN (fndecl))
26852 return NULL;
26853
26854 if (TARGET_64BIT)
26855 {
26856 /* We always use R10 in 64-bit mode. */
26857 regno = R10_REG;
26858 }
26859 else
26860 {
26861 tree fntype;
26862 unsigned int ccvt;
26863
26864 /* By default in 32-bit mode we use ECX to pass the static chain. */
26865 regno = CX_REG;
26866
26867 fntype = TREE_TYPE (fndecl);
26868 ccvt = ix86_get_callcvt (fntype);
26869 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26870 {
26871 /* Fastcall functions use ecx/edx for arguments, which leaves
26872 us with EAX for the static chain.
26873 Thiscall functions use ecx for arguments, which also
26874 leaves us with EAX for the static chain. */
26875 regno = AX_REG;
26876 }
26877 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26878 {
26879 /* Thiscall functions use ecx for arguments, which leaves
26880 us with EAX and EDX for the static chain.
26881 For ABI compatibility we use EAX. */
26882 regno = AX_REG;
26883 }
26884 else if (ix86_function_regparm (fntype, fndecl) == 3)
26885 {
26886 /* For regparm 3, we have no free call-clobbered registers in
26887 which to store the static chain. In order to implement this,
26888 we have the trampoline push the static chain to the stack.
26889 However, we can't push a value below the return address when
26890 we call the nested function directly, so we have to use an
26891 alternate entry point. For this we use ESI, and have the
26892 alternate entry point push ESI, so that things appear the
26893 same once we're executing the nested function. */
26894 if (incoming_p)
26895 {
26896 if (fndecl == current_function_decl)
26897 ix86_static_chain_on_stack = true;
26898 return gen_frame_mem (SImode,
26899 plus_constant (Pmode,
26900 arg_pointer_rtx, -8));
26901 }
26902 regno = SI_REG;
26903 }
26904 }
26905
26906 return gen_rtx_REG (Pmode, regno);
26907 }
26908
26909 /* Emit RTL insns to initialize the variable parts of a trampoline.
26910 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26911 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26912 to be passed to the target function. */
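
/* A rough sketch of the bytes written below, assuming 64-bit mode with
   ptr_mode == DImode and a target address that does not fit in 32 bits:

     49 bb <imm64 fnaddr>    movabs $fnaddr, %r11
     49 ba <imm64 chain>     movabs $chain,  %r10
     49 ff e3                jmp    *%r11
     90                      nop  (pads the final 32-bit store)

   24 bytes in total.  The 32-bit variants instead load or push the chain
   value and finish with a pc-relative jmp to the target.  */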
26913
26914 static void
26915 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26916 {
26917 rtx mem, fnaddr;
26918 int opcode;
26919 int offset = 0;
26920
26921 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26922
26923 if (TARGET_64BIT)
26924 {
26925 int size;
26926
26927 /* Load the function address into r11. Try to load the address
26928 using the shorter movl instead of movabs. We may want to support
26929 movq for kernel mode, but the kernel does not use trampolines at
26930 the moment. FNADDR is a 32-bit address and may not be in
26931 DImode when ptr_mode == SImode. Always use movl in this
26932 case. */
26933 if (ptr_mode == SImode
26934 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26935 {
26936 fnaddr = copy_addr_to_reg (fnaddr);
26937
26938 mem = adjust_address (m_tramp, HImode, offset);
26939 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26940
26941 mem = adjust_address (m_tramp, SImode, offset + 2);
26942 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26943 offset += 6;
26944 }
26945 else
26946 {
26947 mem = adjust_address (m_tramp, HImode, offset);
26948 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26949
26950 mem = adjust_address (m_tramp, DImode, offset + 2);
26951 emit_move_insn (mem, fnaddr);
26952 offset += 10;
26953 }
26954
26955 /* Load the static chain into r10 using movabs. Use the shorter movl
26956 instead of movabs when ptr_mode == SImode. */
26957 if (ptr_mode == SImode)
26958 {
26959 opcode = 0xba41;
26960 size = 6;
26961 }
26962 else
26963 {
26964 opcode = 0xba49;
26965 size = 10;
26966 }
26967
26968 mem = adjust_address (m_tramp, HImode, offset);
26969 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26970
26971 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26972 emit_move_insn (mem, chain_value);
26973 offset += size;
26974
26975 /* Jump to r11; the last (unused) byte is a nop, only there to
26976 pad the write out to a single 32-bit store. */
26977 mem = adjust_address (m_tramp, SImode, offset);
26978 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26979 offset += 4;
26980 }
26981 else
26982 {
26983 rtx disp, chain;
26984
26985 /* Depending on the static chain location, either load a register
26986 with a constant, or push the constant to the stack. All of the
26987 instructions are the same size. */
26988 chain = ix86_static_chain (fndecl, true);
26989 if (REG_P (chain))
26990 {
26991 switch (REGNO (chain))
26992 {
26993 case AX_REG:
26994 opcode = 0xb8; break;
26995 case CX_REG:
26996 opcode = 0xb9; break;
26997 default:
26998 gcc_unreachable ();
26999 }
27000 }
27001 else
27002 opcode = 0x68;
27003
27004 mem = adjust_address (m_tramp, QImode, offset);
27005 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27006
27007 mem = adjust_address (m_tramp, SImode, offset + 1);
27008 emit_move_insn (mem, chain_value);
27009 offset += 5;
27010
27011 mem = adjust_address (m_tramp, QImode, offset);
27012 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27013
27014 mem = adjust_address (m_tramp, SImode, offset + 1);
27015
27016 /* Compute the offset from the end of the jmp to the target function.
27017 When the trampoline stores the static chain on the stack, we need
27018 to skip the first insn which pushes the (call-saved) register
27019 static chain; this push is 1 byte. */
27020 offset += 5;
27021 disp = expand_binop (SImode, sub_optab, fnaddr,
27022 plus_constant (Pmode, XEXP (m_tramp, 0),
27023 offset - (MEM_P (chain) ? 1 : 0)),
27024 NULL_RTX, 1, OPTAB_DIRECT);
27025 emit_move_insn (mem, disp);
27026 }
27027
27028 gcc_assert (offset <= TRAMPOLINE_SIZE);
27029
27030 #ifdef HAVE_ENABLE_EXECUTE_STACK
27031 #ifdef CHECK_EXECUTE_STACK_ENABLED
27032 if (CHECK_EXECUTE_STACK_ENABLED)
27033 #endif
27034 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27035 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27036 #endif
27037 }
27038 \f
27039 /* The following file contains several enumerations and data structures
27040 built from the definitions in i386-builtin-types.def. */
27041
27042 #include "i386-builtin-types.inc"
27043
27044 /* Table for the ix86 builtin non-function types. */
27045 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27046
27047 /* Retrieve an element from the above table, building some of
27048 the types lazily. */
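
/* For illustration, a vector type such as the one backing __m128 is built on
   first request roughly as

     itype = ix86_get_builtin_type (<element type code>);
     type = build_vector_type_for_mode (itype, V4SFmode);

   and then cached in ix86_builtin_type_tab, so later lookups just return the
   saved tree.  <element type code> stands for the generated enumerator from
   i386-builtin-types.inc and is elided here.  */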
27049
27050 static tree
27051 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27052 {
27053 unsigned int index;
27054 tree type, itype;
27055
27056 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27057
27058 type = ix86_builtin_type_tab[(int) tcode];
27059 if (type != NULL)
27060 return type;
27061
27062 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27063 if (tcode <= IX86_BT_LAST_VECT)
27064 {
27065 enum machine_mode mode;
27066
27067 index = tcode - IX86_BT_LAST_PRIM - 1;
27068 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27069 mode = ix86_builtin_type_vect_mode[index];
27070
27071 type = build_vector_type_for_mode (itype, mode);
27072 }
27073 else
27074 {
27075 int quals;
27076
27077 index = tcode - IX86_BT_LAST_VECT - 1;
27078 if (tcode <= IX86_BT_LAST_PTR)
27079 quals = TYPE_UNQUALIFIED;
27080 else
27081 quals = TYPE_QUAL_CONST;
27082
27083 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27084 if (quals != TYPE_UNQUALIFIED)
27085 itype = build_qualified_type (itype, quals);
27086
27087 type = build_pointer_type (itype);
27088 }
27089
27090 ix86_builtin_type_tab[(int) tcode] = type;
27091 return type;
27092 }
27093
27094 /* Table for the ix86 builtin function types. */
27095 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27096
27097 /* Retrieve an element from the above table, building some of
27098 the types lazily. */
27099
27100 static tree
27101 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27102 {
27103 tree type;
27104
27105 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27106
27107 type = ix86_builtin_func_type_tab[(int) tcode];
27108 if (type != NULL)
27109 return type;
27110
27111 if (tcode <= IX86_BT_LAST_FUNC)
27112 {
27113 unsigned start = ix86_builtin_func_start[(int) tcode];
27114 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27115 tree rtype, atype, args = void_list_node;
27116 unsigned i;
27117
27118 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27119 for (i = after - 1; i > start; --i)
27120 {
27121 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27122 args = tree_cons (NULL, atype, args);
27123 }
27124
27125 type = build_function_type (rtype, args);
27126 }
27127 else
27128 {
27129 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27130 enum ix86_builtin_func_type icode;
27131
27132 icode = ix86_builtin_func_alias_base[index];
27133 type = ix86_get_builtin_func_type (icode);
27134 }
27135
27136 ix86_builtin_func_type_tab[(int) tcode] = type;
27137 return type;
27138 }
27139
27140
27141 /* Codes for all the ix86 builtins (SSE/MMX, 3DNow!, AVX, AVX-512, etc.). */
27142 enum ix86_builtins
27143 {
27144 IX86_BUILTIN_ADDPS,
27145 IX86_BUILTIN_ADDSS,
27146 IX86_BUILTIN_DIVPS,
27147 IX86_BUILTIN_DIVSS,
27148 IX86_BUILTIN_MULPS,
27149 IX86_BUILTIN_MULSS,
27150 IX86_BUILTIN_SUBPS,
27151 IX86_BUILTIN_SUBSS,
27152
27153 IX86_BUILTIN_CMPEQPS,
27154 IX86_BUILTIN_CMPLTPS,
27155 IX86_BUILTIN_CMPLEPS,
27156 IX86_BUILTIN_CMPGTPS,
27157 IX86_BUILTIN_CMPGEPS,
27158 IX86_BUILTIN_CMPNEQPS,
27159 IX86_BUILTIN_CMPNLTPS,
27160 IX86_BUILTIN_CMPNLEPS,
27161 IX86_BUILTIN_CMPNGTPS,
27162 IX86_BUILTIN_CMPNGEPS,
27163 IX86_BUILTIN_CMPORDPS,
27164 IX86_BUILTIN_CMPUNORDPS,
27165 IX86_BUILTIN_CMPEQSS,
27166 IX86_BUILTIN_CMPLTSS,
27167 IX86_BUILTIN_CMPLESS,
27168 IX86_BUILTIN_CMPNEQSS,
27169 IX86_BUILTIN_CMPNLTSS,
27170 IX86_BUILTIN_CMPNLESS,
27171 IX86_BUILTIN_CMPORDSS,
27172 IX86_BUILTIN_CMPUNORDSS,
27173
27174 IX86_BUILTIN_COMIEQSS,
27175 IX86_BUILTIN_COMILTSS,
27176 IX86_BUILTIN_COMILESS,
27177 IX86_BUILTIN_COMIGTSS,
27178 IX86_BUILTIN_COMIGESS,
27179 IX86_BUILTIN_COMINEQSS,
27180 IX86_BUILTIN_UCOMIEQSS,
27181 IX86_BUILTIN_UCOMILTSS,
27182 IX86_BUILTIN_UCOMILESS,
27183 IX86_BUILTIN_UCOMIGTSS,
27184 IX86_BUILTIN_UCOMIGESS,
27185 IX86_BUILTIN_UCOMINEQSS,
27186
27187 IX86_BUILTIN_CVTPI2PS,
27188 IX86_BUILTIN_CVTPS2PI,
27189 IX86_BUILTIN_CVTSI2SS,
27190 IX86_BUILTIN_CVTSI642SS,
27191 IX86_BUILTIN_CVTSS2SI,
27192 IX86_BUILTIN_CVTSS2SI64,
27193 IX86_BUILTIN_CVTTPS2PI,
27194 IX86_BUILTIN_CVTTSS2SI,
27195 IX86_BUILTIN_CVTTSS2SI64,
27196
27197 IX86_BUILTIN_MAXPS,
27198 IX86_BUILTIN_MAXSS,
27199 IX86_BUILTIN_MINPS,
27200 IX86_BUILTIN_MINSS,
27201
27202 IX86_BUILTIN_LOADUPS,
27203 IX86_BUILTIN_STOREUPS,
27204 IX86_BUILTIN_MOVSS,
27205
27206 IX86_BUILTIN_MOVHLPS,
27207 IX86_BUILTIN_MOVLHPS,
27208 IX86_BUILTIN_LOADHPS,
27209 IX86_BUILTIN_LOADLPS,
27210 IX86_BUILTIN_STOREHPS,
27211 IX86_BUILTIN_STORELPS,
27212
27213 IX86_BUILTIN_MASKMOVQ,
27214 IX86_BUILTIN_MOVMSKPS,
27215 IX86_BUILTIN_PMOVMSKB,
27216
27217 IX86_BUILTIN_MOVNTPS,
27218 IX86_BUILTIN_MOVNTQ,
27219
27220 IX86_BUILTIN_LOADDQU,
27221 IX86_BUILTIN_STOREDQU,
27222
27223 IX86_BUILTIN_PACKSSWB,
27224 IX86_BUILTIN_PACKSSDW,
27225 IX86_BUILTIN_PACKUSWB,
27226
27227 IX86_BUILTIN_PADDB,
27228 IX86_BUILTIN_PADDW,
27229 IX86_BUILTIN_PADDD,
27230 IX86_BUILTIN_PADDQ,
27231 IX86_BUILTIN_PADDSB,
27232 IX86_BUILTIN_PADDSW,
27233 IX86_BUILTIN_PADDUSB,
27234 IX86_BUILTIN_PADDUSW,
27235 IX86_BUILTIN_PSUBB,
27236 IX86_BUILTIN_PSUBW,
27237 IX86_BUILTIN_PSUBD,
27238 IX86_BUILTIN_PSUBQ,
27239 IX86_BUILTIN_PSUBSB,
27240 IX86_BUILTIN_PSUBSW,
27241 IX86_BUILTIN_PSUBUSB,
27242 IX86_BUILTIN_PSUBUSW,
27243
27244 IX86_BUILTIN_PAND,
27245 IX86_BUILTIN_PANDN,
27246 IX86_BUILTIN_POR,
27247 IX86_BUILTIN_PXOR,
27248
27249 IX86_BUILTIN_PAVGB,
27250 IX86_BUILTIN_PAVGW,
27251
27252 IX86_BUILTIN_PCMPEQB,
27253 IX86_BUILTIN_PCMPEQW,
27254 IX86_BUILTIN_PCMPEQD,
27255 IX86_BUILTIN_PCMPGTB,
27256 IX86_BUILTIN_PCMPGTW,
27257 IX86_BUILTIN_PCMPGTD,
27258
27259 IX86_BUILTIN_PMADDWD,
27260
27261 IX86_BUILTIN_PMAXSW,
27262 IX86_BUILTIN_PMAXUB,
27263 IX86_BUILTIN_PMINSW,
27264 IX86_BUILTIN_PMINUB,
27265
27266 IX86_BUILTIN_PMULHUW,
27267 IX86_BUILTIN_PMULHW,
27268 IX86_BUILTIN_PMULLW,
27269
27270 IX86_BUILTIN_PSADBW,
27271 IX86_BUILTIN_PSHUFW,
27272
27273 IX86_BUILTIN_PSLLW,
27274 IX86_BUILTIN_PSLLD,
27275 IX86_BUILTIN_PSLLQ,
27276 IX86_BUILTIN_PSRAW,
27277 IX86_BUILTIN_PSRAD,
27278 IX86_BUILTIN_PSRLW,
27279 IX86_BUILTIN_PSRLD,
27280 IX86_BUILTIN_PSRLQ,
27281 IX86_BUILTIN_PSLLWI,
27282 IX86_BUILTIN_PSLLDI,
27283 IX86_BUILTIN_PSLLQI,
27284 IX86_BUILTIN_PSRAWI,
27285 IX86_BUILTIN_PSRADI,
27286 IX86_BUILTIN_PSRLWI,
27287 IX86_BUILTIN_PSRLDI,
27288 IX86_BUILTIN_PSRLQI,
27289
27290 IX86_BUILTIN_PUNPCKHBW,
27291 IX86_BUILTIN_PUNPCKHWD,
27292 IX86_BUILTIN_PUNPCKHDQ,
27293 IX86_BUILTIN_PUNPCKLBW,
27294 IX86_BUILTIN_PUNPCKLWD,
27295 IX86_BUILTIN_PUNPCKLDQ,
27296
27297 IX86_BUILTIN_SHUFPS,
27298
27299 IX86_BUILTIN_RCPPS,
27300 IX86_BUILTIN_RCPSS,
27301 IX86_BUILTIN_RSQRTPS,
27302 IX86_BUILTIN_RSQRTPS_NR,
27303 IX86_BUILTIN_RSQRTSS,
27304 IX86_BUILTIN_RSQRTF,
27305 IX86_BUILTIN_SQRTPS,
27306 IX86_BUILTIN_SQRTPS_NR,
27307 IX86_BUILTIN_SQRTSS,
27308
27309 IX86_BUILTIN_UNPCKHPS,
27310 IX86_BUILTIN_UNPCKLPS,
27311
27312 IX86_BUILTIN_ANDPS,
27313 IX86_BUILTIN_ANDNPS,
27314 IX86_BUILTIN_ORPS,
27315 IX86_BUILTIN_XORPS,
27316
27317 IX86_BUILTIN_EMMS,
27318 IX86_BUILTIN_LDMXCSR,
27319 IX86_BUILTIN_STMXCSR,
27320 IX86_BUILTIN_SFENCE,
27321
27322 IX86_BUILTIN_FXSAVE,
27323 IX86_BUILTIN_FXRSTOR,
27324 IX86_BUILTIN_FXSAVE64,
27325 IX86_BUILTIN_FXRSTOR64,
27326
27327 IX86_BUILTIN_XSAVE,
27328 IX86_BUILTIN_XRSTOR,
27329 IX86_BUILTIN_XSAVE64,
27330 IX86_BUILTIN_XRSTOR64,
27331
27332 IX86_BUILTIN_XSAVEOPT,
27333 IX86_BUILTIN_XSAVEOPT64,
27334
27335 IX86_BUILTIN_XSAVEC,
27336 IX86_BUILTIN_XSAVEC64,
27337
27338 IX86_BUILTIN_XSAVES,
27339 IX86_BUILTIN_XRSTORS,
27340 IX86_BUILTIN_XSAVES64,
27341 IX86_BUILTIN_XRSTORS64,
27342
27343 /* 3DNow! Original */
27344 IX86_BUILTIN_FEMMS,
27345 IX86_BUILTIN_PAVGUSB,
27346 IX86_BUILTIN_PF2ID,
27347 IX86_BUILTIN_PFACC,
27348 IX86_BUILTIN_PFADD,
27349 IX86_BUILTIN_PFCMPEQ,
27350 IX86_BUILTIN_PFCMPGE,
27351 IX86_BUILTIN_PFCMPGT,
27352 IX86_BUILTIN_PFMAX,
27353 IX86_BUILTIN_PFMIN,
27354 IX86_BUILTIN_PFMUL,
27355 IX86_BUILTIN_PFRCP,
27356 IX86_BUILTIN_PFRCPIT1,
27357 IX86_BUILTIN_PFRCPIT2,
27358 IX86_BUILTIN_PFRSQIT1,
27359 IX86_BUILTIN_PFRSQRT,
27360 IX86_BUILTIN_PFSUB,
27361 IX86_BUILTIN_PFSUBR,
27362 IX86_BUILTIN_PI2FD,
27363 IX86_BUILTIN_PMULHRW,
27364
27365 /* 3DNow! Athlon Extensions */
27366 IX86_BUILTIN_PF2IW,
27367 IX86_BUILTIN_PFNACC,
27368 IX86_BUILTIN_PFPNACC,
27369 IX86_BUILTIN_PI2FW,
27370 IX86_BUILTIN_PSWAPDSI,
27371 IX86_BUILTIN_PSWAPDSF,
27372
27373 /* SSE2 */
27374 IX86_BUILTIN_ADDPD,
27375 IX86_BUILTIN_ADDSD,
27376 IX86_BUILTIN_DIVPD,
27377 IX86_BUILTIN_DIVSD,
27378 IX86_BUILTIN_MULPD,
27379 IX86_BUILTIN_MULSD,
27380 IX86_BUILTIN_SUBPD,
27381 IX86_BUILTIN_SUBSD,
27382
27383 IX86_BUILTIN_CMPEQPD,
27384 IX86_BUILTIN_CMPLTPD,
27385 IX86_BUILTIN_CMPLEPD,
27386 IX86_BUILTIN_CMPGTPD,
27387 IX86_BUILTIN_CMPGEPD,
27388 IX86_BUILTIN_CMPNEQPD,
27389 IX86_BUILTIN_CMPNLTPD,
27390 IX86_BUILTIN_CMPNLEPD,
27391 IX86_BUILTIN_CMPNGTPD,
27392 IX86_BUILTIN_CMPNGEPD,
27393 IX86_BUILTIN_CMPORDPD,
27394 IX86_BUILTIN_CMPUNORDPD,
27395 IX86_BUILTIN_CMPEQSD,
27396 IX86_BUILTIN_CMPLTSD,
27397 IX86_BUILTIN_CMPLESD,
27398 IX86_BUILTIN_CMPNEQSD,
27399 IX86_BUILTIN_CMPNLTSD,
27400 IX86_BUILTIN_CMPNLESD,
27401 IX86_BUILTIN_CMPORDSD,
27402 IX86_BUILTIN_CMPUNORDSD,
27403
27404 IX86_BUILTIN_COMIEQSD,
27405 IX86_BUILTIN_COMILTSD,
27406 IX86_BUILTIN_COMILESD,
27407 IX86_BUILTIN_COMIGTSD,
27408 IX86_BUILTIN_COMIGESD,
27409 IX86_BUILTIN_COMINEQSD,
27410 IX86_BUILTIN_UCOMIEQSD,
27411 IX86_BUILTIN_UCOMILTSD,
27412 IX86_BUILTIN_UCOMILESD,
27413 IX86_BUILTIN_UCOMIGTSD,
27414 IX86_BUILTIN_UCOMIGESD,
27415 IX86_BUILTIN_UCOMINEQSD,
27416
27417 IX86_BUILTIN_MAXPD,
27418 IX86_BUILTIN_MAXSD,
27419 IX86_BUILTIN_MINPD,
27420 IX86_BUILTIN_MINSD,
27421
27422 IX86_BUILTIN_ANDPD,
27423 IX86_BUILTIN_ANDNPD,
27424 IX86_BUILTIN_ORPD,
27425 IX86_BUILTIN_XORPD,
27426
27427 IX86_BUILTIN_SQRTPD,
27428 IX86_BUILTIN_SQRTSD,
27429
27430 IX86_BUILTIN_UNPCKHPD,
27431 IX86_BUILTIN_UNPCKLPD,
27432
27433 IX86_BUILTIN_SHUFPD,
27434
27435 IX86_BUILTIN_LOADUPD,
27436 IX86_BUILTIN_STOREUPD,
27437 IX86_BUILTIN_MOVSD,
27438
27439 IX86_BUILTIN_LOADHPD,
27440 IX86_BUILTIN_LOADLPD,
27441
27442 IX86_BUILTIN_CVTDQ2PD,
27443 IX86_BUILTIN_CVTDQ2PS,
27444
27445 IX86_BUILTIN_CVTPD2DQ,
27446 IX86_BUILTIN_CVTPD2PI,
27447 IX86_BUILTIN_CVTPD2PS,
27448 IX86_BUILTIN_CVTTPD2DQ,
27449 IX86_BUILTIN_CVTTPD2PI,
27450
27451 IX86_BUILTIN_CVTPI2PD,
27452 IX86_BUILTIN_CVTSI2SD,
27453 IX86_BUILTIN_CVTSI642SD,
27454
27455 IX86_BUILTIN_CVTSD2SI,
27456 IX86_BUILTIN_CVTSD2SI64,
27457 IX86_BUILTIN_CVTSD2SS,
27458 IX86_BUILTIN_CVTSS2SD,
27459 IX86_BUILTIN_CVTTSD2SI,
27460 IX86_BUILTIN_CVTTSD2SI64,
27461
27462 IX86_BUILTIN_CVTPS2DQ,
27463 IX86_BUILTIN_CVTPS2PD,
27464 IX86_BUILTIN_CVTTPS2DQ,
27465
27466 IX86_BUILTIN_MOVNTI,
27467 IX86_BUILTIN_MOVNTI64,
27468 IX86_BUILTIN_MOVNTPD,
27469 IX86_BUILTIN_MOVNTDQ,
27470
27471 IX86_BUILTIN_MOVQ128,
27472
27473 /* SSE2 MMX */
27474 IX86_BUILTIN_MASKMOVDQU,
27475 IX86_BUILTIN_MOVMSKPD,
27476 IX86_BUILTIN_PMOVMSKB128,
27477
27478 IX86_BUILTIN_PACKSSWB128,
27479 IX86_BUILTIN_PACKSSDW128,
27480 IX86_BUILTIN_PACKUSWB128,
27481
27482 IX86_BUILTIN_PADDB128,
27483 IX86_BUILTIN_PADDW128,
27484 IX86_BUILTIN_PADDD128,
27485 IX86_BUILTIN_PADDQ128,
27486 IX86_BUILTIN_PADDSB128,
27487 IX86_BUILTIN_PADDSW128,
27488 IX86_BUILTIN_PADDUSB128,
27489 IX86_BUILTIN_PADDUSW128,
27490 IX86_BUILTIN_PSUBB128,
27491 IX86_BUILTIN_PSUBW128,
27492 IX86_BUILTIN_PSUBD128,
27493 IX86_BUILTIN_PSUBQ128,
27494 IX86_BUILTIN_PSUBSB128,
27495 IX86_BUILTIN_PSUBSW128,
27496 IX86_BUILTIN_PSUBUSB128,
27497 IX86_BUILTIN_PSUBUSW128,
27498
27499 IX86_BUILTIN_PAND128,
27500 IX86_BUILTIN_PANDN128,
27501 IX86_BUILTIN_POR128,
27502 IX86_BUILTIN_PXOR128,
27503
27504 IX86_BUILTIN_PAVGB128,
27505 IX86_BUILTIN_PAVGW128,
27506
27507 IX86_BUILTIN_PCMPEQB128,
27508 IX86_BUILTIN_PCMPEQW128,
27509 IX86_BUILTIN_PCMPEQD128,
27510 IX86_BUILTIN_PCMPGTB128,
27511 IX86_BUILTIN_PCMPGTW128,
27512 IX86_BUILTIN_PCMPGTD128,
27513
27514 IX86_BUILTIN_PMADDWD128,
27515
27516 IX86_BUILTIN_PMAXSW128,
27517 IX86_BUILTIN_PMAXUB128,
27518 IX86_BUILTIN_PMINSW128,
27519 IX86_BUILTIN_PMINUB128,
27520
27521 IX86_BUILTIN_PMULUDQ,
27522 IX86_BUILTIN_PMULUDQ128,
27523 IX86_BUILTIN_PMULHUW128,
27524 IX86_BUILTIN_PMULHW128,
27525 IX86_BUILTIN_PMULLW128,
27526
27527 IX86_BUILTIN_PSADBW128,
27528 IX86_BUILTIN_PSHUFHW,
27529 IX86_BUILTIN_PSHUFLW,
27530 IX86_BUILTIN_PSHUFD,
27531
27532 IX86_BUILTIN_PSLLDQI128,
27533 IX86_BUILTIN_PSLLWI128,
27534 IX86_BUILTIN_PSLLDI128,
27535 IX86_BUILTIN_PSLLQI128,
27536 IX86_BUILTIN_PSRAWI128,
27537 IX86_BUILTIN_PSRADI128,
27538 IX86_BUILTIN_PSRLDQI128,
27539 IX86_BUILTIN_PSRLWI128,
27540 IX86_BUILTIN_PSRLDI128,
27541 IX86_BUILTIN_PSRLQI128,
27542
27543 IX86_BUILTIN_PSLLDQ128,
27544 IX86_BUILTIN_PSLLW128,
27545 IX86_BUILTIN_PSLLD128,
27546 IX86_BUILTIN_PSLLQ128,
27547 IX86_BUILTIN_PSRAW128,
27548 IX86_BUILTIN_PSRAD128,
27549 IX86_BUILTIN_PSRLW128,
27550 IX86_BUILTIN_PSRLD128,
27551 IX86_BUILTIN_PSRLQ128,
27552
27553 IX86_BUILTIN_PUNPCKHBW128,
27554 IX86_BUILTIN_PUNPCKHWD128,
27555 IX86_BUILTIN_PUNPCKHDQ128,
27556 IX86_BUILTIN_PUNPCKHQDQ128,
27557 IX86_BUILTIN_PUNPCKLBW128,
27558 IX86_BUILTIN_PUNPCKLWD128,
27559 IX86_BUILTIN_PUNPCKLDQ128,
27560 IX86_BUILTIN_PUNPCKLQDQ128,
27561
27562 IX86_BUILTIN_CLFLUSH,
27563 IX86_BUILTIN_MFENCE,
27564 IX86_BUILTIN_LFENCE,
27565 IX86_BUILTIN_PAUSE,
27566
27567 IX86_BUILTIN_FNSTENV,
27568 IX86_BUILTIN_FLDENV,
27569 IX86_BUILTIN_FNSTSW,
27570 IX86_BUILTIN_FNCLEX,
27571
27572 IX86_BUILTIN_BSRSI,
27573 IX86_BUILTIN_BSRDI,
27574 IX86_BUILTIN_RDPMC,
27575 IX86_BUILTIN_RDTSC,
27576 IX86_BUILTIN_RDTSCP,
27577 IX86_BUILTIN_ROLQI,
27578 IX86_BUILTIN_ROLHI,
27579 IX86_BUILTIN_RORQI,
27580 IX86_BUILTIN_RORHI,
27581
27582 /* SSE3. */
27583 IX86_BUILTIN_ADDSUBPS,
27584 IX86_BUILTIN_HADDPS,
27585 IX86_BUILTIN_HSUBPS,
27586 IX86_BUILTIN_MOVSHDUP,
27587 IX86_BUILTIN_MOVSLDUP,
27588 IX86_BUILTIN_ADDSUBPD,
27589 IX86_BUILTIN_HADDPD,
27590 IX86_BUILTIN_HSUBPD,
27591 IX86_BUILTIN_LDDQU,
27592
27593 IX86_BUILTIN_MONITOR,
27594 IX86_BUILTIN_MWAIT,
27595
27596 /* SSSE3. */
27597 IX86_BUILTIN_PHADDW,
27598 IX86_BUILTIN_PHADDD,
27599 IX86_BUILTIN_PHADDSW,
27600 IX86_BUILTIN_PHSUBW,
27601 IX86_BUILTIN_PHSUBD,
27602 IX86_BUILTIN_PHSUBSW,
27603 IX86_BUILTIN_PMADDUBSW,
27604 IX86_BUILTIN_PMULHRSW,
27605 IX86_BUILTIN_PSHUFB,
27606 IX86_BUILTIN_PSIGNB,
27607 IX86_BUILTIN_PSIGNW,
27608 IX86_BUILTIN_PSIGND,
27609 IX86_BUILTIN_PALIGNR,
27610 IX86_BUILTIN_PABSB,
27611 IX86_BUILTIN_PABSW,
27612 IX86_BUILTIN_PABSD,
27613
27614 IX86_BUILTIN_PHADDW128,
27615 IX86_BUILTIN_PHADDD128,
27616 IX86_BUILTIN_PHADDSW128,
27617 IX86_BUILTIN_PHSUBW128,
27618 IX86_BUILTIN_PHSUBD128,
27619 IX86_BUILTIN_PHSUBSW128,
27620 IX86_BUILTIN_PMADDUBSW128,
27621 IX86_BUILTIN_PMULHRSW128,
27622 IX86_BUILTIN_PSHUFB128,
27623 IX86_BUILTIN_PSIGNB128,
27624 IX86_BUILTIN_PSIGNW128,
27625 IX86_BUILTIN_PSIGND128,
27626 IX86_BUILTIN_PALIGNR128,
27627 IX86_BUILTIN_PABSB128,
27628 IX86_BUILTIN_PABSW128,
27629 IX86_BUILTIN_PABSD128,
27630
27631 /* AMDFAM10 - SSE4A New Instructions. */
27632 IX86_BUILTIN_MOVNTSD,
27633 IX86_BUILTIN_MOVNTSS,
27634 IX86_BUILTIN_EXTRQI,
27635 IX86_BUILTIN_EXTRQ,
27636 IX86_BUILTIN_INSERTQI,
27637 IX86_BUILTIN_INSERTQ,
27638
27639 /* SSE4.1. */
27640 IX86_BUILTIN_BLENDPD,
27641 IX86_BUILTIN_BLENDPS,
27642 IX86_BUILTIN_BLENDVPD,
27643 IX86_BUILTIN_BLENDVPS,
27644 IX86_BUILTIN_PBLENDVB128,
27645 IX86_BUILTIN_PBLENDW128,
27646
27647 IX86_BUILTIN_DPPD,
27648 IX86_BUILTIN_DPPS,
27649
27650 IX86_BUILTIN_INSERTPS128,
27651
27652 IX86_BUILTIN_MOVNTDQA,
27653 IX86_BUILTIN_MPSADBW128,
27654 IX86_BUILTIN_PACKUSDW128,
27655 IX86_BUILTIN_PCMPEQQ,
27656 IX86_BUILTIN_PHMINPOSUW128,
27657
27658 IX86_BUILTIN_PMAXSB128,
27659 IX86_BUILTIN_PMAXSD128,
27660 IX86_BUILTIN_PMAXUD128,
27661 IX86_BUILTIN_PMAXUW128,
27662
27663 IX86_BUILTIN_PMINSB128,
27664 IX86_BUILTIN_PMINSD128,
27665 IX86_BUILTIN_PMINUD128,
27666 IX86_BUILTIN_PMINUW128,
27667
27668 IX86_BUILTIN_PMOVSXBW128,
27669 IX86_BUILTIN_PMOVSXBD128,
27670 IX86_BUILTIN_PMOVSXBQ128,
27671 IX86_BUILTIN_PMOVSXWD128,
27672 IX86_BUILTIN_PMOVSXWQ128,
27673 IX86_BUILTIN_PMOVSXDQ128,
27674
27675 IX86_BUILTIN_PMOVZXBW128,
27676 IX86_BUILTIN_PMOVZXBD128,
27677 IX86_BUILTIN_PMOVZXBQ128,
27678 IX86_BUILTIN_PMOVZXWD128,
27679 IX86_BUILTIN_PMOVZXWQ128,
27680 IX86_BUILTIN_PMOVZXDQ128,
27681
27682 IX86_BUILTIN_PMULDQ128,
27683 IX86_BUILTIN_PMULLD128,
27684
27685 IX86_BUILTIN_ROUNDSD,
27686 IX86_BUILTIN_ROUNDSS,
27687
27688 IX86_BUILTIN_ROUNDPD,
27689 IX86_BUILTIN_ROUNDPS,
27690
27691 IX86_BUILTIN_FLOORPD,
27692 IX86_BUILTIN_CEILPD,
27693 IX86_BUILTIN_TRUNCPD,
27694 IX86_BUILTIN_RINTPD,
27695 IX86_BUILTIN_ROUNDPD_AZ,
27696
27697 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27698 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27699 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27700
27701 IX86_BUILTIN_FLOORPS,
27702 IX86_BUILTIN_CEILPS,
27703 IX86_BUILTIN_TRUNCPS,
27704 IX86_BUILTIN_RINTPS,
27705 IX86_BUILTIN_ROUNDPS_AZ,
27706
27707 IX86_BUILTIN_FLOORPS_SFIX,
27708 IX86_BUILTIN_CEILPS_SFIX,
27709 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27710
27711 IX86_BUILTIN_PTESTZ,
27712 IX86_BUILTIN_PTESTC,
27713 IX86_BUILTIN_PTESTNZC,
27714
27715 IX86_BUILTIN_VEC_INIT_V2SI,
27716 IX86_BUILTIN_VEC_INIT_V4HI,
27717 IX86_BUILTIN_VEC_INIT_V8QI,
27718 IX86_BUILTIN_VEC_EXT_V2DF,
27719 IX86_BUILTIN_VEC_EXT_V2DI,
27720 IX86_BUILTIN_VEC_EXT_V4SF,
27721 IX86_BUILTIN_VEC_EXT_V4SI,
27722 IX86_BUILTIN_VEC_EXT_V8HI,
27723 IX86_BUILTIN_VEC_EXT_V2SI,
27724 IX86_BUILTIN_VEC_EXT_V4HI,
27725 IX86_BUILTIN_VEC_EXT_V16QI,
27726 IX86_BUILTIN_VEC_SET_V2DI,
27727 IX86_BUILTIN_VEC_SET_V4SF,
27728 IX86_BUILTIN_VEC_SET_V4SI,
27729 IX86_BUILTIN_VEC_SET_V8HI,
27730 IX86_BUILTIN_VEC_SET_V4HI,
27731 IX86_BUILTIN_VEC_SET_V16QI,
27732
27733 IX86_BUILTIN_VEC_PACK_SFIX,
27734 IX86_BUILTIN_VEC_PACK_SFIX256,
27735
27736 /* SSE4.2. */
27737 IX86_BUILTIN_CRC32QI,
27738 IX86_BUILTIN_CRC32HI,
27739 IX86_BUILTIN_CRC32SI,
27740 IX86_BUILTIN_CRC32DI,
27741
27742 IX86_BUILTIN_PCMPESTRI128,
27743 IX86_BUILTIN_PCMPESTRM128,
27744 IX86_BUILTIN_PCMPESTRA128,
27745 IX86_BUILTIN_PCMPESTRC128,
27746 IX86_BUILTIN_PCMPESTRO128,
27747 IX86_BUILTIN_PCMPESTRS128,
27748 IX86_BUILTIN_PCMPESTRZ128,
27749 IX86_BUILTIN_PCMPISTRI128,
27750 IX86_BUILTIN_PCMPISTRM128,
27751 IX86_BUILTIN_PCMPISTRA128,
27752 IX86_BUILTIN_PCMPISTRC128,
27753 IX86_BUILTIN_PCMPISTRO128,
27754 IX86_BUILTIN_PCMPISTRS128,
27755 IX86_BUILTIN_PCMPISTRZ128,
27756
27757 IX86_BUILTIN_PCMPGTQ,
27758
27759 /* AES instructions */
27760 IX86_BUILTIN_AESENC128,
27761 IX86_BUILTIN_AESENCLAST128,
27762 IX86_BUILTIN_AESDEC128,
27763 IX86_BUILTIN_AESDECLAST128,
27764 IX86_BUILTIN_AESIMC128,
27765 IX86_BUILTIN_AESKEYGENASSIST128,
27766
27767 /* PCLMUL instruction */
27768 IX86_BUILTIN_PCLMULQDQ128,
27769
27770 /* AVX */
27771 IX86_BUILTIN_ADDPD256,
27772 IX86_BUILTIN_ADDPS256,
27773 IX86_BUILTIN_ADDSUBPD256,
27774 IX86_BUILTIN_ADDSUBPS256,
27775 IX86_BUILTIN_ANDPD256,
27776 IX86_BUILTIN_ANDPS256,
27777 IX86_BUILTIN_ANDNPD256,
27778 IX86_BUILTIN_ANDNPS256,
27779 IX86_BUILTIN_BLENDPD256,
27780 IX86_BUILTIN_BLENDPS256,
27781 IX86_BUILTIN_BLENDVPD256,
27782 IX86_BUILTIN_BLENDVPS256,
27783 IX86_BUILTIN_DIVPD256,
27784 IX86_BUILTIN_DIVPS256,
27785 IX86_BUILTIN_DPPS256,
27786 IX86_BUILTIN_HADDPD256,
27787 IX86_BUILTIN_HADDPS256,
27788 IX86_BUILTIN_HSUBPD256,
27789 IX86_BUILTIN_HSUBPS256,
27790 IX86_BUILTIN_MAXPD256,
27791 IX86_BUILTIN_MAXPS256,
27792 IX86_BUILTIN_MINPD256,
27793 IX86_BUILTIN_MINPS256,
27794 IX86_BUILTIN_MULPD256,
27795 IX86_BUILTIN_MULPS256,
27796 IX86_BUILTIN_ORPD256,
27797 IX86_BUILTIN_ORPS256,
27798 IX86_BUILTIN_SHUFPD256,
27799 IX86_BUILTIN_SHUFPS256,
27800 IX86_BUILTIN_SUBPD256,
27801 IX86_BUILTIN_SUBPS256,
27802 IX86_BUILTIN_XORPD256,
27803 IX86_BUILTIN_XORPS256,
27804 IX86_BUILTIN_CMPSD,
27805 IX86_BUILTIN_CMPSS,
27806 IX86_BUILTIN_CMPPD,
27807 IX86_BUILTIN_CMPPS,
27808 IX86_BUILTIN_CMPPD256,
27809 IX86_BUILTIN_CMPPS256,
27810 IX86_BUILTIN_CVTDQ2PD256,
27811 IX86_BUILTIN_CVTDQ2PS256,
27812 IX86_BUILTIN_CVTPD2PS256,
27813 IX86_BUILTIN_CVTPS2DQ256,
27814 IX86_BUILTIN_CVTPS2PD256,
27815 IX86_BUILTIN_CVTTPD2DQ256,
27816 IX86_BUILTIN_CVTPD2DQ256,
27817 IX86_BUILTIN_CVTTPS2DQ256,
27818 IX86_BUILTIN_EXTRACTF128PD256,
27819 IX86_BUILTIN_EXTRACTF128PS256,
27820 IX86_BUILTIN_EXTRACTF128SI256,
27821 IX86_BUILTIN_VZEROALL,
27822 IX86_BUILTIN_VZEROUPPER,
27823 IX86_BUILTIN_VPERMILVARPD,
27824 IX86_BUILTIN_VPERMILVARPS,
27825 IX86_BUILTIN_VPERMILVARPD256,
27826 IX86_BUILTIN_VPERMILVARPS256,
27827 IX86_BUILTIN_VPERMILPD,
27828 IX86_BUILTIN_VPERMILPS,
27829 IX86_BUILTIN_VPERMILPD256,
27830 IX86_BUILTIN_VPERMILPS256,
27831 IX86_BUILTIN_VPERMIL2PD,
27832 IX86_BUILTIN_VPERMIL2PS,
27833 IX86_BUILTIN_VPERMIL2PD256,
27834 IX86_BUILTIN_VPERMIL2PS256,
27835 IX86_BUILTIN_VPERM2F128PD256,
27836 IX86_BUILTIN_VPERM2F128PS256,
27837 IX86_BUILTIN_VPERM2F128SI256,
27838 IX86_BUILTIN_VBROADCASTSS,
27839 IX86_BUILTIN_VBROADCASTSD256,
27840 IX86_BUILTIN_VBROADCASTSS256,
27841 IX86_BUILTIN_VBROADCASTPD256,
27842 IX86_BUILTIN_VBROADCASTPS256,
27843 IX86_BUILTIN_VINSERTF128PD256,
27844 IX86_BUILTIN_VINSERTF128PS256,
27845 IX86_BUILTIN_VINSERTF128SI256,
27846 IX86_BUILTIN_LOADUPD256,
27847 IX86_BUILTIN_LOADUPS256,
27848 IX86_BUILTIN_STOREUPD256,
27849 IX86_BUILTIN_STOREUPS256,
27850 IX86_BUILTIN_LDDQU256,
27851 IX86_BUILTIN_MOVNTDQ256,
27852 IX86_BUILTIN_MOVNTPD256,
27853 IX86_BUILTIN_MOVNTPS256,
27854 IX86_BUILTIN_LOADDQU256,
27855 IX86_BUILTIN_STOREDQU256,
27856 IX86_BUILTIN_MASKLOADPD,
27857 IX86_BUILTIN_MASKLOADPS,
27858 IX86_BUILTIN_MASKSTOREPD,
27859 IX86_BUILTIN_MASKSTOREPS,
27860 IX86_BUILTIN_MASKLOADPD256,
27861 IX86_BUILTIN_MASKLOADPS256,
27862 IX86_BUILTIN_MASKSTOREPD256,
27863 IX86_BUILTIN_MASKSTOREPS256,
27864 IX86_BUILTIN_MOVSHDUP256,
27865 IX86_BUILTIN_MOVSLDUP256,
27866 IX86_BUILTIN_MOVDDUP256,
27867
27868 IX86_BUILTIN_SQRTPD256,
27869 IX86_BUILTIN_SQRTPS256,
27870 IX86_BUILTIN_SQRTPS_NR256,
27871 IX86_BUILTIN_RSQRTPS256,
27872 IX86_BUILTIN_RSQRTPS_NR256,
27873
27874 IX86_BUILTIN_RCPPS256,
27875
27876 IX86_BUILTIN_ROUNDPD256,
27877 IX86_BUILTIN_ROUNDPS256,
27878
27879 IX86_BUILTIN_FLOORPD256,
27880 IX86_BUILTIN_CEILPD256,
27881 IX86_BUILTIN_TRUNCPD256,
27882 IX86_BUILTIN_RINTPD256,
27883 IX86_BUILTIN_ROUNDPD_AZ256,
27884
27885 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27886 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27887 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27888
27889 IX86_BUILTIN_FLOORPS256,
27890 IX86_BUILTIN_CEILPS256,
27891 IX86_BUILTIN_TRUNCPS256,
27892 IX86_BUILTIN_RINTPS256,
27893 IX86_BUILTIN_ROUNDPS_AZ256,
27894
27895 IX86_BUILTIN_FLOORPS_SFIX256,
27896 IX86_BUILTIN_CEILPS_SFIX256,
27897 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27898
27899 IX86_BUILTIN_UNPCKHPD256,
27900 IX86_BUILTIN_UNPCKLPD256,
27901 IX86_BUILTIN_UNPCKHPS256,
27902 IX86_BUILTIN_UNPCKLPS256,
27903
27904 IX86_BUILTIN_SI256_SI,
27905 IX86_BUILTIN_PS256_PS,
27906 IX86_BUILTIN_PD256_PD,
27907 IX86_BUILTIN_SI_SI256,
27908 IX86_BUILTIN_PS_PS256,
27909 IX86_BUILTIN_PD_PD256,
27910
27911 IX86_BUILTIN_VTESTZPD,
27912 IX86_BUILTIN_VTESTCPD,
27913 IX86_BUILTIN_VTESTNZCPD,
27914 IX86_BUILTIN_VTESTZPS,
27915 IX86_BUILTIN_VTESTCPS,
27916 IX86_BUILTIN_VTESTNZCPS,
27917 IX86_BUILTIN_VTESTZPD256,
27918 IX86_BUILTIN_VTESTCPD256,
27919 IX86_BUILTIN_VTESTNZCPD256,
27920 IX86_BUILTIN_VTESTZPS256,
27921 IX86_BUILTIN_VTESTCPS256,
27922 IX86_BUILTIN_VTESTNZCPS256,
27923 IX86_BUILTIN_PTESTZ256,
27924 IX86_BUILTIN_PTESTC256,
27925 IX86_BUILTIN_PTESTNZC256,
27926
27927 IX86_BUILTIN_MOVMSKPD256,
27928 IX86_BUILTIN_MOVMSKPS256,
27929
27930 /* AVX2 */
27931 IX86_BUILTIN_MPSADBW256,
27932 IX86_BUILTIN_PABSB256,
27933 IX86_BUILTIN_PABSW256,
27934 IX86_BUILTIN_PABSD256,
27935 IX86_BUILTIN_PACKSSDW256,
27936 IX86_BUILTIN_PACKSSWB256,
27937 IX86_BUILTIN_PACKUSDW256,
27938 IX86_BUILTIN_PACKUSWB256,
27939 IX86_BUILTIN_PADDB256,
27940 IX86_BUILTIN_PADDW256,
27941 IX86_BUILTIN_PADDD256,
27942 IX86_BUILTIN_PADDQ256,
27943 IX86_BUILTIN_PADDSB256,
27944 IX86_BUILTIN_PADDSW256,
27945 IX86_BUILTIN_PADDUSB256,
27946 IX86_BUILTIN_PADDUSW256,
27947 IX86_BUILTIN_PALIGNR256,
27948 IX86_BUILTIN_AND256I,
27949 IX86_BUILTIN_ANDNOT256I,
27950 IX86_BUILTIN_PAVGB256,
27951 IX86_BUILTIN_PAVGW256,
27952 IX86_BUILTIN_PBLENDVB256,
27953 IX86_BUILTIN_PBLENDVW256,
27954 IX86_BUILTIN_PCMPEQB256,
27955 IX86_BUILTIN_PCMPEQW256,
27956 IX86_BUILTIN_PCMPEQD256,
27957 IX86_BUILTIN_PCMPEQQ256,
27958 IX86_BUILTIN_PCMPGTB256,
27959 IX86_BUILTIN_PCMPGTW256,
27960 IX86_BUILTIN_PCMPGTD256,
27961 IX86_BUILTIN_PCMPGTQ256,
27962 IX86_BUILTIN_PHADDW256,
27963 IX86_BUILTIN_PHADDD256,
27964 IX86_BUILTIN_PHADDSW256,
27965 IX86_BUILTIN_PHSUBW256,
27966 IX86_BUILTIN_PHSUBD256,
27967 IX86_BUILTIN_PHSUBSW256,
27968 IX86_BUILTIN_PMADDUBSW256,
27969 IX86_BUILTIN_PMADDWD256,
27970 IX86_BUILTIN_PMAXSB256,
27971 IX86_BUILTIN_PMAXSW256,
27972 IX86_BUILTIN_PMAXSD256,
27973 IX86_BUILTIN_PMAXUB256,
27974 IX86_BUILTIN_PMAXUW256,
27975 IX86_BUILTIN_PMAXUD256,
27976 IX86_BUILTIN_PMINSB256,
27977 IX86_BUILTIN_PMINSW256,
27978 IX86_BUILTIN_PMINSD256,
27979 IX86_BUILTIN_PMINUB256,
27980 IX86_BUILTIN_PMINUW256,
27981 IX86_BUILTIN_PMINUD256,
27982 IX86_BUILTIN_PMOVMSKB256,
27983 IX86_BUILTIN_PMOVSXBW256,
27984 IX86_BUILTIN_PMOVSXBD256,
27985 IX86_BUILTIN_PMOVSXBQ256,
27986 IX86_BUILTIN_PMOVSXWD256,
27987 IX86_BUILTIN_PMOVSXWQ256,
27988 IX86_BUILTIN_PMOVSXDQ256,
27989 IX86_BUILTIN_PMOVZXBW256,
27990 IX86_BUILTIN_PMOVZXBD256,
27991 IX86_BUILTIN_PMOVZXBQ256,
27992 IX86_BUILTIN_PMOVZXWD256,
27993 IX86_BUILTIN_PMOVZXWQ256,
27994 IX86_BUILTIN_PMOVZXDQ256,
27995 IX86_BUILTIN_PMULDQ256,
27996 IX86_BUILTIN_PMULHRSW256,
27997 IX86_BUILTIN_PMULHUW256,
27998 IX86_BUILTIN_PMULHW256,
27999 IX86_BUILTIN_PMULLW256,
28000 IX86_BUILTIN_PMULLD256,
28001 IX86_BUILTIN_PMULUDQ256,
28002 IX86_BUILTIN_POR256,
28003 IX86_BUILTIN_PSADBW256,
28004 IX86_BUILTIN_PSHUFB256,
28005 IX86_BUILTIN_PSHUFD256,
28006 IX86_BUILTIN_PSHUFHW256,
28007 IX86_BUILTIN_PSHUFLW256,
28008 IX86_BUILTIN_PSIGNB256,
28009 IX86_BUILTIN_PSIGNW256,
28010 IX86_BUILTIN_PSIGND256,
28011 IX86_BUILTIN_PSLLDQI256,
28012 IX86_BUILTIN_PSLLWI256,
28013 IX86_BUILTIN_PSLLW256,
28014 IX86_BUILTIN_PSLLDI256,
28015 IX86_BUILTIN_PSLLD256,
28016 IX86_BUILTIN_PSLLQI256,
28017 IX86_BUILTIN_PSLLQ256,
28018 IX86_BUILTIN_PSRAWI256,
28019 IX86_BUILTIN_PSRAW256,
28020 IX86_BUILTIN_PSRADI256,
28021 IX86_BUILTIN_PSRAD256,
28022 IX86_BUILTIN_PSRLDQI256,
28023 IX86_BUILTIN_PSRLWI256,
28024 IX86_BUILTIN_PSRLW256,
28025 IX86_BUILTIN_PSRLDI256,
28026 IX86_BUILTIN_PSRLD256,
28027 IX86_BUILTIN_PSRLQI256,
28028 IX86_BUILTIN_PSRLQ256,
28029 IX86_BUILTIN_PSUBB256,
28030 IX86_BUILTIN_PSUBW256,
28031 IX86_BUILTIN_PSUBD256,
28032 IX86_BUILTIN_PSUBQ256,
28033 IX86_BUILTIN_PSUBSB256,
28034 IX86_BUILTIN_PSUBSW256,
28035 IX86_BUILTIN_PSUBUSB256,
28036 IX86_BUILTIN_PSUBUSW256,
28037 IX86_BUILTIN_PUNPCKHBW256,
28038 IX86_BUILTIN_PUNPCKHWD256,
28039 IX86_BUILTIN_PUNPCKHDQ256,
28040 IX86_BUILTIN_PUNPCKHQDQ256,
28041 IX86_BUILTIN_PUNPCKLBW256,
28042 IX86_BUILTIN_PUNPCKLWD256,
28043 IX86_BUILTIN_PUNPCKLDQ256,
28044 IX86_BUILTIN_PUNPCKLQDQ256,
28045 IX86_BUILTIN_PXOR256,
28046 IX86_BUILTIN_MOVNTDQA256,
28047 IX86_BUILTIN_VBROADCASTSS_PS,
28048 IX86_BUILTIN_VBROADCASTSS_PS256,
28049 IX86_BUILTIN_VBROADCASTSD_PD256,
28050 IX86_BUILTIN_VBROADCASTSI256,
28051 IX86_BUILTIN_PBLENDD256,
28052 IX86_BUILTIN_PBLENDD128,
28053 IX86_BUILTIN_PBROADCASTB256,
28054 IX86_BUILTIN_PBROADCASTW256,
28055 IX86_BUILTIN_PBROADCASTD256,
28056 IX86_BUILTIN_PBROADCASTQ256,
28057 IX86_BUILTIN_PBROADCASTB128,
28058 IX86_BUILTIN_PBROADCASTW128,
28059 IX86_BUILTIN_PBROADCASTD128,
28060 IX86_BUILTIN_PBROADCASTQ128,
28061 IX86_BUILTIN_VPERMVARSI256,
28062 IX86_BUILTIN_VPERMDF256,
28063 IX86_BUILTIN_VPERMVARSF256,
28064 IX86_BUILTIN_VPERMDI256,
28065 IX86_BUILTIN_VPERMTI256,
28066 IX86_BUILTIN_VEXTRACT128I256,
28067 IX86_BUILTIN_VINSERT128I256,
28068 IX86_BUILTIN_MASKLOADD,
28069 IX86_BUILTIN_MASKLOADQ,
28070 IX86_BUILTIN_MASKLOADD256,
28071 IX86_BUILTIN_MASKLOADQ256,
28072 IX86_BUILTIN_MASKSTORED,
28073 IX86_BUILTIN_MASKSTOREQ,
28074 IX86_BUILTIN_MASKSTORED256,
28075 IX86_BUILTIN_MASKSTOREQ256,
28076 IX86_BUILTIN_PSLLVV4DI,
28077 IX86_BUILTIN_PSLLVV2DI,
28078 IX86_BUILTIN_PSLLVV8SI,
28079 IX86_BUILTIN_PSLLVV4SI,
28080 IX86_BUILTIN_PSRAVV8SI,
28081 IX86_BUILTIN_PSRAVV4SI,
28082 IX86_BUILTIN_PSRLVV4DI,
28083 IX86_BUILTIN_PSRLVV2DI,
28084 IX86_BUILTIN_PSRLVV8SI,
28085 IX86_BUILTIN_PSRLVV4SI,
28086
28087 IX86_BUILTIN_GATHERSIV2DF,
28088 IX86_BUILTIN_GATHERSIV4DF,
28089 IX86_BUILTIN_GATHERDIV2DF,
28090 IX86_BUILTIN_GATHERDIV4DF,
28091 IX86_BUILTIN_GATHERSIV4SF,
28092 IX86_BUILTIN_GATHERSIV8SF,
28093 IX86_BUILTIN_GATHERDIV4SF,
28094 IX86_BUILTIN_GATHERDIV8SF,
28095 IX86_BUILTIN_GATHERSIV2DI,
28096 IX86_BUILTIN_GATHERSIV4DI,
28097 IX86_BUILTIN_GATHERDIV2DI,
28098 IX86_BUILTIN_GATHERDIV4DI,
28099 IX86_BUILTIN_GATHERSIV4SI,
28100 IX86_BUILTIN_GATHERSIV8SI,
28101 IX86_BUILTIN_GATHERDIV4SI,
28102 IX86_BUILTIN_GATHERDIV8SI,
28103
28104 /* AVX512F */
28105 IX86_BUILTIN_ADDPD512,
28106 IX86_BUILTIN_ADDPS512,
28107 IX86_BUILTIN_ADDSD_ROUND,
28108 IX86_BUILTIN_ADDSS_ROUND,
28109 IX86_BUILTIN_ALIGND512,
28110 IX86_BUILTIN_ALIGNQ512,
28111 IX86_BUILTIN_BLENDMD512,
28112 IX86_BUILTIN_BLENDMPD512,
28113 IX86_BUILTIN_BLENDMPS512,
28114 IX86_BUILTIN_BLENDMQ512,
28115 IX86_BUILTIN_BROADCASTF32X4_512,
28116 IX86_BUILTIN_BROADCASTF64X4_512,
28117 IX86_BUILTIN_BROADCASTI32X4_512,
28118 IX86_BUILTIN_BROADCASTI64X4_512,
28119 IX86_BUILTIN_BROADCASTSD512,
28120 IX86_BUILTIN_BROADCASTSS512,
28121 IX86_BUILTIN_CMPD512,
28122 IX86_BUILTIN_CMPPD512,
28123 IX86_BUILTIN_CMPPS512,
28124 IX86_BUILTIN_CMPQ512,
28125 IX86_BUILTIN_CMPSD_MASK,
28126 IX86_BUILTIN_CMPSS_MASK,
28127 IX86_BUILTIN_COMIDF,
28128 IX86_BUILTIN_COMISF,
28129 IX86_BUILTIN_COMPRESSPD512,
28130 IX86_BUILTIN_COMPRESSPDSTORE512,
28131 IX86_BUILTIN_COMPRESSPS512,
28132 IX86_BUILTIN_COMPRESSPSSTORE512,
28133 IX86_BUILTIN_CVTDQ2PD512,
28134 IX86_BUILTIN_CVTDQ2PS512,
28135 IX86_BUILTIN_CVTPD2DQ512,
28136 IX86_BUILTIN_CVTPD2PS512,
28137 IX86_BUILTIN_CVTPD2UDQ512,
28138 IX86_BUILTIN_CVTPH2PS512,
28139 IX86_BUILTIN_CVTPS2DQ512,
28140 IX86_BUILTIN_CVTPS2PD512,
28141 IX86_BUILTIN_CVTPS2PH512,
28142 IX86_BUILTIN_CVTPS2UDQ512,
28143 IX86_BUILTIN_CVTSD2SS_ROUND,
28144 IX86_BUILTIN_CVTSI2SD64,
28145 IX86_BUILTIN_CVTSI2SS32,
28146 IX86_BUILTIN_CVTSI2SS64,
28147 IX86_BUILTIN_CVTSS2SD_ROUND,
28148 IX86_BUILTIN_CVTTPD2DQ512,
28149 IX86_BUILTIN_CVTTPD2UDQ512,
28150 IX86_BUILTIN_CVTTPS2DQ512,
28151 IX86_BUILTIN_CVTTPS2UDQ512,
28152 IX86_BUILTIN_CVTUDQ2PD512,
28153 IX86_BUILTIN_CVTUDQ2PS512,
28154 IX86_BUILTIN_CVTUSI2SD32,
28155 IX86_BUILTIN_CVTUSI2SD64,
28156 IX86_BUILTIN_CVTUSI2SS32,
28157 IX86_BUILTIN_CVTUSI2SS64,
28158 IX86_BUILTIN_DIVPD512,
28159 IX86_BUILTIN_DIVPS512,
28160 IX86_BUILTIN_DIVSD_ROUND,
28161 IX86_BUILTIN_DIVSS_ROUND,
28162 IX86_BUILTIN_EXPANDPD512,
28163 IX86_BUILTIN_EXPANDPD512Z,
28164 IX86_BUILTIN_EXPANDPDLOAD512,
28165 IX86_BUILTIN_EXPANDPDLOAD512Z,
28166 IX86_BUILTIN_EXPANDPS512,
28167 IX86_BUILTIN_EXPANDPS512Z,
28168 IX86_BUILTIN_EXPANDPSLOAD512,
28169 IX86_BUILTIN_EXPANDPSLOAD512Z,
28170 IX86_BUILTIN_EXTRACTF32X4,
28171 IX86_BUILTIN_EXTRACTF64X4,
28172 IX86_BUILTIN_EXTRACTI32X4,
28173 IX86_BUILTIN_EXTRACTI64X4,
28174 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28175 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28176 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28177 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28178 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28179 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28180 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28181 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28182 IX86_BUILTIN_GETEXPPD512,
28183 IX86_BUILTIN_GETEXPPS512,
28184 IX86_BUILTIN_GETEXPSD128,
28185 IX86_BUILTIN_GETEXPSS128,
28186 IX86_BUILTIN_GETMANTPD512,
28187 IX86_BUILTIN_GETMANTPS512,
28188 IX86_BUILTIN_GETMANTSD128,
28189 IX86_BUILTIN_GETMANTSS128,
28190 IX86_BUILTIN_INSERTF32X4,
28191 IX86_BUILTIN_INSERTF64X4,
28192 IX86_BUILTIN_INSERTI32X4,
28193 IX86_BUILTIN_INSERTI64X4,
28194 IX86_BUILTIN_LOADAPD512,
28195 IX86_BUILTIN_LOADAPS512,
28196 IX86_BUILTIN_LOADDQUDI512,
28197 IX86_BUILTIN_LOADDQUSI512,
28198 IX86_BUILTIN_LOADUPD512,
28199 IX86_BUILTIN_LOADUPS512,
28200 IX86_BUILTIN_MAXPD512,
28201 IX86_BUILTIN_MAXPS512,
28202 IX86_BUILTIN_MAXSD_ROUND,
28203 IX86_BUILTIN_MAXSS_ROUND,
28204 IX86_BUILTIN_MINPD512,
28205 IX86_BUILTIN_MINPS512,
28206 IX86_BUILTIN_MINSD_ROUND,
28207 IX86_BUILTIN_MINSS_ROUND,
28208 IX86_BUILTIN_MOVAPD512,
28209 IX86_BUILTIN_MOVAPS512,
28210 IX86_BUILTIN_MOVDDUP512,
28211 IX86_BUILTIN_MOVDQA32LOAD512,
28212 IX86_BUILTIN_MOVDQA32STORE512,
28213 IX86_BUILTIN_MOVDQA32_512,
28214 IX86_BUILTIN_MOVDQA64LOAD512,
28215 IX86_BUILTIN_MOVDQA64STORE512,
28216 IX86_BUILTIN_MOVDQA64_512,
28217 IX86_BUILTIN_MOVNTDQ512,
28218 IX86_BUILTIN_MOVNTDQA512,
28219 IX86_BUILTIN_MOVNTPD512,
28220 IX86_BUILTIN_MOVNTPS512,
28221 IX86_BUILTIN_MOVSHDUP512,
28222 IX86_BUILTIN_MOVSLDUP512,
28223 IX86_BUILTIN_MULPD512,
28224 IX86_BUILTIN_MULPS512,
28225 IX86_BUILTIN_MULSD_ROUND,
28226 IX86_BUILTIN_MULSS_ROUND,
28227 IX86_BUILTIN_PABSD512,
28228 IX86_BUILTIN_PABSQ512,
28229 IX86_BUILTIN_PADDD512,
28230 IX86_BUILTIN_PADDQ512,
28231 IX86_BUILTIN_PANDD512,
28232 IX86_BUILTIN_PANDND512,
28233 IX86_BUILTIN_PANDNQ512,
28234 IX86_BUILTIN_PANDQ512,
28235 IX86_BUILTIN_PBROADCASTD512,
28236 IX86_BUILTIN_PBROADCASTD512_GPR,
28237 IX86_BUILTIN_PBROADCASTMB512,
28238 IX86_BUILTIN_PBROADCASTMW512,
28239 IX86_BUILTIN_PBROADCASTQ512,
28240 IX86_BUILTIN_PBROADCASTQ512_GPR,
28241 IX86_BUILTIN_PBROADCASTQ512_MEM,
28242 IX86_BUILTIN_PCMPEQD512_MASK,
28243 IX86_BUILTIN_PCMPEQQ512_MASK,
28244 IX86_BUILTIN_PCMPGTD512_MASK,
28245 IX86_BUILTIN_PCMPGTQ512_MASK,
28246 IX86_BUILTIN_PCOMPRESSD512,
28247 IX86_BUILTIN_PCOMPRESSDSTORE512,
28248 IX86_BUILTIN_PCOMPRESSQ512,
28249 IX86_BUILTIN_PCOMPRESSQSTORE512,
28250 IX86_BUILTIN_PEXPANDD512,
28251 IX86_BUILTIN_PEXPANDD512Z,
28252 IX86_BUILTIN_PEXPANDDLOAD512,
28253 IX86_BUILTIN_PEXPANDDLOAD512Z,
28254 IX86_BUILTIN_PEXPANDQ512,
28255 IX86_BUILTIN_PEXPANDQ512Z,
28256 IX86_BUILTIN_PEXPANDQLOAD512,
28257 IX86_BUILTIN_PEXPANDQLOAD512Z,
28258 IX86_BUILTIN_PMAXSD512,
28259 IX86_BUILTIN_PMAXSQ512,
28260 IX86_BUILTIN_PMAXUD512,
28261 IX86_BUILTIN_PMAXUQ512,
28262 IX86_BUILTIN_PMINSD512,
28263 IX86_BUILTIN_PMINSQ512,
28264 IX86_BUILTIN_PMINUD512,
28265 IX86_BUILTIN_PMINUQ512,
28266 IX86_BUILTIN_PMOVDB512,
28267 IX86_BUILTIN_PMOVDB512_MEM,
28268 IX86_BUILTIN_PMOVDW512,
28269 IX86_BUILTIN_PMOVDW512_MEM,
28270 IX86_BUILTIN_PMOVQB512,
28271 IX86_BUILTIN_PMOVQB512_MEM,
28272 IX86_BUILTIN_PMOVQD512,
28273 IX86_BUILTIN_PMOVQD512_MEM,
28274 IX86_BUILTIN_PMOVQW512,
28275 IX86_BUILTIN_PMOVQW512_MEM,
28276 IX86_BUILTIN_PMOVSDB512,
28277 IX86_BUILTIN_PMOVSDB512_MEM,
28278 IX86_BUILTIN_PMOVSDW512,
28279 IX86_BUILTIN_PMOVSDW512_MEM,
28280 IX86_BUILTIN_PMOVSQB512,
28281 IX86_BUILTIN_PMOVSQB512_MEM,
28282 IX86_BUILTIN_PMOVSQD512,
28283 IX86_BUILTIN_PMOVSQD512_MEM,
28284 IX86_BUILTIN_PMOVSQW512,
28285 IX86_BUILTIN_PMOVSQW512_MEM,
28286 IX86_BUILTIN_PMOVSXBD512,
28287 IX86_BUILTIN_PMOVSXBQ512,
28288 IX86_BUILTIN_PMOVSXDQ512,
28289 IX86_BUILTIN_PMOVSXWD512,
28290 IX86_BUILTIN_PMOVSXWQ512,
28291 IX86_BUILTIN_PMOVUSDB512,
28292 IX86_BUILTIN_PMOVUSDB512_MEM,
28293 IX86_BUILTIN_PMOVUSDW512,
28294 IX86_BUILTIN_PMOVUSDW512_MEM,
28295 IX86_BUILTIN_PMOVUSQB512,
28296 IX86_BUILTIN_PMOVUSQB512_MEM,
28297 IX86_BUILTIN_PMOVUSQD512,
28298 IX86_BUILTIN_PMOVUSQD512_MEM,
28299 IX86_BUILTIN_PMOVUSQW512,
28300 IX86_BUILTIN_PMOVUSQW512_MEM,
28301 IX86_BUILTIN_PMOVZXBD512,
28302 IX86_BUILTIN_PMOVZXBQ512,
28303 IX86_BUILTIN_PMOVZXDQ512,
28304 IX86_BUILTIN_PMOVZXWD512,
28305 IX86_BUILTIN_PMOVZXWQ512,
28306 IX86_BUILTIN_PMULDQ512,
28307 IX86_BUILTIN_PMULLD512,
28308 IX86_BUILTIN_PMULUDQ512,
28309 IX86_BUILTIN_PORD512,
28310 IX86_BUILTIN_PORQ512,
28311 IX86_BUILTIN_PROLD512,
28312 IX86_BUILTIN_PROLQ512,
28313 IX86_BUILTIN_PROLVD512,
28314 IX86_BUILTIN_PROLVQ512,
28315 IX86_BUILTIN_PRORD512,
28316 IX86_BUILTIN_PRORQ512,
28317 IX86_BUILTIN_PRORVD512,
28318 IX86_BUILTIN_PRORVQ512,
28319 IX86_BUILTIN_PSHUFD512,
28320 IX86_BUILTIN_PSLLD512,
28321 IX86_BUILTIN_PSLLDI512,
28322 IX86_BUILTIN_PSLLQ512,
28323 IX86_BUILTIN_PSLLQI512,
28324 IX86_BUILTIN_PSLLVV16SI,
28325 IX86_BUILTIN_PSLLVV8DI,
28326 IX86_BUILTIN_PSRAD512,
28327 IX86_BUILTIN_PSRADI512,
28328 IX86_BUILTIN_PSRAQ512,
28329 IX86_BUILTIN_PSRAQI512,
28330 IX86_BUILTIN_PSRAVV16SI,
28331 IX86_BUILTIN_PSRAVV8DI,
28332 IX86_BUILTIN_PSRLD512,
28333 IX86_BUILTIN_PSRLDI512,
28334 IX86_BUILTIN_PSRLQ512,
28335 IX86_BUILTIN_PSRLQI512,
28336 IX86_BUILTIN_PSRLVV16SI,
28337 IX86_BUILTIN_PSRLVV8DI,
28338 IX86_BUILTIN_PSUBD512,
28339 IX86_BUILTIN_PSUBQ512,
28340 IX86_BUILTIN_PTESTMD512,
28341 IX86_BUILTIN_PTESTMQ512,
28342 IX86_BUILTIN_PTESTNMD512,
28343 IX86_BUILTIN_PTESTNMQ512,
28344 IX86_BUILTIN_PUNPCKHDQ512,
28345 IX86_BUILTIN_PUNPCKHQDQ512,
28346 IX86_BUILTIN_PUNPCKLDQ512,
28347 IX86_BUILTIN_PUNPCKLQDQ512,
28348 IX86_BUILTIN_PXORD512,
28349 IX86_BUILTIN_PXORQ512,
28350 IX86_BUILTIN_RCP14PD512,
28351 IX86_BUILTIN_RCP14PS512,
28352 IX86_BUILTIN_RCP14SD,
28353 IX86_BUILTIN_RCP14SS,
28354 IX86_BUILTIN_RNDSCALEPD,
28355 IX86_BUILTIN_RNDSCALEPS,
28356 IX86_BUILTIN_RNDSCALESD,
28357 IX86_BUILTIN_RNDSCALESS,
28358 IX86_BUILTIN_RSQRT14PD512,
28359 IX86_BUILTIN_RSQRT14PS512,
28360 IX86_BUILTIN_RSQRT14SD,
28361 IX86_BUILTIN_RSQRT14SS,
28362 IX86_BUILTIN_SCALEFPD512,
28363 IX86_BUILTIN_SCALEFPS512,
28364 IX86_BUILTIN_SCALEFSD,
28365 IX86_BUILTIN_SCALEFSS,
28366 IX86_BUILTIN_SHUFPD512,
28367 IX86_BUILTIN_SHUFPS512,
28368 IX86_BUILTIN_SHUF_F32x4,
28369 IX86_BUILTIN_SHUF_F64x2,
28370 IX86_BUILTIN_SHUF_I32x4,
28371 IX86_BUILTIN_SHUF_I64x2,
28372 IX86_BUILTIN_SQRTPD512,
28373 IX86_BUILTIN_SQRTPD512_MASK,
28374 IX86_BUILTIN_SQRTPS512_MASK,
28375 IX86_BUILTIN_SQRTPS_NR512,
28376 IX86_BUILTIN_SQRTSD_ROUND,
28377 IX86_BUILTIN_SQRTSS_ROUND,
28378 IX86_BUILTIN_STOREAPD512,
28379 IX86_BUILTIN_STOREAPS512,
28380 IX86_BUILTIN_STOREDQUDI512,
28381 IX86_BUILTIN_STOREDQUSI512,
28382 IX86_BUILTIN_STOREUPD512,
28383 IX86_BUILTIN_STOREUPS512,
28384 IX86_BUILTIN_SUBPD512,
28385 IX86_BUILTIN_SUBPS512,
28386 IX86_BUILTIN_SUBSD_ROUND,
28387 IX86_BUILTIN_SUBSS_ROUND,
28388 IX86_BUILTIN_UCMPD512,
28389 IX86_BUILTIN_UCMPQ512,
28390 IX86_BUILTIN_UNPCKHPD512,
28391 IX86_BUILTIN_UNPCKHPS512,
28392 IX86_BUILTIN_UNPCKLPD512,
28393 IX86_BUILTIN_UNPCKLPS512,
28394 IX86_BUILTIN_VCVTSD2SI32,
28395 IX86_BUILTIN_VCVTSD2SI64,
28396 IX86_BUILTIN_VCVTSD2USI32,
28397 IX86_BUILTIN_VCVTSD2USI64,
28398 IX86_BUILTIN_VCVTSS2SI32,
28399 IX86_BUILTIN_VCVTSS2SI64,
28400 IX86_BUILTIN_VCVTSS2USI32,
28401 IX86_BUILTIN_VCVTSS2USI64,
28402 IX86_BUILTIN_VCVTTSD2SI32,
28403 IX86_BUILTIN_VCVTTSD2SI64,
28404 IX86_BUILTIN_VCVTTSD2USI32,
28405 IX86_BUILTIN_VCVTTSD2USI64,
28406 IX86_BUILTIN_VCVTTSS2SI32,
28407 IX86_BUILTIN_VCVTTSS2SI64,
28408 IX86_BUILTIN_VCVTTSS2USI32,
28409 IX86_BUILTIN_VCVTTSS2USI64,
28410 IX86_BUILTIN_VFMADDPD512_MASK,
28411 IX86_BUILTIN_VFMADDPD512_MASK3,
28412 IX86_BUILTIN_VFMADDPD512_MASKZ,
28413 IX86_BUILTIN_VFMADDPS512_MASK,
28414 IX86_BUILTIN_VFMADDPS512_MASK3,
28415 IX86_BUILTIN_VFMADDPS512_MASKZ,
28416 IX86_BUILTIN_VFMADDSD3_ROUND,
28417 IX86_BUILTIN_VFMADDSS3_ROUND,
28418 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28419 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28420 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28421 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28422 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28423 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28424 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28425 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28426 IX86_BUILTIN_VFMSUBPD512_MASK3,
28427 IX86_BUILTIN_VFMSUBPS512_MASK3,
28428 IX86_BUILTIN_VFMSUBSD3_MASK3,
28429 IX86_BUILTIN_VFMSUBSS3_MASK3,
28430 IX86_BUILTIN_VFNMADDPD512_MASK,
28431 IX86_BUILTIN_VFNMADDPS512_MASK,
28432 IX86_BUILTIN_VFNMSUBPD512_MASK,
28433 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28434 IX86_BUILTIN_VFNMSUBPS512_MASK,
28435 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28436 IX86_BUILTIN_VPCLZCNTD512,
28437 IX86_BUILTIN_VPCLZCNTQ512,
28438 IX86_BUILTIN_VPCONFLICTD512,
28439 IX86_BUILTIN_VPCONFLICTQ512,
28440 IX86_BUILTIN_VPERMDF512,
28441 IX86_BUILTIN_VPERMDI512,
28442 IX86_BUILTIN_VPERMI2VARD512,
28443 IX86_BUILTIN_VPERMI2VARPD512,
28444 IX86_BUILTIN_VPERMI2VARPS512,
28445 IX86_BUILTIN_VPERMI2VARQ512,
28446 IX86_BUILTIN_VPERMILPD512,
28447 IX86_BUILTIN_VPERMILPS512,
28448 IX86_BUILTIN_VPERMILVARPD512,
28449 IX86_BUILTIN_VPERMILVARPS512,
28450 IX86_BUILTIN_VPERMT2VARD512,
28451 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28452 IX86_BUILTIN_VPERMT2VARPD512,
28453 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28454 IX86_BUILTIN_VPERMT2VARPS512,
28455 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28456 IX86_BUILTIN_VPERMT2VARQ512,
28457 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28458 IX86_BUILTIN_VPERMVARDF512,
28459 IX86_BUILTIN_VPERMVARDI512,
28460 IX86_BUILTIN_VPERMVARSF512,
28461 IX86_BUILTIN_VPERMVARSI512,
28462 IX86_BUILTIN_VTERNLOGD512_MASK,
28463 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28464 IX86_BUILTIN_VTERNLOGQ512_MASK,
28465 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28466
28467 /* Mask arithmetic operations */
28468 IX86_BUILTIN_KAND16,
28469 IX86_BUILTIN_KANDN16,
28470 IX86_BUILTIN_KNOT16,
28471 IX86_BUILTIN_KOR16,
28472 IX86_BUILTIN_KORTESTC16,
28473 IX86_BUILTIN_KORTESTZ16,
28474 IX86_BUILTIN_KUNPCKBW,
28475 IX86_BUILTIN_KXNOR16,
28476 IX86_BUILTIN_KXOR16,
28477 IX86_BUILTIN_KMOV16,
28478
28479 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28480 where all operands are 32 or 64 bytes wide, respectively. */
28481 IX86_BUILTIN_GATHERALTSIV4DF,
28482 IX86_BUILTIN_GATHERALTDIV8SF,
28483 IX86_BUILTIN_GATHERALTSIV4DI,
28484 IX86_BUILTIN_GATHERALTDIV8SI,
28485 IX86_BUILTIN_GATHER3ALTDIV16SF,
28486 IX86_BUILTIN_GATHER3ALTDIV16SI,
28487 IX86_BUILTIN_GATHER3ALTSIV8DF,
28488 IX86_BUILTIN_GATHER3ALTSIV8DI,
28489 IX86_BUILTIN_GATHER3DIV16SF,
28490 IX86_BUILTIN_GATHER3DIV16SI,
28491 IX86_BUILTIN_GATHER3DIV8DF,
28492 IX86_BUILTIN_GATHER3DIV8DI,
28493 IX86_BUILTIN_GATHER3SIV16SF,
28494 IX86_BUILTIN_GATHER3SIV16SI,
28495 IX86_BUILTIN_GATHER3SIV8DF,
28496 IX86_BUILTIN_GATHER3SIV8DI,
28497 IX86_BUILTIN_SCATTERDIV16SF,
28498 IX86_BUILTIN_SCATTERDIV16SI,
28499 IX86_BUILTIN_SCATTERDIV8DF,
28500 IX86_BUILTIN_SCATTERDIV8DI,
28501 IX86_BUILTIN_SCATTERSIV16SF,
28502 IX86_BUILTIN_SCATTERSIV16SI,
28503 IX86_BUILTIN_SCATTERSIV8DF,
28504 IX86_BUILTIN_SCATTERSIV8DI,
28505
28506 /* AVX512PF */
28507 IX86_BUILTIN_GATHERPFQPD,
28508 IX86_BUILTIN_GATHERPFDPS,
28509 IX86_BUILTIN_GATHERPFDPD,
28510 IX86_BUILTIN_GATHERPFQPS,
28511 IX86_BUILTIN_SCATTERPFDPD,
28512 IX86_BUILTIN_SCATTERPFDPS,
28513 IX86_BUILTIN_SCATTERPFQPD,
28514 IX86_BUILTIN_SCATTERPFQPS,
28515
28516 /* AVX-512ER */
28517 IX86_BUILTIN_EXP2PD_MASK,
28518 IX86_BUILTIN_EXP2PS_MASK,
28519 IX86_BUILTIN_EXP2PS,
28520 IX86_BUILTIN_RCP28PD,
28521 IX86_BUILTIN_RCP28PS,
28522 IX86_BUILTIN_RCP28SD,
28523 IX86_BUILTIN_RCP28SS,
28524 IX86_BUILTIN_RSQRT28PD,
28525 IX86_BUILTIN_RSQRT28PS,
28526 IX86_BUILTIN_RSQRT28SD,
28527 IX86_BUILTIN_RSQRT28SS,
28528
28529 /* SHA builtins. */
28530 IX86_BUILTIN_SHA1MSG1,
28531 IX86_BUILTIN_SHA1MSG2,
28532 IX86_BUILTIN_SHA1NEXTE,
28533 IX86_BUILTIN_SHA1RNDS4,
28534 IX86_BUILTIN_SHA256MSG1,
28535 IX86_BUILTIN_SHA256MSG2,
28536 IX86_BUILTIN_SHA256RNDS2,
28537
28538 /* CLFLUSHOPT instructions. */
28539 IX86_BUILTIN_CLFLUSHOPT,
28540
28541 /* TFmode support builtins. */
28542 IX86_BUILTIN_INFQ,
28543 IX86_BUILTIN_HUGE_VALQ,
28544 IX86_BUILTIN_FABSQ,
28545 IX86_BUILTIN_COPYSIGNQ,
28546
28547 /* Vectorizer support builtins. */
28548 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28549 IX86_BUILTIN_CPYSGNPS,
28550 IX86_BUILTIN_CPYSGNPD,
28551 IX86_BUILTIN_CPYSGNPS256,
28552 IX86_BUILTIN_CPYSGNPS512,
28553 IX86_BUILTIN_CPYSGNPD256,
28554 IX86_BUILTIN_CPYSGNPD512,
28555 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28556 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28557
28558
28559 /* FMA4 instructions. */
28560 IX86_BUILTIN_VFMADDSS,
28561 IX86_BUILTIN_VFMADDSD,
28562 IX86_BUILTIN_VFMADDPS,
28563 IX86_BUILTIN_VFMADDPD,
28564 IX86_BUILTIN_VFMADDPS256,
28565 IX86_BUILTIN_VFMADDPD256,
28566 IX86_BUILTIN_VFMADDSUBPS,
28567 IX86_BUILTIN_VFMADDSUBPD,
28568 IX86_BUILTIN_VFMADDSUBPS256,
28569 IX86_BUILTIN_VFMADDSUBPD256,
28570
28571 /* FMA3 instructions. */
28572 IX86_BUILTIN_VFMADDSS3,
28573 IX86_BUILTIN_VFMADDSD3,
28574
28575 /* XOP instructions. */
28576 IX86_BUILTIN_VPCMOV,
28577 IX86_BUILTIN_VPCMOV_V2DI,
28578 IX86_BUILTIN_VPCMOV_V4SI,
28579 IX86_BUILTIN_VPCMOV_V8HI,
28580 IX86_BUILTIN_VPCMOV_V16QI,
28581 IX86_BUILTIN_VPCMOV_V4SF,
28582 IX86_BUILTIN_VPCMOV_V2DF,
28583 IX86_BUILTIN_VPCMOV256,
28584 IX86_BUILTIN_VPCMOV_V4DI256,
28585 IX86_BUILTIN_VPCMOV_V8SI256,
28586 IX86_BUILTIN_VPCMOV_V16HI256,
28587 IX86_BUILTIN_VPCMOV_V32QI256,
28588 IX86_BUILTIN_VPCMOV_V8SF256,
28589 IX86_BUILTIN_VPCMOV_V4DF256,
28590
28591 IX86_BUILTIN_VPPERM,
28592
28593 IX86_BUILTIN_VPMACSSWW,
28594 IX86_BUILTIN_VPMACSWW,
28595 IX86_BUILTIN_VPMACSSWD,
28596 IX86_BUILTIN_VPMACSWD,
28597 IX86_BUILTIN_VPMACSSDD,
28598 IX86_BUILTIN_VPMACSDD,
28599 IX86_BUILTIN_VPMACSSDQL,
28600 IX86_BUILTIN_VPMACSSDQH,
28601 IX86_BUILTIN_VPMACSDQL,
28602 IX86_BUILTIN_VPMACSDQH,
28603 IX86_BUILTIN_VPMADCSSWD,
28604 IX86_BUILTIN_VPMADCSWD,
28605
28606 IX86_BUILTIN_VPHADDBW,
28607 IX86_BUILTIN_VPHADDBD,
28608 IX86_BUILTIN_VPHADDBQ,
28609 IX86_BUILTIN_VPHADDWD,
28610 IX86_BUILTIN_VPHADDWQ,
28611 IX86_BUILTIN_VPHADDDQ,
28612 IX86_BUILTIN_VPHADDUBW,
28613 IX86_BUILTIN_VPHADDUBD,
28614 IX86_BUILTIN_VPHADDUBQ,
28615 IX86_BUILTIN_VPHADDUWD,
28616 IX86_BUILTIN_VPHADDUWQ,
28617 IX86_BUILTIN_VPHADDUDQ,
28618 IX86_BUILTIN_VPHSUBBW,
28619 IX86_BUILTIN_VPHSUBWD,
28620 IX86_BUILTIN_VPHSUBDQ,
28621
28622 IX86_BUILTIN_VPROTB,
28623 IX86_BUILTIN_VPROTW,
28624 IX86_BUILTIN_VPROTD,
28625 IX86_BUILTIN_VPROTQ,
28626 IX86_BUILTIN_VPROTB_IMM,
28627 IX86_BUILTIN_VPROTW_IMM,
28628 IX86_BUILTIN_VPROTD_IMM,
28629 IX86_BUILTIN_VPROTQ_IMM,
28630
28631 IX86_BUILTIN_VPSHLB,
28632 IX86_BUILTIN_VPSHLW,
28633 IX86_BUILTIN_VPSHLD,
28634 IX86_BUILTIN_VPSHLQ,
28635 IX86_BUILTIN_VPSHAB,
28636 IX86_BUILTIN_VPSHAW,
28637 IX86_BUILTIN_VPSHAD,
28638 IX86_BUILTIN_VPSHAQ,
28639
28640 IX86_BUILTIN_VFRCZSS,
28641 IX86_BUILTIN_VFRCZSD,
28642 IX86_BUILTIN_VFRCZPS,
28643 IX86_BUILTIN_VFRCZPD,
28644 IX86_BUILTIN_VFRCZPS256,
28645 IX86_BUILTIN_VFRCZPD256,
28646
28647 IX86_BUILTIN_VPCOMEQUB,
28648 IX86_BUILTIN_VPCOMNEUB,
28649 IX86_BUILTIN_VPCOMLTUB,
28650 IX86_BUILTIN_VPCOMLEUB,
28651 IX86_BUILTIN_VPCOMGTUB,
28652 IX86_BUILTIN_VPCOMGEUB,
28653 IX86_BUILTIN_VPCOMFALSEUB,
28654 IX86_BUILTIN_VPCOMTRUEUB,
28655
28656 IX86_BUILTIN_VPCOMEQUW,
28657 IX86_BUILTIN_VPCOMNEUW,
28658 IX86_BUILTIN_VPCOMLTUW,
28659 IX86_BUILTIN_VPCOMLEUW,
28660 IX86_BUILTIN_VPCOMGTUW,
28661 IX86_BUILTIN_VPCOMGEUW,
28662 IX86_BUILTIN_VPCOMFALSEUW,
28663 IX86_BUILTIN_VPCOMTRUEUW,
28664
28665 IX86_BUILTIN_VPCOMEQUD,
28666 IX86_BUILTIN_VPCOMNEUD,
28667 IX86_BUILTIN_VPCOMLTUD,
28668 IX86_BUILTIN_VPCOMLEUD,
28669 IX86_BUILTIN_VPCOMGTUD,
28670 IX86_BUILTIN_VPCOMGEUD,
28671 IX86_BUILTIN_VPCOMFALSEUD,
28672 IX86_BUILTIN_VPCOMTRUEUD,
28673
28674 IX86_BUILTIN_VPCOMEQUQ,
28675 IX86_BUILTIN_VPCOMNEUQ,
28676 IX86_BUILTIN_VPCOMLTUQ,
28677 IX86_BUILTIN_VPCOMLEUQ,
28678 IX86_BUILTIN_VPCOMGTUQ,
28679 IX86_BUILTIN_VPCOMGEUQ,
28680 IX86_BUILTIN_VPCOMFALSEUQ,
28681 IX86_BUILTIN_VPCOMTRUEUQ,
28682
28683 IX86_BUILTIN_VPCOMEQB,
28684 IX86_BUILTIN_VPCOMNEB,
28685 IX86_BUILTIN_VPCOMLTB,
28686 IX86_BUILTIN_VPCOMLEB,
28687 IX86_BUILTIN_VPCOMGTB,
28688 IX86_BUILTIN_VPCOMGEB,
28689 IX86_BUILTIN_VPCOMFALSEB,
28690 IX86_BUILTIN_VPCOMTRUEB,
28691
28692 IX86_BUILTIN_VPCOMEQW,
28693 IX86_BUILTIN_VPCOMNEW,
28694 IX86_BUILTIN_VPCOMLTW,
28695 IX86_BUILTIN_VPCOMLEW,
28696 IX86_BUILTIN_VPCOMGTW,
28697 IX86_BUILTIN_VPCOMGEW,
28698 IX86_BUILTIN_VPCOMFALSEW,
28699 IX86_BUILTIN_VPCOMTRUEW,
28700
28701 IX86_BUILTIN_VPCOMEQD,
28702 IX86_BUILTIN_VPCOMNED,
28703 IX86_BUILTIN_VPCOMLTD,
28704 IX86_BUILTIN_VPCOMLED,
28705 IX86_BUILTIN_VPCOMGTD,
28706 IX86_BUILTIN_VPCOMGED,
28707 IX86_BUILTIN_VPCOMFALSED,
28708 IX86_BUILTIN_VPCOMTRUED,
28709
28710 IX86_BUILTIN_VPCOMEQQ,
28711 IX86_BUILTIN_VPCOMNEQ,
28712 IX86_BUILTIN_VPCOMLTQ,
28713 IX86_BUILTIN_VPCOMLEQ,
28714 IX86_BUILTIN_VPCOMGTQ,
28715 IX86_BUILTIN_VPCOMGEQ,
28716 IX86_BUILTIN_VPCOMFALSEQ,
28717 IX86_BUILTIN_VPCOMTRUEQ,
28718
28719 /* LWP instructions. */
28720 IX86_BUILTIN_LLWPCB,
28721 IX86_BUILTIN_SLWPCB,
28722 IX86_BUILTIN_LWPVAL32,
28723 IX86_BUILTIN_LWPVAL64,
28724 IX86_BUILTIN_LWPINS32,
28725 IX86_BUILTIN_LWPINS64,
28726
28727 IX86_BUILTIN_CLZS,
28728
28729 /* RTM */
28730 IX86_BUILTIN_XBEGIN,
28731 IX86_BUILTIN_XEND,
28732 IX86_BUILTIN_XABORT,
28733 IX86_BUILTIN_XTEST,
28734
28735 /* BMI instructions. */
28736 IX86_BUILTIN_BEXTR32,
28737 IX86_BUILTIN_BEXTR64,
28738 IX86_BUILTIN_CTZS,
28739
28740 /* TBM instructions. */
28741 IX86_BUILTIN_BEXTRI32,
28742 IX86_BUILTIN_BEXTRI64,
28743
28744 /* BMI2 instructions. */
28745 IX86_BUILTIN_BZHI32,
28746 IX86_BUILTIN_BZHI64,
28747 IX86_BUILTIN_PDEP32,
28748 IX86_BUILTIN_PDEP64,
28749 IX86_BUILTIN_PEXT32,
28750 IX86_BUILTIN_PEXT64,
28751
28752 /* ADX instructions. */
28753 IX86_BUILTIN_ADDCARRYX32,
28754 IX86_BUILTIN_ADDCARRYX64,
28755
28756 /* FSGSBASE instructions. */
28757 IX86_BUILTIN_RDFSBASE32,
28758 IX86_BUILTIN_RDFSBASE64,
28759 IX86_BUILTIN_RDGSBASE32,
28760 IX86_BUILTIN_RDGSBASE64,
28761 IX86_BUILTIN_WRFSBASE32,
28762 IX86_BUILTIN_WRFSBASE64,
28763 IX86_BUILTIN_WRGSBASE32,
28764 IX86_BUILTIN_WRGSBASE64,
28765
28766 /* RDRND instructions. */
28767 IX86_BUILTIN_RDRAND16_STEP,
28768 IX86_BUILTIN_RDRAND32_STEP,
28769 IX86_BUILTIN_RDRAND64_STEP,
28770
28771 /* RDSEED instructions. */
28772 IX86_BUILTIN_RDSEED16_STEP,
28773 IX86_BUILTIN_RDSEED32_STEP,
28774 IX86_BUILTIN_RDSEED64_STEP,
28775
28776 /* F16C instructions. */
28777 IX86_BUILTIN_CVTPH2PS,
28778 IX86_BUILTIN_CVTPH2PS256,
28779 IX86_BUILTIN_CVTPS2PH,
28780 IX86_BUILTIN_CVTPS2PH256,
28781
28782 /* CFString built-in for darwin */
28783 IX86_BUILTIN_CFSTRING,
28784
28785 /* Builtins to get CPU type and supported features. */
28786 IX86_BUILTIN_CPU_INIT,
28787 IX86_BUILTIN_CPU_IS,
28788 IX86_BUILTIN_CPU_SUPPORTS,
28789
28790 /* Read/write FLAGS register built-ins. */
28791 IX86_BUILTIN_READ_FLAGS,
28792 IX86_BUILTIN_WRITE_FLAGS,
28793
28794 IX86_BUILTIN_MAX
28795 };
28796
28797 /* Table for the ix86 builtin decls. */
28798 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28799
28800 /* Table of all of the builtin functions that are possible with different ISAs
28801 but are waiting to be built until a function is declared to use that
28802 ISA. */
28803 struct builtin_isa {
28804 const char *name; /* function name */
28805 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28806 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28807 bool const_p; /* true if the declaration is constant */
28808 bool set_and_not_built_p; /* true if the builtin was deferred and its decl not yet built */
28809 };
28810
28811 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28812
28813
28814 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28815 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28816 function decl in the ix86_builtins array. Returns the function decl or
28817 NULL_TREE, if the builtin was not added.
28818
28819 If the front end has a special hook for builtin functions, delay adding
28820 builtin functions that aren't in the current ISA until the ISA is changed
28821 with function-specific optimization. Doing so can save about 300K for the
28822 default compiler. When the builtin is expanded, check at that time whether
28823 it is valid.
28824
28825 If the front end doesn't have a special hook, record all builtins, even
28826 those that aren't in the current ISA, in case the user uses
28827 function-specific options for a different ISA, so that we don't get scope
28828 errors if a builtin is added in the middle of a function scope. */
28829
28830 static inline tree
28831 def_builtin (HOST_WIDE_INT mask, const char *name,
28832 enum ix86_builtin_func_type tcode,
28833 enum ix86_builtins code)
28834 {
28835 tree decl = NULL_TREE;
28836
28837 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28838 {
28839 ix86_builtins_isa[(int) code].isa = mask;
28840
28841 mask &= ~OPTION_MASK_ISA_64BIT;
28842 if (mask == 0
28843 || (mask & ix86_isa_flags) != 0
28844 || (lang_hooks.builtin_function
28845 == lang_hooks.builtin_function_ext_scope))
28846
28847 {
28848 tree type = ix86_get_builtin_func_type (tcode);
28849 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28850 NULL, NULL_TREE);
28851 ix86_builtins[(int) code] = decl;
28852 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28853 }
28854 else
28855 {
28856 ix86_builtins[(int) code] = NULL_TREE;
28857 ix86_builtins_isa[(int) code].tcode = tcode;
28858 ix86_builtins_isa[(int) code].name = name;
28859 ix86_builtins_isa[(int) code].const_p = false;
28860 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28861 }
28862 }
28863
28864 return decl;
28865 }
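
/* For example (a hypothetical builtin, shown only to illustrate the flow):

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
                  V4DI_FTYPE_V4DI_V4DI, IX86_BUILTIN_EXAMPLE);

   builds the decl immediately when OPTION_MASK_ISA_AVX2 is already set in
   ix86_isa_flags (or when the front end's builtin_function hook is the
   ext_scope variant); otherwise the request is only recorded in
   ix86_builtins_isa and the decl is created later by ix86_add_new_builtins
   once the ISA becomes available.  */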
28866
28867 /* Like def_builtin, but also marks the function decl "const". */
28868
28869 static inline tree
28870 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28871 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28872 {
28873 tree decl = def_builtin (mask, name, tcode, code);
28874 if (decl)
28875 TREE_READONLY (decl) = 1;
28876 else
28877 ix86_builtins_isa[(int) code].const_p = true;
28878
28879 return decl;
28880 }
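
/* TREE_READONLY on a function decl tells the rest of the compiler that the
   call has no side effects beyond its return value, so redundant calls can
   be removed.  When the decl itself is deferred, const_p remembers that the
   same marking must be applied once ix86_add_new_builtins builds it.  */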
28881
28882 /* Add any new builtin functions for a given ISA that may not have been
28883 declared. This saves a bit of space compared to adding all of the
28884 declarations to the tree, even those we never end up using. */
28885
28886 static void
28887 ix86_add_new_builtins (HOST_WIDE_INT isa)
28888 {
28889 int i;
28890
28891 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28892 {
28893 if ((ix86_builtins_isa[i].isa & isa) != 0
28894 && ix86_builtins_isa[i].set_and_not_built_p)
28895 {
28896 tree decl, type;
28897
28898 /* Don't define the builtin again. */
28899 ix86_builtins_isa[i].set_and_not_built_p = false;
28900
28901 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28902 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28903 type, i, BUILT_IN_MD, NULL,
28904 NULL_TREE);
28905
28906 ix86_builtins[i] = decl;
28907 if (ix86_builtins_isa[i].const_p)
28908 TREE_READONLY (decl) = 1;
28909 }
28910 }
28911 }
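
/* Usage note: this is typically invoked when the set of enabled ISAs grows
   after initialization, e.g. while handling a target("...") attribute or
   pragma, so that builtins for the newly enabled ISA become visible to the
   code being compiled; the actual call sites are elsewhere in this file.  */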
28912
28913 /* Bits for builtin_description.flag. */
28914
28915 /* Set when we don't support the comparison natively, and should
28916 swap the comparison operands in order to support it. */
28917 #define BUILTIN_DESC_SWAP_OPERANDS 1
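
/* For example, a GE comparison for which only the LE pattern is available
   can be emitted by exchanging the two operands and using LE, since
   a >= b is equivalent to b <= a.  */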
28918
28919 struct builtin_description
28920 {
28921 const HOST_WIDE_INT mask;
28922 const enum insn_code icode;
28923 const char *const name;
28924 const enum ix86_builtins code;
28925 const enum rtx_code comparison;
28926 const int flag;
28927 };
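
/* How the tables below are read: MASK gates the entry on an ISA option,
   ICODE names the insn pattern used to expand it, NAME is the user-visible
   __builtin_ia32_* identifier, CODE is its IX86_BUILTIN_* index, and
   COMPARISON supplies the RTX comparison code for compare intrinsics.
   FLAG is free-form per table: a CCmode for the pcmpestr/pcmpistr entries,
   an ix86_builtin_func_type (cast to int) in the argument tables, or bits
   such as BUILTIN_DESC_SWAP_OPERANDS.  */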
28928
28929 static const struct builtin_description bdesc_comi[] =
28930 {
28931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28955 };
28956
28957 static const struct builtin_description bdesc_pcmpestr[] =
28958 {
28959 /* SSE4.2 */
28960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28967 };
28968
28969 static const struct builtin_description bdesc_pcmpistr[] =
28970 {
28971 /* SSE4.2 */
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28979 };
28980
28981 /* Special builtins with variable number of arguments. */
28982 static const struct builtin_description bdesc_special_args[] =
28983 {
28984 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28985 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28986 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28987
28988 /* 80387 (used internally for atomic compound assignment). */
28989 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28990 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28991 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28992 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28993
28994 /* MMX */
28995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28996
28997 /* 3DNow! */
28998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28999
29000 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29001 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29002 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29003 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29004 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29006 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29007 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29008 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009
29010 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29011 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29012 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018
29019 /* SSE */
29020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29023
29024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29028
29029 /* SSE or 3DNow!A */
29030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29032
29033 /* SSE2 */
29034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29041 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29044
29045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29047
29048 /* SSE3 */
29049 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29050
29051 /* SSE4.1 */
29052 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29053
29054 /* SSE4A */
29055 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29056 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29057
29058 /* AVX */
29059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29061
29062 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29067
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29075
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29079
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29088
29089 /* AVX2 */
29090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29099
29100 /* AVX512F */
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29148
29149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29155
29156 /* FSGSBASE */
29157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29165
29166 /* RTM */
29167 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29168 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29169 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29170 };
29171
29172 /* Builtins with variable number of arguments. */
29173 static const struct builtin_description bdesc_args[] =
29174 {
29175 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29176 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29177 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29182
29183 /* MMX */
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29190
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29202
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29214
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29221
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29225
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29227
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29234
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29241
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29246
29247 /* 3DNow! */
29248 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29252
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29268
29269 /* 3DNow!A */
29270 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29276
29277 /* SSE */
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29290
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29292
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29322
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29343 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29344
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29346
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29350
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29353
29354 /* SSE, MMX or 3DNow!A */
29355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29358
29359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29363
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29366
29367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29368
29369 /* SSE2 */
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29371
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29377
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29383
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29385
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29389 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29390
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29394
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403
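/* Note: the "greater" comparisons below are encoded as LT/LE on the same
   maskcmp pattern; the _SWAP suffix in the function type makes the expander
   swap the two operands.  The "not" variants use the IEEE-inverse codes
   (UNGE/UNGT), so unordered operands compare true.  */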
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29424
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29442
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29451
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29460
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29463
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29468
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29471
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29478
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29483
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29492
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29499
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29502
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29504
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29506 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29509
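/* Note: in the shift entries the _COUNT suffix marks the last operand as a
   shift count (register or immediate), while _INT_CONVERT marks the
   whole-register byte shifts (pslldq/psrldq), whose V2DI arguments are
   converted to the V1TI mode of the underlying insn.  */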
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29517
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29525
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29530
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29534
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29536
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29538
29539 /* SSE2 MMX */
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29542
29543 /* SSE3 */
29544 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29545 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29546
29547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29553
29554 /* SSSE3 */
29555 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29557 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29561
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29586
29587 /* SSSE3. */
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29590
29591 /* SSE4.1 */
29592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29602
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29616
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629
29630 /* SSE4.1 */
29631 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29632 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29635
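/* Note: for the floor/ceil/trunc/rint variants the comparison field is
   reused to carry the ROUND_* rounding-mode immediate that is passed to the
   shared sse4_1_round patterns.  */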
29636 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29640
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29643
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29646
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29651
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29654
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29657
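/* Note: the ptest variants share one insn; the rtx code selects which flag
   the expander tests: EQ -> ZF set (ptestz), LTU -> CF set (ptestc),
   GTU -> neither flag set (ptestnzc).  The vtest entries below follow the
   same convention.  */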
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29661
29662 /* SSE4.2 */
29663 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29664 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29665 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29666 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29667 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29668
29669 /* SSE4A */
29670 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29671 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29672 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29673 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29674
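/* Note: the AES and PCLMUL entries carry a null name; the user-visible
   builtins are registered separately with their own ISA masks, and this
   table only supplies their expansion data.  */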
29675 /* AES */
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29678
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29683
29684 /* PCLMUL */
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29686
29687 /* AVX */
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29714
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29719
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29754
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29758
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29766
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29769
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29774
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29777
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29780
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29785
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29788
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29791
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29796
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29803
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29819
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29822
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29825
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29827
29828 /* AVX2 */
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29975
29976 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29977
29978 /* BMI */
29979 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29980 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29981 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29982
29983 /* TBM */
29984 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29985 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29986
29987 /* F16C */
29988 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29989 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29990 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29991 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29992
29993 /* BMI2 */
29994 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29996 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29997 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29999 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30000
30001 /* AVX512F */
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30052 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30163 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30164 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30165 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30193
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30198 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30202
30203 /* Mask arithmetic operations */
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30214
30215 /* SHA */
30216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30223 };
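/* Reading aid (added comment, not part of the original table): each row in
   these tables is a `struct builtin_description' initializer.  Judging from
   how the fields are consumed later in this file (d->mask, d->name, d->code,
   d->flag), a row appears to read roughly as:

     { OPTION_MASK_ISA_*,     ISA mask the builtin requires (d->mask)
       CODE_FOR_*,            insn pattern assumed to expand it
       "__builtin_ia32_*",    user-visible name (d->name); 0 when the name
                              is attached elsewhere, as for the SHA rows above
       IX86_BUILTIN_*,        enum identifying the builtin (d->code)
       UNKNOWN or rtx code,   comparison/rounding code where one is needed
       (int) *_FTYPE_*        encoded prototype (d->flag) }

   The authoritative struct definition is assumed to live earlier in this
   file; this sketch is only a guide for reading the rows.  */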
30224
30225 /* Builtins with rounding support. */
30226 static const struct builtin_description bdesc_round_args[] =
30227 {
30228 /* AVX512F */
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30248 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30250 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30259 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30309 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30311 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30313 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30315 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30319 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348
30349 /* AVX512ER */
30350 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30351 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30352 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30360 };
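/* Reading aid (assumption, not taken from the surrounding code): in the
   bdesc_round_args table above, the trailing INT in each *_FTYPE_* encoding
   is presumably the extra rounding/SAE immediate that the "_round" and
   "_mask" builtins take as their last argument, e.g. something like

     v8df __builtin_ia32_addpd512_mask (v8df a, v8df b, v8df src,
                                        unsigned char mask, int rounding);

   where `rounding' would be one of the _MM_FROUND_* constants used by the
   intrinsics headers.  Consult the avx512fintrin.h wrappers for the
   authoritative prototypes.  */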
30361
30362 /* FMA4 and XOP. */
30363 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30364 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30365 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30366 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30367 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30368 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30369 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30370 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30371 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30372 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30373 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30374 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30375 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30376 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30377 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30378 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30379 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30380 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30381 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30382 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30383 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30384 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30385 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30386 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30387 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30388 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30389 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30390 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30391 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30392 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30393 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30394 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30395 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30396 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30397 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30398 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30399 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30400 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30401 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30402 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30403 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30404 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30405 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30406 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30407 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30408 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30409 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30410 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30411 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30412 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30413 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30414 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
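/* Reading aid (added comment): each MULTI_ARG_* macro is a shorter alias
   for one of the *_FTYPE_* prototype encodings, named by argument count and
   element type.  For example, MULTI_ARG_3_SF expands to
   V4SF_FTYPE_V4SF_V4SF_V4SF, so a builtin registered with it, such as
   __builtin_ia32_vfmaddss below, presumably ends up with a prototype along
   the lines of

     typedef float v4sf __attribute__ ((vector_size (16)));
     v4sf __builtin_ia32_vfmaddss (v4sf, v4sf, v4sf);

   (a sketch only; the real type is built from the encoded flag, apparently
   via ix86_get_builtin_func_type, which is used for the same encodings
   later in this file).  */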
30415
30416 static const struct builtin_description bdesc_multi_arg[] =
30417 {
30418 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30419 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30420 UNKNOWN, (int)MULTI_ARG_3_SF },
30421 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30422 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30423 UNKNOWN, (int)MULTI_ARG_3_DF },
30424
30425 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30426 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30427 UNKNOWN, (int)MULTI_ARG_3_SF },
30428 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30429 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30430 UNKNOWN, (int)MULTI_ARG_3_DF },
30431
30432 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30433 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30434 UNKNOWN, (int)MULTI_ARG_3_SF },
30435 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30436 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30437 UNKNOWN, (int)MULTI_ARG_3_DF },
30438 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30439 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30440 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30441 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30442 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30443 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30444
30445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30446 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30447 UNKNOWN, (int)MULTI_ARG_3_SF },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30449 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30450 UNKNOWN, (int)MULTI_ARG_3_DF },
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30452 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30453 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30455 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30456 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30457
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30465
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30473
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30475
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30488
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30505
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30512
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30528
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30536
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30544
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30552
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30560
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30568
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30576
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30584
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30592
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30601
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30610
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30615
30616 };
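/* Note (added comment): for the vpcom* rows above, the fifth field is not
   UNKNOWN but an rtx comparison code (EQ, NE, LT, LTU, ...).  That is how a
   single insn pattern such as xop_maskcmpv16qi3 can back several
   differently named comparison builtins, including the duplicate
   "ne"/"neq" spellings that map to the same IX86_BUILTIN_* value.  */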
30617 \f
30618 /* TM vector builtins. */
30619
30620 /* Reuse the existing x86-specific `struct builtin_description' because
30621 we're lazy. Add casts to make them fit. */
30622 static const struct builtin_description bdesc_tm[] =
30623 {
30624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30628 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30629 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30630 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30631
30632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30636 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30637 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30638 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30639
30640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30644 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30647
30648 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30650 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30651 };
30652
30653 /* TM callbacks. */
30654
30655 /* Return the builtin decl needed to load a vector of TYPE. */
30656
30657 static tree
30658 ix86_builtin_tm_load (tree type)
30659 {
30660 if (TREE_CODE (type) == VECTOR_TYPE)
30661 {
30662 switch (tree_to_uhwi (TYPE_SIZE (type)))
30663 {
30664 case 64:
30665 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30666 case 128:
30667 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30668 case 256:
30669 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30670 }
30671 }
30672 return NULL_TREE;
30673 }
30674
30675 /* Return the builtin decl needed to store a vector of TYPE. */
30676
30677 static tree
30678 ix86_builtin_tm_store (tree type)
30679 {
30680 if (TREE_CODE (type) == VECTOR_TYPE)
30681 {
30682 switch (tree_to_uhwi (TYPE_SIZE (type)))
30683 {
30684 case 64:
30685 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30686 case 128:
30687 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30688 case 256:
30689 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30690 }
30691 }
30692 return NULL_TREE;
30693 }
30694 \f
30695 /* Initialize the transactional memory vector load/store builtins. */
30696
30697 static void
30698 ix86_init_tm_builtins (void)
30699 {
30700 enum ix86_builtin_func_type ftype;
30701 const struct builtin_description *d;
30702 size_t i;
30703 tree decl;
30704 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30705 tree attrs_log, attrs_type_log;
30706
30707 if (!flag_tm)
30708 return;
30709
30710 /* If there are no builtins defined, we must be compiling in a
30711 language without trans-mem support. */
30712 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30713 return;
30714
30715 /* Use whatever attributes a normal TM load has. */
30716 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30717 attrs_load = DECL_ATTRIBUTES (decl);
30718 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30719 /* Use whatever attributes a normal TM store has. */
30720 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30721 attrs_store = DECL_ATTRIBUTES (decl);
30722 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30723 /* Use whatever attributes a normal TM log has. */
30724 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30725 attrs_log = DECL_ATTRIBUTES (decl);
30726 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30727
30728 for (i = 0, d = bdesc_tm;
30729 i < ARRAY_SIZE (bdesc_tm);
30730 i++, d++)
30731 {
30732 if ((d->mask & ix86_isa_flags) != 0
30733 || (lang_hooks.builtin_function
30734 == lang_hooks.builtin_function_ext_scope))
30735 {
30736 tree type, attrs, attrs_type;
30737 enum built_in_function code = (enum built_in_function) d->code;
30738
30739 ftype = (enum ix86_builtin_func_type) d->flag;
30740 type = ix86_get_builtin_func_type (ftype);
30741
30742 if (BUILTIN_TM_LOAD_P (code))
30743 {
30744 attrs = attrs_load;
30745 attrs_type = attrs_type_load;
30746 }
30747 else if (BUILTIN_TM_STORE_P (code))
30748 {
30749 attrs = attrs_store;
30750 attrs_type = attrs_type_store;
30751 }
30752 else
30753 {
30754 attrs = attrs_log;
30755 attrs_type = attrs_type_log;
30756 }
30757 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30758 /* The builtin without the prefix for
30759 calling it directly. */
30760 d->name + strlen ("__builtin_"),
30761 attrs);
30762 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30763 set the TYPE_ATTRIBUTES. */
30764 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30765
30766 set_builtin_decl (code, decl, false);
30767 }
30768 }
30769 }
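/* For example, the bdesc_tm entry named "__builtin__ITM_WM64" registered
   above can (assuming -fgnu-tm and MMX support) also be called directly as
   "_ITM_WM64", because add_builtin_function is passed the name with the
   "__builtin_" prefix stripped.  */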
30770
30771 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30772 not in the current target ISA, so that the user can compile particular
30773 modules with target-specific options that differ from the command-line
30774 options. */
30775 static void
30776 ix86_init_mmx_sse_builtins (void)
30777 {
30778 const struct builtin_description * d;
30779 enum ix86_builtin_func_type ftype;
30780 size_t i;
30781
30782 /* Add all special builtins with variable number of operands. */
30783 for (i = 0, d = bdesc_special_args;
30784 i < ARRAY_SIZE (bdesc_special_args);
30785 i++, d++)
30786 {
30787 if (d->name == 0)
30788 continue;
30789
30790 ftype = (enum ix86_builtin_func_type) d->flag;
30791 def_builtin (d->mask, d->name, ftype, d->code);
30792 }
30793
30794 /* Add all builtins with variable number of operands. */
30795 for (i = 0, d = bdesc_args;
30796 i < ARRAY_SIZE (bdesc_args);
30797 i++, d++)
30798 {
30799 if (d->name == 0)
30800 continue;
30801
30802 ftype = (enum ix86_builtin_func_type) d->flag;
30803 def_builtin_const (d->mask, d->name, ftype, d->code);
30804 }
30805
30806 /* Add all builtins with rounding. */
30807 for (i = 0, d = bdesc_round_args;
30808 i < ARRAY_SIZE (bdesc_round_args);
30809 i++, d++)
30810 {
30811 if (d->name == 0)
30812 continue;
30813
30814 ftype = (enum ix86_builtin_func_type) d->flag;
30815 def_builtin_const (d->mask, d->name, ftype, d->code);
30816 }
30817
30818 /* pcmpestr[im] insns. */
30819 for (i = 0, d = bdesc_pcmpestr;
30820 i < ARRAY_SIZE (bdesc_pcmpestr);
30821 i++, d++)
30822 {
30823 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30824 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30825 else
30826 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30827 def_builtin_const (d->mask, d->name, ftype, d->code);
30828 }
30829
30830 /* pcmpistr[im] insns. */
30831 for (i = 0, d = bdesc_pcmpistr;
30832 i < ARRAY_SIZE (bdesc_pcmpistr);
30833 i++, d++)
30834 {
30835 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30836 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30837 else
30838 ftype = INT_FTYPE_V16QI_V16QI_INT;
30839 def_builtin_const (d->mask, d->name, ftype, d->code);
30840 }
30841
30842 /* comi/ucomi insns. */
30843 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30844 {
30845 if (d->mask == OPTION_MASK_ISA_SSE2)
30846 ftype = INT_FTYPE_V2DF_V2DF;
30847 else
30848 ftype = INT_FTYPE_V4SF_V4SF;
30849 def_builtin_const (d->mask, d->name, ftype, d->code);
30850 }
30851
30852 /* SSE */
30853 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30854 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30855 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30856 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30857
30858 /* SSE or 3DNow!A */
30859 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30860 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30861 IX86_BUILTIN_MASKMOVQ);
30862
30863 /* SSE2 */
30864 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30865 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30866
30867 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30868 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30869 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30870 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30871
30872 /* SSE3. */
30873 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30874 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30875 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30876 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30877
30878 /* AES */
30879 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30880 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30881 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30882 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30883 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30884 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30885 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30886 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30887 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30888 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30889 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30890 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30891
30892 /* PCLMUL */
30893 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30894 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30895
30896 /* RDRND */
30897 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30898 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30899 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30900 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30901 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30902 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30903 IX86_BUILTIN_RDRAND64_STEP);
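  /* A minimal usage sketch of the step builtins defined above: each returns
     nonzero on success and stores the random value through its pointer
     argument, e.g.

	unsigned int r;
	if (__builtin_ia32_rdrand32_step (&r))
	  ... use r ...

     where the "..." stands for whatever the caller does with the value.  */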
30904
30905 /* AVX2 */
30906 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30907 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30908 IX86_BUILTIN_GATHERSIV2DF);
30909
30910 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30911 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30912 IX86_BUILTIN_GATHERSIV4DF);
30913
30914 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30915 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30916 IX86_BUILTIN_GATHERDIV2DF);
30917
30918 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30919 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30920 IX86_BUILTIN_GATHERDIV4DF);
30921
30922 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30923 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30924 IX86_BUILTIN_GATHERSIV4SF);
30925
30926 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30927 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30928 IX86_BUILTIN_GATHERSIV8SF);
30929
30930 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30931 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30932 IX86_BUILTIN_GATHERDIV4SF);
30933
30934 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30935 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30936 IX86_BUILTIN_GATHERDIV8SF);
30937
30938 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30939 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30940 IX86_BUILTIN_GATHERSIV2DI);
30941
30942 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30943 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30944 IX86_BUILTIN_GATHERSIV4DI);
30945
30946 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30947 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30948 IX86_BUILTIN_GATHERDIV2DI);
30949
30950 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30951 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30952 IX86_BUILTIN_GATHERDIV4DI);
30953
30954 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30955 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30956 IX86_BUILTIN_GATHERSIV4SI);
30957
30958 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30959 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30960 IX86_BUILTIN_GATHERSIV8SI);
30961
30962 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30963 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30964 IX86_BUILTIN_GATHERDIV4SI);
30965
30966 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30967 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30968 IX86_BUILTIN_GATHERDIV8SI);
30969
30970 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30971 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30972 IX86_BUILTIN_GATHERALTSIV4DF);
30973
30974 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30975 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30976 IX86_BUILTIN_GATHERALTDIV8SF);
30977
30978 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30979 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30980 IX86_BUILTIN_GATHERALTSIV4DI);
30981
30982 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30983 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30984 IX86_BUILTIN_GATHERALTDIV8SI);
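  /* Sketch of how one of the gather builtins above is invoked; the operands
     follow its signature, e.g. for V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
     (source, base pointer, index vector, mask, scale):

	res = __builtin_ia32_gathersiv2df (src, base, idx, mask, 8);

     where src, base, idx and mask are example operands and the final scale
     must be a compile-time constant (1, 2, 4 or 8).  */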
30985
30986 /* AVX512F */
30987 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30988 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30989 IX86_BUILTIN_GATHER3SIV16SF);
30990
30991 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30992 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30993 IX86_BUILTIN_GATHER3SIV8DF);
30994
30995 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30996 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30997 IX86_BUILTIN_GATHER3DIV16SF);
30998
30999 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31000 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31001 IX86_BUILTIN_GATHER3DIV8DF);
31002
31003 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31004 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31005 IX86_BUILTIN_GATHER3SIV16SI);
31006
31007 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31008 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31009 IX86_BUILTIN_GATHER3SIV8DI);
31010
31011 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31012 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31013 IX86_BUILTIN_GATHER3DIV16SI);
31014
31015 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31016 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31017 IX86_BUILTIN_GATHER3DIV8DI);
31018
31019 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31020 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31021 IX86_BUILTIN_GATHER3ALTSIV8DF);
31022
31023 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31024 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31025 IX86_BUILTIN_GATHER3ALTDIV16SF);
31026
31027 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31028 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31029 IX86_BUILTIN_GATHER3ALTSIV8DI);
31030
31031 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31032 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31033 IX86_BUILTIN_GATHER3ALTDIV16SI);
31034
31035 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31036 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31037 IX86_BUILTIN_SCATTERSIV16SF);
31038
31039 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31040 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31041 IX86_BUILTIN_SCATTERSIV8DF);
31042
31043 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31044 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31045 IX86_BUILTIN_SCATTERDIV16SF);
31046
31047 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31048 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31049 IX86_BUILTIN_SCATTERDIV8DF);
31050
31051 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31052 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31053 IX86_BUILTIN_SCATTERSIV16SI);
31054
31055 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31056 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31057 IX86_BUILTIN_SCATTERSIV8DI);
31058
31059 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31060 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31061 IX86_BUILTIN_SCATTERDIV16SI);
31062
31063 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31064 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31065 IX86_BUILTIN_SCATTERDIV8DI);
31066
31067 /* AVX512PF */
31068 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31069 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31070 IX86_BUILTIN_GATHERPFDPD);
31071 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31072 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31073 IX86_BUILTIN_GATHERPFDPS);
31074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31075 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31076 IX86_BUILTIN_GATHERPFQPD);
31077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31078 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31079 IX86_BUILTIN_GATHERPFQPS);
31080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31081 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31082 IX86_BUILTIN_SCATTERPFDPD);
31083 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31084 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31085 IX86_BUILTIN_SCATTERPFDPS);
31086 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31087 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31088 IX86_BUILTIN_SCATTERPFQPD);
31089 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31090 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31091 IX86_BUILTIN_SCATTERPFQPS);
31092
31093 /* SHA */
31094 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31095 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31096 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31097 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31098 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31099 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31100 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31101 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31102 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31103 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31104 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31105 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31106 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31107 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31108
31109 /* RTM. */
31110 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31111 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31112
31113 /* MMX access to the vec_init patterns. */
31114 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31115 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31116
31117 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31118 V4HI_FTYPE_HI_HI_HI_HI,
31119 IX86_BUILTIN_VEC_INIT_V4HI);
31120
31121 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31122 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31123 IX86_BUILTIN_VEC_INIT_V8QI);
31124
31125 /* Access to the vec_extract patterns. */
31126 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31127 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31128 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31129 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31130 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31131 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31132 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31133 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31134 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31135 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31136
31137 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31138 "__builtin_ia32_vec_ext_v4hi",
31139 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31140
31141 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31142 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31143
31144 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31145 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31146
31147 /* Access to the vec_set patterns. */
31148 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31149 "__builtin_ia32_vec_set_v2di",
31150 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31151
31152 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31153 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31154
31155 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31156 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31157
31158 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31159 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31160
31161 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31162 "__builtin_ia32_vec_set_v4hi",
31163 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31164
31165 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31166 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31167
31168 /* RDSEED */
31169 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31170 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31171 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31172 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31173 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31174 "__builtin_ia32_rdseed_di_step",
31175 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31176
31177 /* ADCX */
31178 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31179 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31180 def_builtin (OPTION_MASK_ISA_64BIT,
31181 "__builtin_ia32_addcarryx_u64",
31182 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31183 IX86_BUILTIN_ADDCARRYX64);
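  /* Usage sketch for the add-with-carry builtins above: the first argument
     is the carry-in, the return value is the carry-out, and the sum is
     stored through the pointer, e.g.

	unsigned int sum;
	unsigned char c_out = __builtin_ia32_addcarryx_u32 (c_in, a, b, &sum);

     where c_in, a and b are just example operands.  */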
31184
31185 /* Read/write FLAGS. */
31186 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31187 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31188 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31189 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31190 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31191 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31192 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31193 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
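  /* Usage sketch for the flags builtins above (32-bit variants shown):

	unsigned int flags = __builtin_ia32_readeflags_u32 ();
	__builtin_ia32_writeeflags_u32 (flags);  */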
31194
31195 /* CLFLUSHOPT. */
31196 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31197 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31198
31199 /* Add FMA4 multi-arg argument instructions */
31200 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31201 {
31202 if (d->name == 0)
31203 continue;
31204
31205 ftype = (enum ix86_builtin_func_type) d->flag;
31206 def_builtin_const (d->mask, d->name, ftype, d->code);
31207 }
31208 }
31209
31210 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31211 to return a pointer to VERSION_DECL if the outcome of the expression
31212 formed by PREDICATE_CHAIN is true. This function will be called during
31213 version dispatch to decide which function version to execute. It returns
31214 the basic block at the end, to which more conditions can be added. */
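/* Roughly, for one (PREDICATE_DECL, PREDICATE_ARG) pair the code added to
   NEW_BB has the shape

	cond = predicate_decl (predicate_arg);
	if (cond > 0)
	  return (void *) &version_decl;

   with the false edge leading to the block that is returned to the caller
   so that further conditions can be chained onto it.  */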
31215
31216 static basic_block
31217 add_condition_to_bb (tree function_decl, tree version_decl,
31218 tree predicate_chain, basic_block new_bb)
31219 {
31220 gimple return_stmt;
31221 tree convert_expr, result_var;
31222 gimple convert_stmt;
31223 gimple call_cond_stmt;
31224 gimple if_else_stmt;
31225
31226 basic_block bb1, bb2, bb3;
31227 edge e12, e23;
31228
31229 tree cond_var, and_expr_var = NULL_TREE;
31230 gimple_seq gseq;
31231
31232 tree predicate_decl, predicate_arg;
31233
31234 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31235
31236 gcc_assert (new_bb != NULL);
31237 gseq = bb_seq (new_bb);
31238
31239
31240 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31241 build_fold_addr_expr (version_decl));
31242 result_var = create_tmp_var (ptr_type_node, NULL);
31243 convert_stmt = gimple_build_assign (result_var, convert_expr);
31244 return_stmt = gimple_build_return (result_var);
31245
31246 if (predicate_chain == NULL_TREE)
31247 {
31248 gimple_seq_add_stmt (&gseq, convert_stmt);
31249 gimple_seq_add_stmt (&gseq, return_stmt);
31250 set_bb_seq (new_bb, gseq);
31251 gimple_set_bb (convert_stmt, new_bb);
31252 gimple_set_bb (return_stmt, new_bb);
31253 pop_cfun ();
31254 return new_bb;
31255 }
31256
31257 while (predicate_chain != NULL)
31258 {
31259 cond_var = create_tmp_var (integer_type_node, NULL);
31260 predicate_decl = TREE_PURPOSE (predicate_chain);
31261 predicate_arg = TREE_VALUE (predicate_chain);
31262 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31263 gimple_call_set_lhs (call_cond_stmt, cond_var);
31264
31265 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31266 gimple_set_bb (call_cond_stmt, new_bb);
31267 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31268
31269 predicate_chain = TREE_CHAIN (predicate_chain);
31270
31271 if (and_expr_var == NULL)
31272 and_expr_var = cond_var;
31273 else
31274 {
31275 gimple assign_stmt;
31276 /* Use MIN_EXPR to check whether any of the condition values is zero:
31277 and_expr_var = min_expr <cond_var, and_expr_var>.  */
31278 assign_stmt = gimple_build_assign (and_expr_var,
31279 build2 (MIN_EXPR, integer_type_node,
31280 cond_var, and_expr_var));
31281
31282 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31283 gimple_set_bb (assign_stmt, new_bb);
31284 gimple_seq_add_stmt (&gseq, assign_stmt);
31285 }
31286 }
31287
31288 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31289 integer_zero_node,
31290 NULL_TREE, NULL_TREE);
31291 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31292 gimple_set_bb (if_else_stmt, new_bb);
31293 gimple_seq_add_stmt (&gseq, if_else_stmt);
31294
31295 gimple_seq_add_stmt (&gseq, convert_stmt);
31296 gimple_seq_add_stmt (&gseq, return_stmt);
31297 set_bb_seq (new_bb, gseq);
31298
31299 bb1 = new_bb;
31300 e12 = split_block (bb1, if_else_stmt);
31301 bb2 = e12->dest;
31302 e12->flags &= ~EDGE_FALLTHRU;
31303 e12->flags |= EDGE_TRUE_VALUE;
31304
31305 e23 = split_block (bb2, return_stmt);
31306
31307 gimple_set_bb (convert_stmt, bb2);
31308 gimple_set_bb (return_stmt, bb2);
31309
31310 bb3 = e23->dest;
31311 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31312
31313 remove_edge (e23);
31314 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31315
31316 pop_cfun ();
31317
31318 return bb3;
31319 }
31320
31321 /* This parses the attribute arguments to target in DECL and determines
31322 the right builtin to use to match the platform specification.
31323 It returns the priority value for this version decl. If PREDICATE_LIST
31324 is not NULL, it stores the list of cpu features that need to be checked
31325 before dispatching this function. */
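/* For example, a version declared with
   __attribute__ ((target ("arch=core2,sse4.2"))) yields a predicate list
   that checks __builtin_cpu_is ("core2") and __builtin_cpu_supports
   ("sse4.2"), and the returned priority is the highest one seen while
   parsing the attribute string.  */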
31326
31327 static unsigned int
31328 get_builtin_code_for_version (tree decl, tree *predicate_list)
31329 {
31330 tree attrs;
31331 struct cl_target_option cur_target;
31332 tree target_node;
31333 struct cl_target_option *new_target;
31334 const char *arg_str = NULL;
31335 const char *attrs_str = NULL;
31336 char *tok_str = NULL;
31337 char *token;
31338
31339 /* Priority of i386 features, greater value is higher priority. This is
31340 used to decide the order in which function dispatch must happen. For
31341 instance, a version specialized for SSE4.2 should be checked for dispatch
31342 before a version for SSE3, as SSE4.2 implies SSE3. */
31343 enum feature_priority
31344 {
31345 P_ZERO = 0,
31346 P_MMX,
31347 P_SSE,
31348 P_SSE2,
31349 P_SSE3,
31350 P_SSSE3,
31351 P_PROC_SSSE3,
31352 P_SSE4_A,
31353 P_PROC_SSE4_A,
31354 P_SSE4_1,
31355 P_SSE4_2,
31356 P_PROC_SSE4_2,
31357 P_POPCNT,
31358 P_AVX,
31359 P_PROC_AVX,
31360 P_FMA4,
31361 P_XOP,
31362 P_PROC_XOP,
31363 P_FMA,
31364 P_PROC_FMA,
31365 P_AVX2,
31366 P_PROC_AVX2
31367 };
31368
31369 enum feature_priority priority = P_ZERO;
31370
31371 /* These are the target attribute strings for which a dispatcher is
31372 available, from fold_builtin_cpu. */
31373
31374 static struct _feature_list
31375 {
31376 const char *const name;
31377 const enum feature_priority priority;
31378 }
31379 const feature_list[] =
31380 {
31381 {"mmx", P_MMX},
31382 {"sse", P_SSE},
31383 {"sse2", P_SSE2},
31384 {"sse3", P_SSE3},
31385 {"sse4a", P_SSE4_A},
31386 {"ssse3", P_SSSE3},
31387 {"sse4.1", P_SSE4_1},
31388 {"sse4.2", P_SSE4_2},
31389 {"popcnt", P_POPCNT},
31390 {"avx", P_AVX},
31391 {"fma4", P_FMA4},
31392 {"xop", P_XOP},
31393 {"fma", P_FMA},
31394 {"avx2", P_AVX2}
31395 };
31396
31397
31398 static unsigned int NUM_FEATURES
31399 = sizeof (feature_list) / sizeof (struct _feature_list);
31400
31401 unsigned int i;
31402
31403 tree predicate_chain = NULL_TREE;
31404 tree predicate_decl, predicate_arg;
31405
31406 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31407 gcc_assert (attrs != NULL);
31408
31409 attrs = TREE_VALUE (TREE_VALUE (attrs));
31410
31411 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31412 attrs_str = TREE_STRING_POINTER (attrs);
31413
31414 /* Return priority zero for default function. */
31415 if (strcmp (attrs_str, "default") == 0)
31416 return 0;
31417
31418 /* Handle arch= if specified. For priority, set it to be 1 more than
31419 the best instruction set the processor can handle. For instance, if
31420 there is a version for atom and a version for ssse3 (the highest ISA
31421 priority for atom), the atom version must be checked for dispatch
31422 before the ssse3 version. */
31423 if (strstr (attrs_str, "arch=") != NULL)
31424 {
31425 cl_target_option_save (&cur_target, &global_options);
31426 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31427 &global_options_set);
31428
31429 gcc_assert (target_node);
31430 new_target = TREE_TARGET_OPTION (target_node);
31431 gcc_assert (new_target);
31432
31433 if (new_target->arch_specified && new_target->arch > 0)
31434 {
31435 switch (new_target->arch)
31436 {
31437 case PROCESSOR_CORE2:
31438 arg_str = "core2";
31439 priority = P_PROC_SSSE3;
31440 break;
31441 case PROCESSOR_NEHALEM:
31442 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31443 arg_str = "westmere";
31444 else
31445 /* We translate "arch=corei7" and "arch=nehalem" to
31446 "corei7" so that it will be mapped to M_INTEL_COREI7
31447 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31448 arg_str = "corei7";
31449 priority = P_PROC_SSE4_2;
31450 break;
31451 case PROCESSOR_SANDYBRIDGE:
31452 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31453 arg_str = "ivybridge";
31454 else
31455 arg_str = "sandybridge";
31456 priority = P_PROC_AVX;
31457 break;
31458 case PROCESSOR_HASWELL:
31459 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31460 arg_str = "broadwell";
31461 else
31462 arg_str = "haswell";
31463 priority = P_PROC_AVX2;
31464 break;
31465 case PROCESSOR_BONNELL:
31466 arg_str = "bonnell";
31467 priority = P_PROC_SSSE3;
31468 break;
31469 case PROCESSOR_SILVERMONT:
31470 arg_str = "silvermont";
31471 priority = P_PROC_SSE4_2;
31472 break;
31473 case PROCESSOR_AMDFAM10:
31474 arg_str = "amdfam10h";
31475 priority = P_PROC_SSE4_A;
31476 break;
31477 case PROCESSOR_BTVER1:
31478 arg_str = "btver1";
31479 priority = P_PROC_SSE4_A;
31480 break;
31481 case PROCESSOR_BTVER2:
31482 arg_str = "btver2";
31483 priority = P_PROC_AVX;
31484 break;
31485 case PROCESSOR_BDVER1:
31486 arg_str = "bdver1";
31487 priority = P_PROC_XOP;
31488 break;
31489 case PROCESSOR_BDVER2:
31490 arg_str = "bdver2";
31491 priority = P_PROC_FMA;
31492 break;
31493 case PROCESSOR_BDVER3:
31494 arg_str = "bdver3";
31495 priority = P_PROC_FMA;
31496 break;
31497 case PROCESSOR_BDVER4:
31498 arg_str = "bdver4";
31499 priority = P_PROC_AVX2;
31500 break;
31501 }
31502 }
31503
31504 cl_target_option_restore (&global_options, &cur_target);
31505
31506 if (predicate_list && arg_str == NULL)
31507 {
31508 error_at (DECL_SOURCE_LOCATION (decl),
31509 "No dispatcher found for the versioning attributes");
31510 return 0;
31511 }
31512
31513 if (predicate_list)
31514 {
31515 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31516 /* For a C string literal the length includes the trailing NULL. */
31517 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31518 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31519 predicate_chain);
31520 }
31521 }
31522
31523 /* Process feature name. */
31524 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31525 strcpy (tok_str, attrs_str);
31526 token = strtok (tok_str, ",");
31527 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31528
31529 while (token != NULL)
31530 {
31531 /* Do not process "arch=" */
31532 if (strncmp (token, "arch=", 5) == 0)
31533 {
31534 token = strtok (NULL, ",");
31535 continue;
31536 }
31537 for (i = 0; i < NUM_FEATURES; ++i)
31538 {
31539 if (strcmp (token, feature_list[i].name) == 0)
31540 {
31541 if (predicate_list)
31542 {
31543 predicate_arg = build_string_literal (
31544 strlen (feature_list[i].name) + 1,
31545 feature_list[i].name);
31546 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31547 predicate_chain);
31548 }
31549 /* Find the maximum priority feature. */
31550 if (feature_list[i].priority > priority)
31551 priority = feature_list[i].priority;
31552
31553 break;
31554 }
31555 }
31556 if (predicate_list && i == NUM_FEATURES)
31557 {
31558 error_at (DECL_SOURCE_LOCATION (decl),
31559 "No dispatcher found for %s", token);
31560 return 0;
31561 }
31562 token = strtok (NULL, ",");
31563 }
31564 free (tok_str);
31565
31566 if (predicate_list && predicate_chain == NULL_TREE)
31567 {
31568 error_at (DECL_SOURCE_LOCATION (decl),
31569 "No dispatcher found for the versioning attributes : %s",
31570 attrs_str);
31571 return 0;
31572 }
31573 else if (predicate_list)
31574 {
31575 predicate_chain = nreverse (predicate_chain);
31576 *predicate_list = predicate_chain;
31577 }
31578
31579 return priority;
31580 }
31581
31582 /* This compares the priority of target features in function DECL1
31583 and DECL2. It returns positive value if DECL1 is higher priority,
31584 negative value if DECL2 is higher priority and 0 if they are the
31585 same. */
31586
31587 static int
31588 ix86_compare_version_priority (tree decl1, tree decl2)
31589 {
31590 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31591 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31592
31593 return (int)priority1 - (int)priority2;
31594 }
31595
31596 /* V1 and V2 point to function versions with different priorities
31597 based on the target ISA. This function compares their priorities. */
31598
31599 static int
31600 feature_compare (const void *v1, const void *v2)
31601 {
31602 typedef struct _function_version_info
31603 {
31604 tree version_decl;
31605 tree predicate_chain;
31606 unsigned int dispatch_priority;
31607 } function_version_info;
31608
31609 const function_version_info c1 = *(const function_version_info *)v1;
31610 const function_version_info c2 = *(const function_version_info *)v2;
31611 return (c2.dispatch_priority - c1.dispatch_priority);
31612 }
31613
31614 /* This function generates the dispatch function for
31615 multi-versioned functions. DISPATCH_DECL is the function which will
31616 contain the dispatch logic. FNDECLS are the function choices for
31617 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31618 in DISPATCH_DECL in which the dispatch code is generated. */
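/* The generated resolver body therefore looks roughly like

	__builtin_cpu_init ();
	if (<predicates for version 1>)  return &version_1;
	if (<predicates for version 2>)  return &version_2;
	...
	return &default_version;

   with the versions ordered by descending dispatch priority.  */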
31619
31620 static int
31621 dispatch_function_versions (tree dispatch_decl,
31622 void *fndecls_p,
31623 basic_block *empty_bb)
31624 {
31625 tree default_decl;
31626 gimple ifunc_cpu_init_stmt;
31627 gimple_seq gseq;
31628 int ix;
31629 tree ele;
31630 vec<tree> *fndecls;
31631 unsigned int num_versions = 0;
31632 unsigned int actual_versions = 0;
31633 unsigned int i;
31634
31635 struct _function_version_info
31636 {
31637 tree version_decl;
31638 tree predicate_chain;
31639 unsigned int dispatch_priority;
31640 }*function_version_info;
31641
31642 gcc_assert (dispatch_decl != NULL
31643 && fndecls_p != NULL
31644 && empty_bb != NULL);
31645
31646 /* fndecls_p is actually a vector. */
31647 fndecls = static_cast<vec<tree> *> (fndecls_p);
31648
31649 /* At least one more version other than the default. */
31650 num_versions = fndecls->length ();
31651 gcc_assert (num_versions >= 2);
31652
31653 function_version_info = (struct _function_version_info *)
31654 XNEWVEC (struct _function_version_info, (num_versions - 1));
31655
31656 /* The first version in the vector is the default decl. */
31657 default_decl = (*fndecls)[0];
31658
31659 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31660
31661 gseq = bb_seq (*empty_bb);
31662 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31663 constructors, so explicitly call __builtin_cpu_init here. */
31664 ifunc_cpu_init_stmt = gimple_build_call_vec (
31665 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31666 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31667 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31668 set_bb_seq (*empty_bb, gseq);
31669
31670 pop_cfun ();
31671
31672
31673 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31674 {
31675 tree version_decl = ele;
31676 tree predicate_chain = NULL_TREE;
31677 unsigned int priority;
31678 /* Get attribute string, parse it and find the right predicate decl.
31679 The predicate function could be a lengthy combination of many
31680 features, like arch-type and various isa-variants. */
31681 priority = get_builtin_code_for_version (version_decl,
31682 &predicate_chain);
31683
31684 if (predicate_chain == NULL_TREE)
31685 continue;
31686
31687 function_version_info [actual_versions].version_decl = version_decl;
31688 function_version_info [actual_versions].predicate_chain
31689 = predicate_chain;
31690 function_version_info [actual_versions].dispatch_priority = priority;
31691 actual_versions++;
31692 }
31693
31694 /* Sort the versions according to descending order of dispatch priority. The
31695 priority is based on the ISA. This is not a perfect solution. There
31696 could still be ambiguity. If more than one function version is suitable
31697 to execute, which one should be dispatched? In future, allow the user
31698 to specify a dispatch priority next to the version. */
31699 qsort (function_version_info, actual_versions,
31700 sizeof (struct _function_version_info), feature_compare);
31701
31702 for (i = 0; i < actual_versions; ++i)
31703 *empty_bb = add_condition_to_bb (dispatch_decl,
31704 function_version_info[i].version_decl,
31705 function_version_info[i].predicate_chain,
31706 *empty_bb);
31707
31708 /* Dispatch the default version at the end. */
31709 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31710 NULL, *empty_bb);
31711
31712 free (function_version_info);
31713 return 0;
31714 }
31715
31716 /* Comparator function used by the qsort routine to sort the "target"
31717 attribute specification strings. */
31718
31719 static int
31720 attr_strcmp (const void *v1, const void *v2)
31721 {
31722 const char *c1 = *(char *const*)v1;
31723 const char *c2 = *(char *const*)v2;
31724 return strcmp (c1, c2);
31725 }
31726
31727 /* ARGLIST is the argument to target attribute. This function tokenizes
31728 the comma separated arguments, sorts them and returns a string which
31729 is a unique identifier for the comma separated arguments. It also
31730 replaces non-identifier characters "=,-" with "_". */
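/* For example, the attribute arguments "avx,arch=corei7" are tokenized into
   "avx" and "arch_corei7", sorted, and returned as "arch_corei7_avx".  */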
31731
31732 static char *
31733 sorted_attr_string (tree arglist)
31734 {
31735 tree arg;
31736 size_t str_len_sum = 0;
31737 char **args = NULL;
31738 char *attr_str, *ret_str;
31739 char *attr = NULL;
31740 unsigned int argnum = 1;
31741 unsigned int i;
31742
31743 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31744 {
31745 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31746 size_t len = strlen (str);
31747 str_len_sum += len + 1;
31748 if (arg != arglist)
31749 argnum++;
31750 for (i = 0; i < strlen (str); i++)
31751 if (str[i] == ',')
31752 argnum++;
31753 }
31754
31755 attr_str = XNEWVEC (char, str_len_sum);
31756 str_len_sum = 0;
31757 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31758 {
31759 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31760 size_t len = strlen (str);
31761 memcpy (attr_str + str_len_sum, str, len);
31762 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31763 str_len_sum += len + 1;
31764 }
31765
31766 /* Replace "=,-" with "_". */
31767 for (i = 0; i < strlen (attr_str); i++)
31768 if (attr_str[i] == '=' || attr_str[i]== '-')
31769 attr_str[i] = '_';
31770
31771 if (argnum == 1)
31772 return attr_str;
31773
31774 args = XNEWVEC (char *, argnum);
31775
31776 i = 0;
31777 attr = strtok (attr_str, ",");
31778 while (attr != NULL)
31779 {
31780 args[i] = attr;
31781 i++;
31782 attr = strtok (NULL, ",");
31783 }
31784
31785 qsort (args, argnum, sizeof (char *), attr_strcmp);
31786
31787 ret_str = XNEWVEC (char, str_len_sum);
31788 str_len_sum = 0;
31789 for (i = 0; i < argnum; i++)
31790 {
31791 size_t len = strlen (args[i]);
31792 memcpy (ret_str + str_len_sum, args[i], len);
31793 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31794 str_len_sum += len + 1;
31795 }
31796
31797 XDELETEVEC (args);
31798 XDELETEVEC (attr_str);
31799 return ret_str;
31800 }
31801
31802 /* This function changes the assembler name for functions that are
31803 versions. If DECL is a function version and has a "target"
31804 attribute, it appends the attribute string to its assembler name. */
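/* For example, a version of "foo" declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx", while
   the "default" version keeps its original name.  */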
31805
31806 static tree
31807 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31808 {
31809 tree version_attr;
31810 const char *orig_name, *version_string;
31811 char *attr_str, *assembler_name;
31812
31813 if (DECL_DECLARED_INLINE_P (decl)
31814 && lookup_attribute ("gnu_inline",
31815 DECL_ATTRIBUTES (decl)))
31816 error_at (DECL_SOURCE_LOCATION (decl),
31817 "Function versions cannot be marked as gnu_inline,"
31818 " bodies have to be generated");
31819
31820 if (DECL_VIRTUAL_P (decl)
31821 || DECL_VINDEX (decl))
31822 sorry ("Virtual function multiversioning not supported");
31823
31824 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31825
31826 /* target attribute string cannot be NULL. */
31827 gcc_assert (version_attr != NULL_TREE);
31828
31829 orig_name = IDENTIFIER_POINTER (id);
31830 version_string
31831 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31832
31833 if (strcmp (version_string, "default") == 0)
31834 return id;
31835
31836 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31837 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31838
31839 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31840
31841 /* Allow assembler name to be modified if already set. */
31842 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31843 SET_DECL_RTL (decl, NULL);
31844
31845 tree ret = get_identifier (assembler_name);
31846 XDELETEVEC (attr_str);
31847 XDELETEVEC (assembler_name);
31848 return ret;
31849 }
31850
31851 /* This function returns true if FN1 and FN2 are versions of the same function,
31852 that is, the target strings of the function decls are different. This assumes
31853 that FN1 and FN2 have the same signature. */
31854
31855 static bool
31856 ix86_function_versions (tree fn1, tree fn2)
31857 {
31858 tree attr1, attr2;
31859 char *target1, *target2;
31860 bool result;
31861
31862 if (TREE_CODE (fn1) != FUNCTION_DECL
31863 || TREE_CODE (fn2) != FUNCTION_DECL)
31864 return false;
31865
31866 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31867 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31868
31869 /* At least one function decl should have the target attribute specified. */
31870 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31871 return false;
31872
31873 /* Diagnose missing target attribute if one of the decls is already
31874 multi-versioned. */
31875 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31876 {
31877 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31878 {
31879 if (attr2 != NULL_TREE)
31880 {
31881 tree tem = fn1;
31882 fn1 = fn2;
31883 fn2 = tem;
31884 attr1 = attr2;
31885 }
31886 error_at (DECL_SOURCE_LOCATION (fn2),
31887 "missing %<target%> attribute for multi-versioned %D",
31888 fn2);
31889 inform (DECL_SOURCE_LOCATION (fn1),
31890 "previous declaration of %D", fn1);
31891 /* Prevent diagnosing of the same error multiple times. */
31892 DECL_ATTRIBUTES (fn2)
31893 = tree_cons (get_identifier ("target"),
31894 copy_node (TREE_VALUE (attr1)),
31895 DECL_ATTRIBUTES (fn2));
31896 }
31897 return false;
31898 }
31899
31900 target1 = sorted_attr_string (TREE_VALUE (attr1));
31901 target2 = sorted_attr_string (TREE_VALUE (attr2));
31902
31903 /* The sorted target strings must be different for fn1 and fn2
31904 to be versions. */
31905 if (strcmp (target1, target2) == 0)
31906 result = false;
31907 else
31908 result = true;
31909
31910 XDELETEVEC (target1);
31911 XDELETEVEC (target2);
31912
31913 return result;
31914 }
31915
31916 static tree
31917 ix86_mangle_decl_assembler_name (tree decl, tree id)
31918 {
31919 /* For function version, add the target suffix to the assembler name. */
31920 if (TREE_CODE (decl) == FUNCTION_DECL
31921 && DECL_FUNCTION_VERSIONED (decl))
31922 id = ix86_mangle_function_version_assembler_name (decl, id);
31923 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31924 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31925 #endif
31926
31927 return id;
31928 }
31929
31930 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31931 is true, append the full path name of the source file. */
31932
31933 static char *
31934 make_name (tree decl, const char *suffix, bool make_unique)
31935 {
31936 char *global_var_name;
31937 int name_len;
31938 const char *name;
31939 const char *unique_name = NULL;
31940
31941 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31942
31943 /* Get a unique name that can be used globally without any chances
31944 of collision at link time. */
31945 if (make_unique)
31946 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31947
31948 name_len = strlen (name) + strlen (suffix) + 2;
31949
31950 if (make_unique)
31951 name_len += strlen (unique_name) + 1;
31952 global_var_name = XNEWVEC (char, name_len);
31953
31954 /* Use '.' to concatenate names as it is demangler friendly. */
31955 if (make_unique)
31956 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31957 suffix);
31958 else
31959 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31960
31961 return global_var_name;
31962 }
31963
31964 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31965
31966 /* Make a dispatcher declaration for the multi-versioned function DECL.
31967 Calls to DECL function will be replaced with calls to the dispatcher
31968 by the front-end. Return the decl created. */
31969
31970 static tree
31971 make_dispatcher_decl (const tree decl)
31972 {
31973 tree func_decl;
31974 char *func_name;
31975 tree fn_type, func_type;
31976 bool is_uniq = false;
31977
31978 if (TREE_PUBLIC (decl) == 0)
31979 is_uniq = true;
31980
31981 func_name = make_name (decl, "ifunc", is_uniq);
31982
31983 fn_type = TREE_TYPE (decl);
31984 func_type = build_function_type (TREE_TYPE (fn_type),
31985 TYPE_ARG_TYPES (fn_type));
31986
31987 func_decl = build_fn_decl (func_name, func_type);
31988 XDELETEVEC (func_name);
31989 TREE_USED (func_decl) = 1;
31990 DECL_CONTEXT (func_decl) = NULL_TREE;
31991 DECL_INITIAL (func_decl) = error_mark_node;
31992 DECL_ARTIFICIAL (func_decl) = 1;
31993 /* Mark this function as external; the resolver will flip it again if
31994 it gets generated. */
31995 DECL_EXTERNAL (func_decl) = 1;
31996 /* IFUNCs have to be externally visible. */
31997 TREE_PUBLIC (func_decl) = 1;
31998
31999 return func_decl;
32000 }
32001
32002 #endif
32003
32004 /* Returns true if decl is multi-versioned and DECL is the default function,
32005 that is, it is not tagged with a target-specific optimization. */
32006
32007 static bool
32008 is_function_default_version (const tree decl)
32009 {
32010 if (TREE_CODE (decl) != FUNCTION_DECL
32011 || !DECL_FUNCTION_VERSIONED (decl))
32012 return false;
32013 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32014 gcc_assert (attr);
32015 attr = TREE_VALUE (TREE_VALUE (attr));
32016 return (TREE_CODE (attr) == STRING_CST
32017 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32018 }
32019
32020 /* Make a dispatcher declaration for the multi-versioned function DECL.
32021 Calls to DECL function will be replaced with calls to the dispatcher
32022 by the front-end. Returns the decl of the dispatcher function. */
32023
32024 static tree
32025 ix86_get_function_versions_dispatcher (void *decl)
32026 {
32027 tree fn = (tree) decl;
32028 struct cgraph_node *node = NULL;
32029 struct cgraph_node *default_node = NULL;
32030 struct cgraph_function_version_info *node_v = NULL;
32031 struct cgraph_function_version_info *first_v = NULL;
32032
32033 tree dispatch_decl = NULL;
32034
32035 struct cgraph_function_version_info *default_version_info = NULL;
32036
32037 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32038
32039 node = cgraph_get_node (fn);
32040 gcc_assert (node != NULL);
32041
32042 node_v = get_cgraph_node_version (node);
32043 gcc_assert (node_v != NULL);
32044
32045 if (node_v->dispatcher_resolver != NULL)
32046 return node_v->dispatcher_resolver;
32047
32048 /* Find the default version and make it the first node. */
32049 first_v = node_v;
32050 /* Go to the beginning of the chain. */
32051 while (first_v->prev != NULL)
32052 first_v = first_v->prev;
32053 default_version_info = first_v;
32054 while (default_version_info != NULL)
32055 {
32056 if (is_function_default_version
32057 (default_version_info->this_node->decl))
32058 break;
32059 default_version_info = default_version_info->next;
32060 }
32061
32062 /* If there is no default node, just return NULL. */
32063 if (default_version_info == NULL)
32064 return NULL;
32065
32066 /* Make default info the first node. */
32067 if (first_v != default_version_info)
32068 {
32069 default_version_info->prev->next = default_version_info->next;
32070 if (default_version_info->next)
32071 default_version_info->next->prev = default_version_info->prev;
32072 first_v->prev = default_version_info;
32073 default_version_info->next = first_v;
32074 default_version_info->prev = NULL;
32075 }
32076
32077 default_node = default_version_info->this_node;
32078
32079 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32080 if (targetm.has_ifunc_p ())
32081 {
32082 struct cgraph_function_version_info *it_v = NULL;
32083 struct cgraph_node *dispatcher_node = NULL;
32084 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32085
32086 /* Right now, the dispatching is done via ifunc. */
32087 dispatch_decl = make_dispatcher_decl (default_node->decl);
32088
32089 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32090 gcc_assert (dispatcher_node != NULL);
32091 dispatcher_node->dispatcher_function = 1;
32092 dispatcher_version_info
32093 = insert_new_cgraph_node_version (dispatcher_node);
32094 dispatcher_version_info->next = default_version_info;
32095 dispatcher_node->definition = 1;
32096
32097 /* Set the dispatcher for all the versions. */
32098 it_v = default_version_info;
32099 while (it_v != NULL)
32100 {
32101 it_v->dispatcher_resolver = dispatch_decl;
32102 it_v = it_v->next;
32103 }
32104 }
32105 else
32106 #endif
32107 {
32108 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32109 "multiversioning needs ifunc which is not supported "
32110 "on this target");
32111 }
32112
32113 return dispatch_decl;
32114 }
32115
32116 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32117 it to CHAIN. */
32118
32119 static tree
32120 make_attribute (const char *name, const char *arg_name, tree chain)
32121 {
32122 tree attr_name;
32123 tree attr_arg_name;
32124 tree attr_args;
32125 tree attr;
32126
32127 attr_name = get_identifier (name);
32128 attr_arg_name = build_string (strlen (arg_name), arg_name);
32129 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32130 attr = tree_cons (attr_name, attr_args, chain);
32131 return attr;
32132 }
32133
32134 /* Make the resolver function decl to dispatch the versions of
32135 a multi-versioned function, DEFAULT_DECL. Create an
32136 empty basic block in the resolver and store the pointer in
32137 EMPTY_BB. Return the decl of the resolver function. */
32138
32139 static tree
32140 make_resolver_func (const tree default_decl,
32141 const tree dispatch_decl,
32142 basic_block *empty_bb)
32143 {
32144 char *resolver_name;
32145 tree decl, type, decl_name, t;
32146 bool is_uniq = false;
32147
32148 /* IFUNCs have to be globally visible. So, if the default_decl is
32149 not, then the name of the IFUNC should be made unique. */
32150 if (TREE_PUBLIC (default_decl) == 0)
32151 is_uniq = true;
32152
32153 /* Append the filename to the resolver function if the versions are
32154 not externally visible. This is because the resolver function has
32155 to be externally visible for the loader to find it. So, appending
32156 the filename will prevent conflicts with a resolver function from
32157 another module which is based on the same version name. */
32158 resolver_name = make_name (default_decl, "resolver", is_uniq);
32159
32160 /* The resolver function should return a (void *). */
32161 type = build_function_type_list (ptr_type_node, NULL_TREE);
32162
32163 decl = build_fn_decl (resolver_name, type);
32164 decl_name = get_identifier (resolver_name);
32165 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32166
32167 DECL_NAME (decl) = decl_name;
32168 TREE_USED (decl) = 1;
32169 DECL_ARTIFICIAL (decl) = 1;
32170 DECL_IGNORED_P (decl) = 0;
32171 /* IFUNC resolvers have to be externally visible. */
32172 TREE_PUBLIC (decl) = 1;
32173 DECL_UNINLINABLE (decl) = 1;
32174
32175 /* Resolver is not external, body is generated. */
32176 DECL_EXTERNAL (decl) = 0;
32177 DECL_EXTERNAL (dispatch_decl) = 0;
32178
32179 DECL_CONTEXT (decl) = NULL_TREE;
32180 DECL_INITIAL (decl) = make_node (BLOCK);
32181 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32182
32183 if (DECL_COMDAT_GROUP (default_decl)
32184 || TREE_PUBLIC (default_decl))
32185 {
32186 /* In this case, each translation unit with a call to this
32187 versioned function will put out a resolver. Ensure it
32188 is comdat to keep just one copy. */
32189 DECL_COMDAT (decl) = 1;
32190 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32191 }
32192 /* Build result decl and add to function_decl. */
32193 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32194 DECL_ARTIFICIAL (t) = 1;
32195 DECL_IGNORED_P (t) = 1;
32196 DECL_RESULT (decl) = t;
32197
32198 gimplify_function_tree (decl);
32199 push_cfun (DECL_STRUCT_FUNCTION (decl));
32200 *empty_bb = init_lowered_empty_function (decl, false);
32201
32202 cgraph_add_new_function (decl, true);
32203 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32204
32205 pop_cfun ();
32206
32207 gcc_assert (dispatch_decl != NULL);
32208 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32209 DECL_ATTRIBUTES (dispatch_decl)
32210 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32211
32212 /* Create the alias for dispatch to resolver here. */
32213 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32214 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32215 XDELETEVEC (resolver_name);
32216 return decl;
32217 }
32218
32219 /* Generate the dispatching code body to dispatch multi-versioned function
32220 DECL. The target hook is called to process the "target" attributes and
32221 provide the code to dispatch the right function at run-time. NODE points
32222 to the dispatcher decl whose body will be created. */
32223
32224 static tree
32225 ix86_generate_version_dispatcher_body (void *node_p)
32226 {
32227 tree resolver_decl;
32228 basic_block empty_bb;
32229 tree default_ver_decl;
32230 struct cgraph_node *versn;
32231 struct cgraph_node *node;
32232
32233 struct cgraph_function_version_info *node_version_info = NULL;
32234 struct cgraph_function_version_info *versn_info = NULL;
32235
32236 node = (cgraph_node *)node_p;
32237
32238 node_version_info = get_cgraph_node_version (node);
32239 gcc_assert (node->dispatcher_function
32240 && node_version_info != NULL);
32241
32242 if (node_version_info->dispatcher_resolver)
32243 return node_version_info->dispatcher_resolver;
32244
32245 /* The first version in the chain corresponds to the default version. */
32246 default_ver_decl = node_version_info->next->this_node->decl;
32247
32248 /* node is going to be an alias, so remove the finalized bit. */
32249 node->definition = false;
32250
32251 resolver_decl = make_resolver_func (default_ver_decl,
32252 node->decl, &empty_bb);
32253
32254 node_version_info->dispatcher_resolver = resolver_decl;
32255
32256 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32257
32258 auto_vec<tree, 2> fn_ver_vec;
32259
32260 for (versn_info = node_version_info->next; versn_info;
32261 versn_info = versn_info->next)
32262 {
32263 versn = versn_info->this_node;
32264 /* Check for virtual functions here again, as by this time it should
32265 have been determined if this function needs a vtable index or
32266 not. This happens for methods in derived classes that override
32267 virtual methods in base classes but are not explicitly marked as
32268 virtual. */
32269 if (DECL_VINDEX (versn->decl))
32270 sorry ("Virtual function multiversioning not supported");
32271
32272 fn_ver_vec.safe_push (versn->decl);
32273 }
32274
32275 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32276 rebuild_cgraph_edges ();
32277 pop_cfun ();
32278 return resolver_decl;
32279 }
32280 /* This builds the processor_model struct type defined in
32281 libgcc/config/i386/cpuinfo.c */
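/* The layout built below mirrors, as a sketch, the C declaration

	struct __processor_model
	{
	  unsigned int __cpu_vendor;
	  unsigned int __cpu_type;
	  unsigned int __cpu_subtype;
	  unsigned int __cpu_features[1];
	};  */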
32282
32283 static tree
32284 build_processor_model_struct (void)
32285 {
32286 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32287 "__cpu_features"};
32288 tree field = NULL_TREE, field_chain = NULL_TREE;
32289 int i;
32290 tree type = make_node (RECORD_TYPE);
32291
32292 /* The first 3 fields are unsigned int. */
32293 for (i = 0; i < 3; ++i)
32294 {
32295 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32296 get_identifier (field_name[i]), unsigned_type_node);
32297 if (field_chain != NULL_TREE)
32298 DECL_CHAIN (field) = field_chain;
32299 field_chain = field;
32300 }
32301
32302 /* The last field is an array of unsigned integers of size one. */
32303 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32304 get_identifier (field_name[3]),
32305 build_array_type (unsigned_type_node,
32306 build_index_type (size_one_node)));
32307 if (field_chain != NULL_TREE)
32308 DECL_CHAIN (field) = field_chain;
32309 field_chain = field;
32310
32311 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32312 return type;
32313 }
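
/* For reference, the record built above is meant to mirror a libgcc-side
   definition along these lines (see libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   fold_builtin_cpu below locates fields by their position in the chain,
   so the field order here and in libgcc must stay in sync.  */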
32314
32315 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32316
32317 static tree
32318 make_var_decl (tree type, const char *name)
32319 {
32320 tree new_decl;
32321
32322 new_decl = build_decl (UNKNOWN_LOCATION,
32323 VAR_DECL,
32324 get_identifier (name),
32325 type);
32326
32327 DECL_EXTERNAL (new_decl) = 1;
32328 TREE_STATIC (new_decl) = 1;
32329 TREE_PUBLIC (new_decl) = 1;
32330 DECL_INITIAL (new_decl) = 0;
32331 DECL_ARTIFICIAL (new_decl) = 0;
32332 DECL_PRESERVE_P (new_decl) = 1;
32333
32334 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32335 assemble_variable (new_decl, 0, 0, 0);
32336
32337 return new_decl;
32338 }
32339
32340 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32341 into an integer defined in libgcc/config/i386/cpuinfo.c */
32342
32343 static tree
32344 fold_builtin_cpu (tree fndecl, tree *args)
32345 {
32346 unsigned int i;
32347 enum ix86_builtins fn_code = (enum ix86_builtins)
32348 DECL_FUNCTION_CODE (fndecl);
32349 tree param_string_cst = NULL;
32350
32351 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32352 enum processor_features
32353 {
32354 F_CMOV = 0,
32355 F_MMX,
32356 F_POPCNT,
32357 F_SSE,
32358 F_SSE2,
32359 F_SSE3,
32360 F_SSSE3,
32361 F_SSE4_1,
32362 F_SSE4_2,
32363 F_AVX,
32364 F_AVX2,
32365 F_SSE4_A,
32366 F_FMA4,
32367 F_XOP,
32368 F_FMA,
32369 F_MAX
32370 };
32371
32372 /* These are the values for vendor types and cpu types and subtypes
32373 in cpuinfo.c. To get the index of a cpu type or subtype, subtract
32374 the corresponding start value from it. */
32375 enum processor_model
32376 {
32377 M_INTEL = 1,
32378 M_AMD,
32379 M_CPU_TYPE_START,
32380 M_INTEL_BONNELL,
32381 M_INTEL_CORE2,
32382 M_INTEL_COREI7,
32383 M_AMDFAM10H,
32384 M_AMDFAM15H,
32385 M_INTEL_SILVERMONT,
32386 M_AMD_BTVER1,
32387 M_AMD_BTVER2,
32388 M_CPU_SUBTYPE_START,
32389 M_INTEL_COREI7_NEHALEM,
32390 M_INTEL_COREI7_WESTMERE,
32391 M_INTEL_COREI7_SANDYBRIDGE,
32392 M_AMDFAM10H_BARCELONA,
32393 M_AMDFAM10H_SHANGHAI,
32394 M_AMDFAM10H_ISTANBUL,
32395 M_AMDFAM15H_BDVER1,
32396 M_AMDFAM15H_BDVER2,
32397 M_AMDFAM15H_BDVER3,
32398 M_AMDFAM15H_BDVER4,
32399 M_INTEL_COREI7_IVYBRIDGE,
32400 M_INTEL_COREI7_HASWELL
32401 };
32402
32403 static struct _arch_names_table
32404 {
32405 const char *const name;
32406 const enum processor_model model;
32407 }
32408 const arch_names_table[] =
32409 {
32410 {"amd", M_AMD},
32411 {"intel", M_INTEL},
32412 {"atom", M_INTEL_BONNELL},
32413 {"slm", M_INTEL_SILVERMONT},
32414 {"core2", M_INTEL_CORE2},
32415 {"corei7", M_INTEL_COREI7},
32416 {"nehalem", M_INTEL_COREI7_NEHALEM},
32417 {"westmere", M_INTEL_COREI7_WESTMERE},
32418 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32419 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32420 {"haswell", M_INTEL_COREI7_HASWELL},
32421 {"bonnell", M_INTEL_BONNELL},
32422 {"silvermont", M_INTEL_SILVERMONT},
32423 {"amdfam10h", M_AMDFAM10H},
32424 {"barcelona", M_AMDFAM10H_BARCELONA},
32425 {"shanghai", M_AMDFAM10H_SHANGHAI},
32426 {"istanbul", M_AMDFAM10H_ISTANBUL},
32427 {"btver1", M_AMD_BTVER1},
32428 {"amdfam15h", M_AMDFAM15H},
32429 {"bdver1", M_AMDFAM15H_BDVER1},
32430 {"bdver2", M_AMDFAM15H_BDVER2},
32431 {"bdver3", M_AMDFAM15H_BDVER3},
32432 {"bdver4", M_AMDFAM15H_BDVER4},
32433 {"btver2", M_AMD_BTVER2},
32434 };
32435
32436 static struct _isa_names_table
32437 {
32438 const char *const name;
32439 const enum processor_features feature;
32440 }
32441 const isa_names_table[] =
32442 {
32443 {"cmov", F_CMOV},
32444 {"mmx", F_MMX},
32445 {"popcnt", F_POPCNT},
32446 {"sse", F_SSE},
32447 {"sse2", F_SSE2},
32448 {"sse3", F_SSE3},
32449 {"ssse3", F_SSSE3},
32450 {"sse4a", F_SSE4_A},
32451 {"sse4.1", F_SSE4_1},
32452 {"sse4.2", F_SSE4_2},
32453 {"avx", F_AVX},
32454 {"fma4", F_FMA4},
32455 {"xop", F_XOP},
32456 {"fma", F_FMA},
32457 {"avx2", F_AVX2}
32458 };
32459
32460 tree __processor_model_type = build_processor_model_struct ();
32461 tree __cpu_model_var = make_var_decl (__processor_model_type,
32462 "__cpu_model");
32463
32464
32465 varpool_add_new_variable (__cpu_model_var);
32466
32467 gcc_assert ((args != NULL) && (*args != NULL));
32468
32469 param_string_cst = *args;
32470 while (param_string_cst
32471 && TREE_CODE (param_string_cst) != STRING_CST)
32472 {
32473 /* *args must be an expr that can contain other EXPRs leading to a
32474 STRING_CST. */
32475 if (!EXPR_P (param_string_cst))
32476 {
32477 error ("Parameter to builtin must be a string constant or literal");
32478 return integer_zero_node;
32479 }
32480 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32481 }
32482
32483 gcc_assert (param_string_cst);
32484
32485 if (fn_code == IX86_BUILTIN_CPU_IS)
32486 {
32487 tree ref;
32488 tree field;
32489 tree final;
32490
32491 unsigned int field_val = 0;
32492 unsigned int NUM_ARCH_NAMES
32493 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32494
32495 for (i = 0; i < NUM_ARCH_NAMES; i++)
32496 if (strcmp (arch_names_table[i].name,
32497 TREE_STRING_POINTER (param_string_cst)) == 0)
32498 break;
32499
32500 if (i == NUM_ARCH_NAMES)
32501 {
32502 error ("Parameter to builtin not valid: %s",
32503 TREE_STRING_POINTER (param_string_cst));
32504 return integer_zero_node;
32505 }
32506
32507 field = TYPE_FIELDS (__processor_model_type);
32508 field_val = arch_names_table[i].model;
32509
32510 /* CPU types are stored in the next field. */
32511 if (field_val > M_CPU_TYPE_START
32512 && field_val < M_CPU_SUBTYPE_START)
32513 {
32514 field = DECL_CHAIN (field);
32515 field_val -= M_CPU_TYPE_START;
32516 }
32517
32518 /* CPU subtypes are stored in the next field. */
32519 if (field_val > M_CPU_SUBTYPE_START)
32520 {
32521 field = DECL_CHAIN (DECL_CHAIN (field));
32522 field_val -= M_CPU_SUBTYPE_START;
32523 }
32524
32525 /* Get the appropriate field in __cpu_model. */
32526 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32527 field, NULL_TREE);
32528
32529 /* Check the value. */
32530 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32531 build_int_cstu (unsigned_type_node, field_val));
32532 return build1 (CONVERT_EXPR, integer_type_node, final);
32533 }
32534 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32535 {
32536 tree ref;
32537 tree array_elt;
32538 tree field;
32539 tree final;
32540
32541 unsigned int field_val = 0;
32542 unsigned int NUM_ISA_NAMES
32543 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32544
32545 for (i = 0; i < NUM_ISA_NAMES; i++)
32546 if (strcmp (isa_names_table[i].name,
32547 TREE_STRING_POINTER (param_string_cst)) == 0)
32548 break;
32549
32550 if (i == NUM_ISA_NAMES)
32551 {
32552 error ("Parameter to builtin not valid: %s",
32553 TREE_STRING_POINTER (param_string_cst));
32554 return integer_zero_node;
32555 }
32556
32557 field = TYPE_FIELDS (__processor_model_type);
32558 /* Get the last field, which is __cpu_features. */
32559 while (DECL_CHAIN (field))
32560 field = DECL_CHAIN (field);
32561
32562 /* Get the appropriate field: __cpu_model.__cpu_features */
32563 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32564 field, NULL_TREE);
32565
32566 /* Access the 0th element of __cpu_features array. */
32567 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32568 integer_zero_node, NULL_TREE, NULL_TREE);
32569
32570 field_val = (1 << isa_names_table[i].feature);
32571 /* Return __cpu_model.__cpu_features[0] & field_val */
32572 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32573 build_int_cstu (unsigned_type_node, field_val));
32574 return build1 (CONVERT_EXPR, integer_type_node, final);
32575 }
32576 gcc_unreachable ();
32577 }
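
/* Two worked examples of the folding above, using the enum values from
   this function (a sketch of the resulting trees written as C):

     __builtin_cpu_is ("corei7")
       becomes  (int) (__cpu_model.__cpu_type
                       == M_INTEL_COREI7 - M_CPU_TYPE_START)

     __builtin_cpu_supports ("avx")
       becomes  (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))

   i.e. both builtins reduce to a load from the libgcc-maintained
   __cpu_model variable followed by a comparison or a bit test.  */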
32578
32579 static tree
32580 ix86_fold_builtin (tree fndecl, int n_args,
32581 tree *args, bool ignore ATTRIBUTE_UNUSED)
32582 {
32583 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32584 {
32585 enum ix86_builtins fn_code = (enum ix86_builtins)
32586 DECL_FUNCTION_CODE (fndecl);
32587 if (fn_code == IX86_BUILTIN_CPU_IS
32588 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32589 {
32590 gcc_assert (n_args == 1);
32591 return fold_builtin_cpu (fndecl, args);
32592 }
32593 }
32594
32595 #ifdef SUBTARGET_FOLD_BUILTIN
32596 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32597 #endif
32598
32599 return NULL_TREE;
32600 }
32601
32602 /* Make builtins to detect cpu type and features supported. NAME is
32603 the builtin name, CODE is the builtin code, and FTYPE is the function
32604 type of the builtin. */
32605
32606 static void
32607 make_cpu_type_builtin (const char* name, int code,
32608 enum ix86_builtin_func_type ftype, bool is_const)
32609 {
32610 tree decl;
32611 tree type;
32612
32613 type = ix86_get_builtin_func_type (ftype);
32614 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32615 NULL, NULL_TREE);
32616 gcc_assert (decl != NULL_TREE);
32617 ix86_builtins[(int) code] = decl;
32618 TREE_READONLY (decl) = is_const;
32619 }
32620
32621 /* Make builtins to get CPU type and features supported. The created
32622 builtins are:
32623
32624 __builtin_cpu_init (), to detect cpu type and features,
32625 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32626 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32627 */
32628
32629 static void
32630 ix86_init_platform_type_builtins (void)
32631 {
32632 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32633 INT_FTYPE_VOID, false);
32634 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32635 INT_FTYPE_PCCHAR, true);
32636 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32637 INT_FTYPE_PCCHAR, true);
32638 }
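
/* Typical use of these builtins from user code (illustrative only):

     #include <stdio.h>

     int
     main (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("intel"))
         printf ("running on an Intel CPU\n");
       if (__builtin_cpu_supports ("avx2"))
         printf ("AVX2 is available\n");
       return 0;
     }

   __builtin_cpu_init fills in __cpu_model and must run before the other
   two are used; in normal programs a libgcc constructor does this
   automatically, but code that runs very early (e.g. from constructors)
   may need the explicit call.  */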
32639
32640 /* Internal method for ix86_init_builtins. */
32641
32642 static void
32643 ix86_init_builtins_va_builtins_abi (void)
32644 {
32645 tree ms_va_ref, sysv_va_ref;
32646 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32647 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32648 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32649 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32650
32651 if (!TARGET_64BIT)
32652 return;
32653 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32654 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32655 ms_va_ref = build_reference_type (ms_va_list_type_node);
32656 sysv_va_ref =
32657 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32658
32659 fnvoid_va_end_ms =
32660 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32661 fnvoid_va_start_ms =
32662 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32663 fnvoid_va_end_sysv =
32664 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32665 fnvoid_va_start_sysv =
32666 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32667 NULL_TREE);
32668 fnvoid_va_copy_ms =
32669 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32670 NULL_TREE);
32671 fnvoid_va_copy_sysv =
32672 build_function_type_list (void_type_node, sysv_va_ref,
32673 sysv_va_ref, NULL_TREE);
32674
32675 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32676 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32677 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32678 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32679 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32680 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32681 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32682 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32683 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32684 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32685 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32686 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32687 }
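
/* A sketch of how the ABI-specific varargs builtins registered above are
   meant to be used (illustrative; the supported interface is documented
   in the GCC manual):

     __attribute__ ((ms_abi)) int
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   An ms_abi function on x86-64 must use the ms_va_* flavour and a
   sysv_abi function the sysv_va_* flavour, because the two calling
   conventions pass and save variadic arguments differently.  */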
32688
32689 static void
32690 ix86_init_builtin_types (void)
32691 {
32692 tree float128_type_node, float80_type_node;
32693
32694 /* The __float80 type. */
32695 float80_type_node = long_double_type_node;
32696 if (TYPE_MODE (float80_type_node) != XFmode)
32697 {
32698 /* The __float80 type. */
32699 float80_type_node = make_node (REAL_TYPE);
32700
32701 TYPE_PRECISION (float80_type_node) = 80;
32702 layout_type (float80_type_node);
32703 }
32704 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32705
32706 /* The __float128 type. */
32707 float128_type_node = make_node (REAL_TYPE);
32708 TYPE_PRECISION (float128_type_node) = 128;
32709 layout_type (float128_type_node);
32710 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32711
32712 /* This macro is built by i386-builtin-types.awk. */
32713 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32714 }
32715
32716 static void
32717 ix86_init_builtins (void)
32718 {
32719 tree t;
32720
32721 ix86_init_builtin_types ();
32722
32723 /* Builtins to get CPU type and features. */
32724 ix86_init_platform_type_builtins ();
32725
32726 /* TFmode support builtins. */
32727 def_builtin_const (0, "__builtin_infq",
32728 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32729 def_builtin_const (0, "__builtin_huge_valq",
32730 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32731
32732 /* We will expand them to normal calls if SSE isn't available, since
32733 they are used by libgcc. */
32734 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32735 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32736 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32737 TREE_READONLY (t) = 1;
32738 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32739
32740 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32741 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32742 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32743 TREE_READONLY (t) = 1;
32744 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32745
32746 ix86_init_tm_builtins ();
32747 ix86_init_mmx_sse_builtins ();
32748
32749 if (TARGET_LP64)
32750 ix86_init_builtins_va_builtins_abi ();
32751
32752 #ifdef SUBTARGET_INIT_BUILTINS
32753 SUBTARGET_INIT_BUILTINS;
32754 #endif
32755 }
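
/* The TFmode builtins registered above back GCC's __float128 support on
   x86.  A minimal user-level sketch (the Q literal suffix is a GNU
   extension):

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (-3.5Q);
     __float128 z = __builtin_copysignq (y, -1.0Q);

   When SSE is not available these expand to library calls (__fabstf2,
   __copysigntf3) rather than inline code, as noted in the comment above,
   since libgcc itself uses them.  */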
32756
32757 /* Return the ix86 builtin for CODE. */
32758
32759 static tree
32760 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32761 {
32762 if (code >= IX86_BUILTIN_MAX)
32763 return error_mark_node;
32764
32765 return ix86_builtins[code];
32766 }
32767
32768 /* Errors in the source file can cause expand_expr to return const0_rtx
32769 where we expect a vector. To avoid crashing, use one of the vector
32770 clear instructions. */
32771 static rtx
32772 safe_vector_operand (rtx x, enum machine_mode mode)
32773 {
32774 if (x == const0_rtx)
32775 x = CONST0_RTX (mode);
32776 return x;
32777 }
32778
32779 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32780
32781 static rtx
32782 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32783 {
32784 rtx pat;
32785 tree arg0 = CALL_EXPR_ARG (exp, 0);
32786 tree arg1 = CALL_EXPR_ARG (exp, 1);
32787 rtx op0 = expand_normal (arg0);
32788 rtx op1 = expand_normal (arg1);
32789 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32790 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32791 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32792
32793 if (VECTOR_MODE_P (mode0))
32794 op0 = safe_vector_operand (op0, mode0);
32795 if (VECTOR_MODE_P (mode1))
32796 op1 = safe_vector_operand (op1, mode1);
32797
32798 if (optimize || !target
32799 || GET_MODE (target) != tmode
32800 || !insn_data[icode].operand[0].predicate (target, tmode))
32801 target = gen_reg_rtx (tmode);
32802
32803 if (GET_MODE (op1) == SImode && mode1 == TImode)
32804 {
32805 rtx x = gen_reg_rtx (V4SImode);
32806 emit_insn (gen_sse2_loadd (x, op1));
32807 op1 = gen_lowpart (TImode, x);
32808 }
32809
32810 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32811 op0 = copy_to_mode_reg (mode0, op0);
32812 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32813 op1 = copy_to_mode_reg (mode1, op1);
32814
32815 pat = GEN_FCN (icode) (target, op0, op1);
32816 if (! pat)
32817 return 0;
32818
32819 emit_insn (pat);
32820
32821 return target;
32822 }
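
/* For example, a two-operand builtin such as __builtin_ia32_addps
   (IX86_BUILTIN_ADDPS, insn code addv4sf3) is routed here from
   ix86_expand_args_builtin: both operands are forced into registers
   whenever the insn's predicates require it, and a single addv4sf3
   pattern is emitted into the current sequence.  The names are given
   only for illustration; the exact mapping lives in the builtin
   tables.  */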
32823
32824 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32825
32826 static rtx
32827 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32828 enum ix86_builtin_func_type m_type,
32829 enum rtx_code sub_code)
32830 {
32831 rtx pat;
32832 int i;
32833 int nargs;
32834 bool comparison_p = false;
32835 bool tf_p = false;
32836 bool last_arg_constant = false;
32837 int num_memory = 0;
32838 struct {
32839 rtx op;
32840 enum machine_mode mode;
32841 } args[4];
32842
32843 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32844
32845 switch (m_type)
32846 {
32847 case MULTI_ARG_4_DF2_DI_I:
32848 case MULTI_ARG_4_DF2_DI_I1:
32849 case MULTI_ARG_4_SF2_SI_I:
32850 case MULTI_ARG_4_SF2_SI_I1:
32851 nargs = 4;
32852 last_arg_constant = true;
32853 break;
32854
32855 case MULTI_ARG_3_SF:
32856 case MULTI_ARG_3_DF:
32857 case MULTI_ARG_3_SF2:
32858 case MULTI_ARG_3_DF2:
32859 case MULTI_ARG_3_DI:
32860 case MULTI_ARG_3_SI:
32861 case MULTI_ARG_3_SI_DI:
32862 case MULTI_ARG_3_HI:
32863 case MULTI_ARG_3_HI_SI:
32864 case MULTI_ARG_3_QI:
32865 case MULTI_ARG_3_DI2:
32866 case MULTI_ARG_3_SI2:
32867 case MULTI_ARG_3_HI2:
32868 case MULTI_ARG_3_QI2:
32869 nargs = 3;
32870 break;
32871
32872 case MULTI_ARG_2_SF:
32873 case MULTI_ARG_2_DF:
32874 case MULTI_ARG_2_DI:
32875 case MULTI_ARG_2_SI:
32876 case MULTI_ARG_2_HI:
32877 case MULTI_ARG_2_QI:
32878 nargs = 2;
32879 break;
32880
32881 case MULTI_ARG_2_DI_IMM:
32882 case MULTI_ARG_2_SI_IMM:
32883 case MULTI_ARG_2_HI_IMM:
32884 case MULTI_ARG_2_QI_IMM:
32885 nargs = 2;
32886 last_arg_constant = true;
32887 break;
32888
32889 case MULTI_ARG_1_SF:
32890 case MULTI_ARG_1_DF:
32891 case MULTI_ARG_1_SF2:
32892 case MULTI_ARG_1_DF2:
32893 case MULTI_ARG_1_DI:
32894 case MULTI_ARG_1_SI:
32895 case MULTI_ARG_1_HI:
32896 case MULTI_ARG_1_QI:
32897 case MULTI_ARG_1_SI_DI:
32898 case MULTI_ARG_1_HI_DI:
32899 case MULTI_ARG_1_HI_SI:
32900 case MULTI_ARG_1_QI_DI:
32901 case MULTI_ARG_1_QI_SI:
32902 case MULTI_ARG_1_QI_HI:
32903 nargs = 1;
32904 break;
32905
32906 case MULTI_ARG_2_DI_CMP:
32907 case MULTI_ARG_2_SI_CMP:
32908 case MULTI_ARG_2_HI_CMP:
32909 case MULTI_ARG_2_QI_CMP:
32910 nargs = 2;
32911 comparison_p = true;
32912 break;
32913
32914 case MULTI_ARG_2_SF_TF:
32915 case MULTI_ARG_2_DF_TF:
32916 case MULTI_ARG_2_DI_TF:
32917 case MULTI_ARG_2_SI_TF:
32918 case MULTI_ARG_2_HI_TF:
32919 case MULTI_ARG_2_QI_TF:
32920 nargs = 2;
32921 tf_p = true;
32922 break;
32923
32924 default:
32925 gcc_unreachable ();
32926 }
32927
32928 if (optimize || !target
32929 || GET_MODE (target) != tmode
32930 || !insn_data[icode].operand[0].predicate (target, tmode))
32931 target = gen_reg_rtx (tmode);
32932
32933 gcc_assert (nargs <= 4);
32934
32935 for (i = 0; i < nargs; i++)
32936 {
32937 tree arg = CALL_EXPR_ARG (exp, i);
32938 rtx op = expand_normal (arg);
32939 int adjust = (comparison_p) ? 1 : 0;
32940 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32941
32942 if (last_arg_constant && i == nargs - 1)
32943 {
32944 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32945 {
32946 enum insn_code new_icode = icode;
32947 switch (icode)
32948 {
32949 case CODE_FOR_xop_vpermil2v2df3:
32950 case CODE_FOR_xop_vpermil2v4sf3:
32951 case CODE_FOR_xop_vpermil2v4df3:
32952 case CODE_FOR_xop_vpermil2v8sf3:
32953 error ("the last argument must be a 2-bit immediate");
32954 return gen_reg_rtx (tmode);
32955 case CODE_FOR_xop_rotlv2di3:
32956 new_icode = CODE_FOR_rotlv2di3;
32957 goto xop_rotl;
32958 case CODE_FOR_xop_rotlv4si3:
32959 new_icode = CODE_FOR_rotlv4si3;
32960 goto xop_rotl;
32961 case CODE_FOR_xop_rotlv8hi3:
32962 new_icode = CODE_FOR_rotlv8hi3;
32963 goto xop_rotl;
32964 case CODE_FOR_xop_rotlv16qi3:
32965 new_icode = CODE_FOR_rotlv16qi3;
32966 xop_rotl:
32967 if (CONST_INT_P (op))
32968 {
32969 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32970 op = GEN_INT (INTVAL (op) & mask);
32971 gcc_checking_assert
32972 (insn_data[icode].operand[i + 1].predicate (op, mode));
32973 }
32974 else
32975 {
32976 gcc_checking_assert
32977 (nargs == 2
32978 && insn_data[new_icode].operand[0].mode == tmode
32979 && insn_data[new_icode].operand[1].mode == tmode
32980 && insn_data[new_icode].operand[2].mode == mode
32981 && insn_data[new_icode].operand[0].predicate
32982 == insn_data[icode].operand[0].predicate
32983 && insn_data[new_icode].operand[1].predicate
32984 == insn_data[icode].operand[1].predicate);
32985 icode = new_icode;
32986 goto non_constant;
32987 }
32988 break;
32989 default:
32990 gcc_unreachable ();
32991 }
32992 }
32993 }
32994 else
32995 {
32996 non_constant:
32997 if (VECTOR_MODE_P (mode))
32998 op = safe_vector_operand (op, mode);
32999
33000 /* If we aren't optimizing, only allow one memory operand to be
33001 generated. */
33002 if (memory_operand (op, mode))
33003 num_memory++;
33004
33005 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33006
33007 if (optimize
33008 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33009 || num_memory > 1)
33010 op = force_reg (mode, op);
33011 }
33012
33013 args[i].op = op;
33014 args[i].mode = mode;
33015 }
33016
33017 switch (nargs)
33018 {
33019 case 1:
33020 pat = GEN_FCN (icode) (target, args[0].op);
33021 break;
33022
33023 case 2:
33024 if (tf_p)
33025 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33026 GEN_INT ((int)sub_code));
33027 else if (! comparison_p)
33028 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33029 else
33030 {
33031 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33032 args[0].op,
33033 args[1].op);
33034
33035 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33036 }
33037 break;
33038
33039 case 3:
33040 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33041 break;
33042
33043 case 4:
33044 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33045 break;
33046
33047 default:
33048 gcc_unreachable ();
33049 }
33050
33051 if (! pat)
33052 return 0;
33053
33054 emit_insn (pat);
33055 return target;
33056 }
33057
33058 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33059 insns with vec_merge. */
33060
33061 static rtx
33062 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33063 rtx target)
33064 {
33065 rtx pat;
33066 tree arg0 = CALL_EXPR_ARG (exp, 0);
33067 rtx op1, op0 = expand_normal (arg0);
33068 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33069 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33070
33071 if (optimize || !target
33072 || GET_MODE (target) != tmode
33073 || !insn_data[icode].operand[0].predicate (target, tmode))
33074 target = gen_reg_rtx (tmode);
33075
33076 if (VECTOR_MODE_P (mode0))
33077 op0 = safe_vector_operand (op0, mode0);
33078
33079 if ((optimize && !register_operand (op0, mode0))
33080 || !insn_data[icode].operand[1].predicate (op0, mode0))
33081 op0 = copy_to_mode_reg (mode0, op0);
33082
33083 op1 = op0;
33084 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33085 op1 = copy_to_mode_reg (mode0, op1);
33086
33087 pat = GEN_FCN (icode) (target, op0, op1);
33088 if (! pat)
33089 return 0;
33090 emit_insn (pat);
33091 return target;
33092 }
33093
33094 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33095
33096 static rtx
33097 ix86_expand_sse_compare (const struct builtin_description *d,
33098 tree exp, rtx target, bool swap)
33099 {
33100 rtx pat;
33101 tree arg0 = CALL_EXPR_ARG (exp, 0);
33102 tree arg1 = CALL_EXPR_ARG (exp, 1);
33103 rtx op0 = expand_normal (arg0);
33104 rtx op1 = expand_normal (arg1);
33105 rtx op2;
33106 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33107 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33108 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33109 enum rtx_code comparison = d->comparison;
33110
33111 if (VECTOR_MODE_P (mode0))
33112 op0 = safe_vector_operand (op0, mode0);
33113 if (VECTOR_MODE_P (mode1))
33114 op1 = safe_vector_operand (op1, mode1);
33115
33116 /* Swap operands if we have a comparison that isn't available in
33117 hardware. */
33118 if (swap)
33119 {
33120 rtx tmp = gen_reg_rtx (mode1);
33121 emit_move_insn (tmp, op1);
33122 op1 = op0;
33123 op0 = tmp;
33124 }
33125
33126 if (optimize || !target
33127 || GET_MODE (target) != tmode
33128 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33129 target = gen_reg_rtx (tmode);
33130
33131 if ((optimize && !register_operand (op0, mode0))
33132 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33133 op0 = copy_to_mode_reg (mode0, op0);
33134 if ((optimize && !register_operand (op1, mode1))
33135 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33136 op1 = copy_to_mode_reg (mode1, op1);
33137
33138 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33139 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33140 if (! pat)
33141 return 0;
33142 emit_insn (pat);
33143 return target;
33144 }
33145
33146 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33147
33148 static rtx
33149 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33150 rtx target)
33151 {
33152 rtx pat;
33153 tree arg0 = CALL_EXPR_ARG (exp, 0);
33154 tree arg1 = CALL_EXPR_ARG (exp, 1);
33155 rtx op0 = expand_normal (arg0);
33156 rtx op1 = expand_normal (arg1);
33157 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33158 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33159 enum rtx_code comparison = d->comparison;
33160
33161 if (VECTOR_MODE_P (mode0))
33162 op0 = safe_vector_operand (op0, mode0);
33163 if (VECTOR_MODE_P (mode1))
33164 op1 = safe_vector_operand (op1, mode1);
33165
33166 /* Swap operands if we have a comparison that isn't available in
33167 hardware. */
33168 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33169 {
33170 rtx tmp = op1;
33171 op1 = op0;
33172 op0 = tmp;
33173 }
33174
33175 target = gen_reg_rtx (SImode);
33176 emit_move_insn (target, const0_rtx);
33177 target = gen_rtx_SUBREG (QImode, target, 0);
33178
33179 if ((optimize && !register_operand (op0, mode0))
33180 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33181 op0 = copy_to_mode_reg (mode0, op0);
33182 if ((optimize && !register_operand (op1, mode1))
33183 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33184 op1 = copy_to_mode_reg (mode1, op1);
33185
33186 pat = GEN_FCN (d->icode) (op0, op1);
33187 if (! pat)
33188 return 0;
33189 emit_insn (pat);
33190 emit_insn (gen_rtx_SET (VOIDmode,
33191 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33192 gen_rtx_fmt_ee (comparison, QImode,
33193 SET_DEST (pat),
33194 const0_rtx)));
33195
33196 return SUBREG_REG (target);
33197 }
33198
33199 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33200
33201 static rtx
33202 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33203 rtx target)
33204 {
33205 rtx pat;
33206 tree arg0 = CALL_EXPR_ARG (exp, 0);
33207 rtx op1, op0 = expand_normal (arg0);
33208 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33209 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33210
33211 if (optimize || target == 0
33212 || GET_MODE (target) != tmode
33213 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33214 target = gen_reg_rtx (tmode);
33215
33216 if (VECTOR_MODE_P (mode0))
33217 op0 = safe_vector_operand (op0, mode0);
33218
33219 if ((optimize && !register_operand (op0, mode0))
33220 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33221 op0 = copy_to_mode_reg (mode0, op0);
33222
33223 op1 = GEN_INT (d->comparison);
33224
33225 pat = GEN_FCN (d->icode) (target, op0, op1);
33226 if (! pat)
33227 return 0;
33228 emit_insn (pat);
33229 return target;
33230 }
33231
33232 static rtx
33233 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33234 tree exp, rtx target)
33235 {
33236 rtx pat;
33237 tree arg0 = CALL_EXPR_ARG (exp, 0);
33238 tree arg1 = CALL_EXPR_ARG (exp, 1);
33239 rtx op0 = expand_normal (arg0);
33240 rtx op1 = expand_normal (arg1);
33241 rtx op2;
33242 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33243 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33244 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33245
33246 if (optimize || target == 0
33247 || GET_MODE (target) != tmode
33248 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33249 target = gen_reg_rtx (tmode);
33250
33251 op0 = safe_vector_operand (op0, mode0);
33252 op1 = safe_vector_operand (op1, mode1);
33253
33254 if ((optimize && !register_operand (op0, mode0))
33255 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33256 op0 = copy_to_mode_reg (mode0, op0);
33257 if ((optimize && !register_operand (op1, mode1))
33258 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33259 op1 = copy_to_mode_reg (mode1, op1);
33260
33261 op2 = GEN_INT (d->comparison);
33262
33263 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33264 if (! pat)
33265 return 0;
33266 emit_insn (pat);
33267 return target;
33268 }
33269
33270 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33271
33272 static rtx
33273 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33274 rtx target)
33275 {
33276 rtx pat;
33277 tree arg0 = CALL_EXPR_ARG (exp, 0);
33278 tree arg1 = CALL_EXPR_ARG (exp, 1);
33279 rtx op0 = expand_normal (arg0);
33280 rtx op1 = expand_normal (arg1);
33281 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33282 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33283 enum rtx_code comparison = d->comparison;
33284
33285 if (VECTOR_MODE_P (mode0))
33286 op0 = safe_vector_operand (op0, mode0);
33287 if (VECTOR_MODE_P (mode1))
33288 op1 = safe_vector_operand (op1, mode1);
33289
33290 target = gen_reg_rtx (SImode);
33291 emit_move_insn (target, const0_rtx);
33292 target = gen_rtx_SUBREG (QImode, target, 0);
33293
33294 if ((optimize && !register_operand (op0, mode0))
33295 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33296 op0 = copy_to_mode_reg (mode0, op0);
33297 if ((optimize && !register_operand (op1, mode1))
33298 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33299 op1 = copy_to_mode_reg (mode1, op1);
33300
33301 pat = GEN_FCN (d->icode) (op0, op1);
33302 if (! pat)
33303 return 0;
33304 emit_insn (pat);
33305 emit_insn (gen_rtx_SET (VOIDmode,
33306 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33307 gen_rtx_fmt_ee (comparison, QImode,
33308 SET_DEST (pat),
33309 const0_rtx)));
33310
33311 return SUBREG_REG (target);
33312 }
33313
33314 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33315
33316 static rtx
33317 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33318 tree exp, rtx target)
33319 {
33320 rtx pat;
33321 tree arg0 = CALL_EXPR_ARG (exp, 0);
33322 tree arg1 = CALL_EXPR_ARG (exp, 1);
33323 tree arg2 = CALL_EXPR_ARG (exp, 2);
33324 tree arg3 = CALL_EXPR_ARG (exp, 3);
33325 tree arg4 = CALL_EXPR_ARG (exp, 4);
33326 rtx scratch0, scratch1;
33327 rtx op0 = expand_normal (arg0);
33328 rtx op1 = expand_normal (arg1);
33329 rtx op2 = expand_normal (arg2);
33330 rtx op3 = expand_normal (arg3);
33331 rtx op4 = expand_normal (arg4);
33332 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33333
33334 tmode0 = insn_data[d->icode].operand[0].mode;
33335 tmode1 = insn_data[d->icode].operand[1].mode;
33336 modev2 = insn_data[d->icode].operand[2].mode;
33337 modei3 = insn_data[d->icode].operand[3].mode;
33338 modev4 = insn_data[d->icode].operand[4].mode;
33339 modei5 = insn_data[d->icode].operand[5].mode;
33340 modeimm = insn_data[d->icode].operand[6].mode;
33341
33342 if (VECTOR_MODE_P (modev2))
33343 op0 = safe_vector_operand (op0, modev2);
33344 if (VECTOR_MODE_P (modev4))
33345 op2 = safe_vector_operand (op2, modev4);
33346
33347 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33348 op0 = copy_to_mode_reg (modev2, op0);
33349 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33350 op1 = copy_to_mode_reg (modei3, op1);
33351 if ((optimize && !register_operand (op2, modev4))
33352 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33353 op2 = copy_to_mode_reg (modev4, op2);
33354 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33355 op3 = copy_to_mode_reg (modei5, op3);
33356
33357 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33358 {
33359 error ("the fifth argument must be an 8-bit immediate");
33360 return const0_rtx;
33361 }
33362
33363 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33364 {
33365 if (optimize || !target
33366 || GET_MODE (target) != tmode0
33367 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33368 target = gen_reg_rtx (tmode0);
33369
33370 scratch1 = gen_reg_rtx (tmode1);
33371
33372 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33373 }
33374 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33375 {
33376 if (optimize || !target
33377 || GET_MODE (target) != tmode1
33378 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33379 target = gen_reg_rtx (tmode1);
33380
33381 scratch0 = gen_reg_rtx (tmode0);
33382
33383 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33384 }
33385 else
33386 {
33387 gcc_assert (d->flag);
33388
33389 scratch0 = gen_reg_rtx (tmode0);
33390 scratch1 = gen_reg_rtx (tmode1);
33391
33392 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33393 }
33394
33395 if (! pat)
33396 return 0;
33397
33398 emit_insn (pat);
33399
33400 if (d->flag)
33401 {
33402 target = gen_reg_rtx (SImode);
33403 emit_move_insn (target, const0_rtx);
33404 target = gen_rtx_SUBREG (QImode, target, 0);
33405
33406 emit_insn
33407 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33408 gen_rtx_fmt_ee (EQ, QImode,
33409 gen_rtx_REG ((enum machine_mode) d->flag,
33410 FLAGS_REG),
33411 const0_rtx)));
33412 return SUBREG_REG (target);
33413 }
33414 else
33415 return target;
33416 }
33417
33418
33419 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33420
33421 static rtx
33422 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33423 tree exp, rtx target)
33424 {
33425 rtx pat;
33426 tree arg0 = CALL_EXPR_ARG (exp, 0);
33427 tree arg1 = CALL_EXPR_ARG (exp, 1);
33428 tree arg2 = CALL_EXPR_ARG (exp, 2);
33429 rtx scratch0, scratch1;
33430 rtx op0 = expand_normal (arg0);
33431 rtx op1 = expand_normal (arg1);
33432 rtx op2 = expand_normal (arg2);
33433 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33434
33435 tmode0 = insn_data[d->icode].operand[0].mode;
33436 tmode1 = insn_data[d->icode].operand[1].mode;
33437 modev2 = insn_data[d->icode].operand[2].mode;
33438 modev3 = insn_data[d->icode].operand[3].mode;
33439 modeimm = insn_data[d->icode].operand[4].mode;
33440
33441 if (VECTOR_MODE_P (modev2))
33442 op0 = safe_vector_operand (op0, modev2);
33443 if (VECTOR_MODE_P (modev3))
33444 op1 = safe_vector_operand (op1, modev3);
33445
33446 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33447 op0 = copy_to_mode_reg (modev2, op0);
33448 if ((optimize && !register_operand (op1, modev3))
33449 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33450 op1 = copy_to_mode_reg (modev3, op1);
33451
33452 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33453 {
33454 error ("the third argument must be an 8-bit immediate");
33455 return const0_rtx;
33456 }
33457
33458 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33459 {
33460 if (optimize || !target
33461 || GET_MODE (target) != tmode0
33462 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33463 target = gen_reg_rtx (tmode0);
33464
33465 scratch1 = gen_reg_rtx (tmode1);
33466
33467 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33468 }
33469 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33470 {
33471 if (optimize || !target
33472 || GET_MODE (target) != tmode1
33473 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33474 target = gen_reg_rtx (tmode1);
33475
33476 scratch0 = gen_reg_rtx (tmode0);
33477
33478 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33479 }
33480 else
33481 {
33482 gcc_assert (d->flag);
33483
33484 scratch0 = gen_reg_rtx (tmode0);
33485 scratch1 = gen_reg_rtx (tmode1);
33486
33487 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33488 }
33489
33490 if (! pat)
33491 return 0;
33492
33493 emit_insn (pat);
33494
33495 if (d->flag)
33496 {
33497 target = gen_reg_rtx (SImode);
33498 emit_move_insn (target, const0_rtx);
33499 target = gen_rtx_SUBREG (QImode, target, 0);
33500
33501 emit_insn
33502 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33503 gen_rtx_fmt_ee (EQ, QImode,
33504 gen_rtx_REG ((enum machine_mode) d->flag,
33505 FLAGS_REG),
33506 const0_rtx)));
33507 return SUBREG_REG (target);
33508 }
33509 else
33510 return target;
33511 }
33512
33513 /* Subroutine of ix86_expand_builtin to take care of insns with
33514 variable number of operands. */
33515
33516 static rtx
33517 ix86_expand_args_builtin (const struct builtin_description *d,
33518 tree exp, rtx target)
33519 {
33520 rtx pat, real_target;
33521 unsigned int i, nargs;
33522 unsigned int nargs_constant = 0;
33523 unsigned int mask_pos = 0;
33524 int num_memory = 0;
33525 struct
33526 {
33527 rtx op;
33528 enum machine_mode mode;
33529 } args[6];
33530 bool last_arg_count = false;
33531 enum insn_code icode = d->icode;
33532 const struct insn_data_d *insn_p = &insn_data[icode];
33533 enum machine_mode tmode = insn_p->operand[0].mode;
33534 enum machine_mode rmode = VOIDmode;
33535 bool swap = false;
33536 enum rtx_code comparison = d->comparison;
33537
33538 switch ((enum ix86_builtin_func_type) d->flag)
33539 {
33540 case V2DF_FTYPE_V2DF_ROUND:
33541 case V4DF_FTYPE_V4DF_ROUND:
33542 case V4SF_FTYPE_V4SF_ROUND:
33543 case V8SF_FTYPE_V8SF_ROUND:
33544 case V4SI_FTYPE_V4SF_ROUND:
33545 case V8SI_FTYPE_V8SF_ROUND:
33546 return ix86_expand_sse_round (d, exp, target);
33547 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33548 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33549 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33550 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33551 case INT_FTYPE_V8SF_V8SF_PTEST:
33552 case INT_FTYPE_V4DI_V4DI_PTEST:
33553 case INT_FTYPE_V4DF_V4DF_PTEST:
33554 case INT_FTYPE_V4SF_V4SF_PTEST:
33555 case INT_FTYPE_V2DI_V2DI_PTEST:
33556 case INT_FTYPE_V2DF_V2DF_PTEST:
33557 return ix86_expand_sse_ptest (d, exp, target);
33558 case FLOAT128_FTYPE_FLOAT128:
33559 case FLOAT_FTYPE_FLOAT:
33560 case INT_FTYPE_INT:
33561 case UINT64_FTYPE_INT:
33562 case UINT16_FTYPE_UINT16:
33563 case INT64_FTYPE_INT64:
33564 case INT64_FTYPE_V4SF:
33565 case INT64_FTYPE_V2DF:
33566 case INT_FTYPE_V16QI:
33567 case INT_FTYPE_V8QI:
33568 case INT_FTYPE_V8SF:
33569 case INT_FTYPE_V4DF:
33570 case INT_FTYPE_V4SF:
33571 case INT_FTYPE_V2DF:
33572 case INT_FTYPE_V32QI:
33573 case V16QI_FTYPE_V16QI:
33574 case V8SI_FTYPE_V8SF:
33575 case V8SI_FTYPE_V4SI:
33576 case V8HI_FTYPE_V8HI:
33577 case V8HI_FTYPE_V16QI:
33578 case V8QI_FTYPE_V8QI:
33579 case V8SF_FTYPE_V8SF:
33580 case V8SF_FTYPE_V8SI:
33581 case V8SF_FTYPE_V4SF:
33582 case V8SF_FTYPE_V8HI:
33583 case V4SI_FTYPE_V4SI:
33584 case V4SI_FTYPE_V16QI:
33585 case V4SI_FTYPE_V4SF:
33586 case V4SI_FTYPE_V8SI:
33587 case V4SI_FTYPE_V8HI:
33588 case V4SI_FTYPE_V4DF:
33589 case V4SI_FTYPE_V2DF:
33590 case V4HI_FTYPE_V4HI:
33591 case V4DF_FTYPE_V4DF:
33592 case V4DF_FTYPE_V4SI:
33593 case V4DF_FTYPE_V4SF:
33594 case V4DF_FTYPE_V2DF:
33595 case V4SF_FTYPE_V4SF:
33596 case V4SF_FTYPE_V4SI:
33597 case V4SF_FTYPE_V8SF:
33598 case V4SF_FTYPE_V4DF:
33599 case V4SF_FTYPE_V8HI:
33600 case V4SF_FTYPE_V2DF:
33601 case V2DI_FTYPE_V2DI:
33602 case V2DI_FTYPE_V16QI:
33603 case V2DI_FTYPE_V8HI:
33604 case V2DI_FTYPE_V4SI:
33605 case V2DF_FTYPE_V2DF:
33606 case V2DF_FTYPE_V4SI:
33607 case V2DF_FTYPE_V4DF:
33608 case V2DF_FTYPE_V4SF:
33609 case V2DF_FTYPE_V2SI:
33610 case V2SI_FTYPE_V2SI:
33611 case V2SI_FTYPE_V4SF:
33612 case V2SI_FTYPE_V2SF:
33613 case V2SI_FTYPE_V2DF:
33614 case V2SF_FTYPE_V2SF:
33615 case V2SF_FTYPE_V2SI:
33616 case V32QI_FTYPE_V32QI:
33617 case V32QI_FTYPE_V16QI:
33618 case V16HI_FTYPE_V16HI:
33619 case V16HI_FTYPE_V8HI:
33620 case V8SI_FTYPE_V8SI:
33621 case V16HI_FTYPE_V16QI:
33622 case V8SI_FTYPE_V16QI:
33623 case V4DI_FTYPE_V16QI:
33624 case V8SI_FTYPE_V8HI:
33625 case V4DI_FTYPE_V8HI:
33626 case V4DI_FTYPE_V4SI:
33627 case V4DI_FTYPE_V2DI:
33628 case HI_FTYPE_HI:
33629 case UINT_FTYPE_V2DF:
33630 case UINT_FTYPE_V4SF:
33631 case UINT64_FTYPE_V2DF:
33632 case UINT64_FTYPE_V4SF:
33633 case V16QI_FTYPE_V8DI:
33634 case V16HI_FTYPE_V16SI:
33635 case V16SI_FTYPE_HI:
33636 case V16SI_FTYPE_V16SI:
33637 case V16SI_FTYPE_INT:
33638 case V16SF_FTYPE_FLOAT:
33639 case V16SF_FTYPE_V4SF:
33640 case V16SF_FTYPE_V16SF:
33641 case V8HI_FTYPE_V8DI:
33642 case V8UHI_FTYPE_V8UHI:
33643 case V8SI_FTYPE_V8DI:
33644 case V8USI_FTYPE_V8USI:
33645 case V8SF_FTYPE_V8DF:
33646 case V8DI_FTYPE_QI:
33647 case V8DI_FTYPE_INT64:
33648 case V8DI_FTYPE_V4DI:
33649 case V8DI_FTYPE_V8DI:
33650 case V8DF_FTYPE_DOUBLE:
33651 case V8DF_FTYPE_V4DF:
33652 case V8DF_FTYPE_V8DF:
33653 case V8DF_FTYPE_V8SI:
33654 nargs = 1;
33655 break;
33656 case V4SF_FTYPE_V4SF_VEC_MERGE:
33657 case V2DF_FTYPE_V2DF_VEC_MERGE:
33658 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33659 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33660 case V16QI_FTYPE_V16QI_V16QI:
33661 case V16QI_FTYPE_V8HI_V8HI:
33662 case V16SI_FTYPE_V16SI_V16SI:
33663 case V16SF_FTYPE_V16SF_V16SF:
33664 case V16SF_FTYPE_V16SF_V16SI:
33665 case V8QI_FTYPE_V8QI_V8QI:
33666 case V8QI_FTYPE_V4HI_V4HI:
33667 case V8HI_FTYPE_V8HI_V8HI:
33668 case V8HI_FTYPE_V16QI_V16QI:
33669 case V8HI_FTYPE_V4SI_V4SI:
33670 case V8SF_FTYPE_V8SF_V8SF:
33671 case V8SF_FTYPE_V8SF_V8SI:
33672 case V8DI_FTYPE_V8DI_V8DI:
33673 case V8DF_FTYPE_V8DF_V8DF:
33674 case V8DF_FTYPE_V8DF_V8DI:
33675 case V4SI_FTYPE_V4SI_V4SI:
33676 case V4SI_FTYPE_V8HI_V8HI:
33677 case V4SI_FTYPE_V4SF_V4SF:
33678 case V4SI_FTYPE_V2DF_V2DF:
33679 case V4HI_FTYPE_V4HI_V4HI:
33680 case V4HI_FTYPE_V8QI_V8QI:
33681 case V4HI_FTYPE_V2SI_V2SI:
33682 case V4DF_FTYPE_V4DF_V4DF:
33683 case V4DF_FTYPE_V4DF_V4DI:
33684 case V4SF_FTYPE_V4SF_V4SF:
33685 case V4SF_FTYPE_V4SF_V4SI:
33686 case V4SF_FTYPE_V4SF_V2SI:
33687 case V4SF_FTYPE_V4SF_V2DF:
33688 case V4SF_FTYPE_V4SF_UINT:
33689 case V4SF_FTYPE_V4SF_UINT64:
33690 case V4SF_FTYPE_V4SF_DI:
33691 case V4SF_FTYPE_V4SF_SI:
33692 case V2DI_FTYPE_V2DI_V2DI:
33693 case V2DI_FTYPE_V16QI_V16QI:
33694 case V2DI_FTYPE_V4SI_V4SI:
33695 case V2UDI_FTYPE_V4USI_V4USI:
33696 case V2DI_FTYPE_V2DI_V16QI:
33697 case V2DI_FTYPE_V2DF_V2DF:
33698 case V2SI_FTYPE_V2SI_V2SI:
33699 case V2SI_FTYPE_V4HI_V4HI:
33700 case V2SI_FTYPE_V2SF_V2SF:
33701 case V2DF_FTYPE_V2DF_V2DF:
33702 case V2DF_FTYPE_V2DF_V4SF:
33703 case V2DF_FTYPE_V2DF_V2DI:
33704 case V2DF_FTYPE_V2DF_DI:
33705 case V2DF_FTYPE_V2DF_SI:
33706 case V2DF_FTYPE_V2DF_UINT:
33707 case V2DF_FTYPE_V2DF_UINT64:
33708 case V2SF_FTYPE_V2SF_V2SF:
33709 case V1DI_FTYPE_V1DI_V1DI:
33710 case V1DI_FTYPE_V8QI_V8QI:
33711 case V1DI_FTYPE_V2SI_V2SI:
33712 case V32QI_FTYPE_V16HI_V16HI:
33713 case V16HI_FTYPE_V8SI_V8SI:
33714 case V32QI_FTYPE_V32QI_V32QI:
33715 case V16HI_FTYPE_V32QI_V32QI:
33716 case V16HI_FTYPE_V16HI_V16HI:
33717 case V8SI_FTYPE_V4DF_V4DF:
33718 case V8SI_FTYPE_V8SI_V8SI:
33719 case V8SI_FTYPE_V16HI_V16HI:
33720 case V4DI_FTYPE_V4DI_V4DI:
33721 case V4DI_FTYPE_V8SI_V8SI:
33722 case V4UDI_FTYPE_V8USI_V8USI:
33723 case QI_FTYPE_V8DI_V8DI:
33724 case HI_FTYPE_V16SI_V16SI:
33725 if (comparison == UNKNOWN)
33726 return ix86_expand_binop_builtin (icode, exp, target);
33727 nargs = 2;
33728 break;
33729 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33730 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33731 gcc_assert (comparison != UNKNOWN);
33732 nargs = 2;
33733 swap = true;
33734 break;
33735 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33736 case V16HI_FTYPE_V16HI_SI_COUNT:
33737 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33738 case V8SI_FTYPE_V8SI_SI_COUNT:
33739 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33740 case V4DI_FTYPE_V4DI_INT_COUNT:
33741 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33742 case V8HI_FTYPE_V8HI_SI_COUNT:
33743 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33744 case V4SI_FTYPE_V4SI_SI_COUNT:
33745 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33746 case V4HI_FTYPE_V4HI_SI_COUNT:
33747 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33748 case V2DI_FTYPE_V2DI_SI_COUNT:
33749 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33750 case V2SI_FTYPE_V2SI_SI_COUNT:
33751 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33752 case V1DI_FTYPE_V1DI_SI_COUNT:
33753 nargs = 2;
33754 last_arg_count = true;
33755 break;
33756 case UINT64_FTYPE_UINT64_UINT64:
33757 case UINT_FTYPE_UINT_UINT:
33758 case UINT_FTYPE_UINT_USHORT:
33759 case UINT_FTYPE_UINT_UCHAR:
33760 case UINT16_FTYPE_UINT16_INT:
33761 case UINT8_FTYPE_UINT8_INT:
33762 case HI_FTYPE_HI_HI:
33763 case V16SI_FTYPE_V8DF_V8DF:
33764 nargs = 2;
33765 break;
33766 case V2DI_FTYPE_V2DI_INT_CONVERT:
33767 nargs = 2;
33768 rmode = V1TImode;
33769 nargs_constant = 1;
33770 break;
33771 case V4DI_FTYPE_V4DI_INT_CONVERT:
33772 nargs = 2;
33773 rmode = V2TImode;
33774 nargs_constant = 1;
33775 break;
33776 case V8HI_FTYPE_V8HI_INT:
33777 case V8HI_FTYPE_V8SF_INT:
33778 case V16HI_FTYPE_V16SF_INT:
33779 case V8HI_FTYPE_V4SF_INT:
33780 case V8SF_FTYPE_V8SF_INT:
33781 case V4SF_FTYPE_V16SF_INT:
33782 case V16SF_FTYPE_V16SF_INT:
33783 case V4SI_FTYPE_V4SI_INT:
33784 case V4SI_FTYPE_V8SI_INT:
33785 case V4HI_FTYPE_V4HI_INT:
33786 case V4DF_FTYPE_V4DF_INT:
33787 case V4DF_FTYPE_V8DF_INT:
33788 case V4SF_FTYPE_V4SF_INT:
33789 case V4SF_FTYPE_V8SF_INT:
33790 case V2DI_FTYPE_V2DI_INT:
33791 case V2DF_FTYPE_V2DF_INT:
33792 case V2DF_FTYPE_V4DF_INT:
33793 case V16HI_FTYPE_V16HI_INT:
33794 case V8SI_FTYPE_V8SI_INT:
33795 case V16SI_FTYPE_V16SI_INT:
33796 case V4SI_FTYPE_V16SI_INT:
33797 case V4DI_FTYPE_V4DI_INT:
33798 case V2DI_FTYPE_V4DI_INT:
33799 case V4DI_FTYPE_V8DI_INT:
33800 case HI_FTYPE_HI_INT:
33801 nargs = 2;
33802 nargs_constant = 1;
33803 break;
33804 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33805 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33806 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33807 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33808 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33809 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33810 case HI_FTYPE_V16SI_V16SI_HI:
33811 case QI_FTYPE_V8DI_V8DI_QI:
33812 case V16HI_FTYPE_V16SI_V16HI_HI:
33813 case V16QI_FTYPE_V16SI_V16QI_HI:
33814 case V16QI_FTYPE_V8DI_V16QI_QI:
33815 case V16SF_FTYPE_V16SF_V16SF_HI:
33816 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33817 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33818 case V16SF_FTYPE_V16SI_V16SF_HI:
33819 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33820 case V16SF_FTYPE_V4SF_V16SF_HI:
33821 case V16SI_FTYPE_SI_V16SI_HI:
33822 case V16SI_FTYPE_V16HI_V16SI_HI:
33823 case V16SI_FTYPE_V16QI_V16SI_HI:
33824 case V16SI_FTYPE_V16SF_V16SI_HI:
33825 case V16SI_FTYPE_V16SI_V16SI_HI:
33826 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33827 case V16SI_FTYPE_V4SI_V16SI_HI:
33828 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33829 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33830 case V8DF_FTYPE_V2DF_V8DF_QI:
33831 case V8DF_FTYPE_V4DF_V8DF_QI:
33832 case V8DF_FTYPE_V8DF_V8DF_QI:
33833 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33834 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33835 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33836 case V8DF_FTYPE_V8SF_V8DF_QI:
33837 case V8DF_FTYPE_V8SI_V8DF_QI:
33838 case V8DI_FTYPE_DI_V8DI_QI:
33839 case V8DI_FTYPE_V16QI_V8DI_QI:
33840 case V8DI_FTYPE_V2DI_V8DI_QI:
33841 case V8DI_FTYPE_V4DI_V8DI_QI:
33842 case V8DI_FTYPE_V8DI_V8DI_QI:
33843 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33844 case V8DI_FTYPE_V8HI_V8DI_QI:
33845 case V8DI_FTYPE_V8SI_V8DI_QI:
33846 case V8HI_FTYPE_V8DI_V8HI_QI:
33847 case V8SF_FTYPE_V8DF_V8SF_QI:
33848 case V8SI_FTYPE_V8DF_V8SI_QI:
33849 case V8SI_FTYPE_V8DI_V8SI_QI:
33850 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33851 nargs = 3;
33852 break;
33853 case V32QI_FTYPE_V32QI_V32QI_INT:
33854 case V16HI_FTYPE_V16HI_V16HI_INT:
33855 case V16QI_FTYPE_V16QI_V16QI_INT:
33856 case V4DI_FTYPE_V4DI_V4DI_INT:
33857 case V8HI_FTYPE_V8HI_V8HI_INT:
33858 case V8SI_FTYPE_V8SI_V8SI_INT:
33859 case V8SI_FTYPE_V8SI_V4SI_INT:
33860 case V8SF_FTYPE_V8SF_V8SF_INT:
33861 case V8SF_FTYPE_V8SF_V4SF_INT:
33862 case V4SI_FTYPE_V4SI_V4SI_INT:
33863 case V4DF_FTYPE_V4DF_V4DF_INT:
33864 case V16SF_FTYPE_V16SF_V16SF_INT:
33865 case V16SF_FTYPE_V16SF_V4SF_INT:
33866 case V16SI_FTYPE_V16SI_V4SI_INT:
33867 case V4DF_FTYPE_V4DF_V2DF_INT:
33868 case V4SF_FTYPE_V4SF_V4SF_INT:
33869 case V2DI_FTYPE_V2DI_V2DI_INT:
33870 case V4DI_FTYPE_V4DI_V2DI_INT:
33871 case V2DF_FTYPE_V2DF_V2DF_INT:
33872 case QI_FTYPE_V8DI_V8DI_INT:
33873 case QI_FTYPE_V8DF_V8DF_INT:
33874 case QI_FTYPE_V2DF_V2DF_INT:
33875 case QI_FTYPE_V4SF_V4SF_INT:
33876 case HI_FTYPE_V16SI_V16SI_INT:
33877 case HI_FTYPE_V16SF_V16SF_INT:
33878 nargs = 3;
33879 nargs_constant = 1;
33880 break;
33881 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33882 nargs = 3;
33883 rmode = V4DImode;
33884 nargs_constant = 1;
33885 break;
33886 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33887 nargs = 3;
33888 rmode = V2DImode;
33889 nargs_constant = 1;
33890 break;
33891 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33892 nargs = 3;
33893 rmode = DImode;
33894 nargs_constant = 1;
33895 break;
33896 case V2DI_FTYPE_V2DI_UINT_UINT:
33897 nargs = 3;
33898 nargs_constant = 2;
33899 break;
33900 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33901 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33902 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33903 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33904 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33905 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33906 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33907 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33908 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33909 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33910 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33911 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33912 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33913 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33914 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33915 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33916 nargs = 4;
33917 break;
33918 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33919 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33920 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33921 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33922 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33923 nargs = 4;
33924 nargs_constant = 1;
33925 break;
33926 case QI_FTYPE_V2DF_V2DF_INT_QI:
33927 case QI_FTYPE_V4SF_V4SF_INT_QI:
33928 nargs = 4;
33929 mask_pos = 1;
33930 nargs_constant = 1;
33931 break;
33932 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33933 nargs = 4;
33934 nargs_constant = 2;
33935 break;
33936 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33937 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33938 nargs = 4;
33939 break;
33940 case QI_FTYPE_V8DI_V8DI_INT_QI:
33941 case HI_FTYPE_V16SI_V16SI_INT_HI:
33942 case QI_FTYPE_V8DF_V8DF_INT_QI:
33943 case HI_FTYPE_V16SF_V16SF_INT_HI:
33944 mask_pos = 1;
33945 nargs = 4;
33946 nargs_constant = 1;
33947 break;
33948 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33949 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33950 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33951 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33952 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33953 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33954 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33955 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33956 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33957 nargs = 4;
33958 mask_pos = 2;
33959 nargs_constant = 1;
33960 break;
33961 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33962 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33963 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33964 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33965 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33966 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33967 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33968 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33969 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33970 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33971 nargs = 5;
33972 mask_pos = 2;
33973 nargs_constant = 1;
33974 break;
33975 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33976 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33977 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33978 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33979 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33980 nargs = 5;
33981 mask_pos = 1;
33982 nargs_constant = 1;
33983 break;
33984
33985 default:
33986 gcc_unreachable ();
33987 }
33988
33989 gcc_assert (nargs <= ARRAY_SIZE (args));
33990
33991 if (comparison != UNKNOWN)
33992 {
33993 gcc_assert (nargs == 2);
33994 return ix86_expand_sse_compare (d, exp, target, swap);
33995 }
33996
33997 if (rmode == VOIDmode || rmode == tmode)
33998 {
33999 if (optimize
34000 || target == 0
34001 || GET_MODE (target) != tmode
34002 || !insn_p->operand[0].predicate (target, tmode))
34003 target = gen_reg_rtx (tmode);
34004 real_target = target;
34005 }
34006 else
34007 {
34008 real_target = gen_reg_rtx (tmode);
34009 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34010 }
34011
34012 for (i = 0; i < nargs; i++)
34013 {
34014 tree arg = CALL_EXPR_ARG (exp, i);
34015 rtx op = expand_normal (arg);
34016 enum machine_mode mode = insn_p->operand[i + 1].mode;
34017 bool match = insn_p->operand[i + 1].predicate (op, mode);
34018
34019 if (last_arg_count && (i + 1) == nargs)
34020 {
34021 /* SIMD shift insns take either an 8-bit immediate or
34022 register as count. But builtin functions take int as
34023 count. If count doesn't match, we put it in register. */
34024 if (!match)
34025 {
34026 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34027 if (!insn_p->operand[i + 1].predicate (op, mode))
34028 op = copy_to_reg (op);
34029 }
34030 }
34031 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34032 (!mask_pos && (nargs - i) <= nargs_constant))
34033 {
34034 if (!match)
34035 switch (icode)
34036 {
34037 case CODE_FOR_avx2_inserti128:
34038 case CODE_FOR_avx2_extracti128:
34039 error ("the last argument must be a 1-bit immediate");
34040 return const0_rtx;
34041
34042 case CODE_FOR_avx512f_cmpv8di3_mask:
34043 case CODE_FOR_avx512f_cmpv16si3_mask:
34044 case CODE_FOR_avx512f_ucmpv8di3_mask:
34045 case CODE_FOR_avx512f_ucmpv16si3_mask:
34046 error ("the last argument must be a 3-bit immediate");
34047 return const0_rtx;
34048
34049 case CODE_FOR_sse4_1_roundsd:
34050 case CODE_FOR_sse4_1_roundss:
34051
34052 case CODE_FOR_sse4_1_roundpd:
34053 case CODE_FOR_sse4_1_roundps:
34054 case CODE_FOR_avx_roundpd256:
34055 case CODE_FOR_avx_roundps256:
34056
34057 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34058 case CODE_FOR_sse4_1_roundps_sfix:
34059 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34060 case CODE_FOR_avx_roundps_sfix256:
34061
34062 case CODE_FOR_sse4_1_blendps:
34063 case CODE_FOR_avx_blendpd256:
34064 case CODE_FOR_avx_vpermilv4df:
34065 case CODE_FOR_avx512f_getmantv8df_mask:
34066 case CODE_FOR_avx512f_getmantv16sf_mask:
34067 error ("the last argument must be a 4-bit immediate");
34068 return const0_rtx;
34069
34070 case CODE_FOR_sha1rnds4:
34071 case CODE_FOR_sse4_1_blendpd:
34072 case CODE_FOR_avx_vpermilv2df:
34073 case CODE_FOR_xop_vpermil2v2df3:
34074 case CODE_FOR_xop_vpermil2v4sf3:
34075 case CODE_FOR_xop_vpermil2v4df3:
34076 case CODE_FOR_xop_vpermil2v8sf3:
34077 case CODE_FOR_avx512f_vinsertf32x4_mask:
34078 case CODE_FOR_avx512f_vinserti32x4_mask:
34079 case CODE_FOR_avx512f_vextractf32x4_mask:
34080 case CODE_FOR_avx512f_vextracti32x4_mask:
34081 error ("the last argument must be a 2-bit immediate");
34082 return const0_rtx;
34083
34084 case CODE_FOR_avx_vextractf128v4df:
34085 case CODE_FOR_avx_vextractf128v8sf:
34086 case CODE_FOR_avx_vextractf128v8si:
34087 case CODE_FOR_avx_vinsertf128v4df:
34088 case CODE_FOR_avx_vinsertf128v8sf:
34089 case CODE_FOR_avx_vinsertf128v8si:
34090 case CODE_FOR_avx512f_vinsertf64x4_mask:
34091 case CODE_FOR_avx512f_vinserti64x4_mask:
34092 case CODE_FOR_avx512f_vextractf64x4_mask:
34093 case CODE_FOR_avx512f_vextracti64x4_mask:
34094 error ("the last argument must be a 1-bit immediate");
34095 return const0_rtx;
34096
34097 case CODE_FOR_avx_vmcmpv2df3:
34098 case CODE_FOR_avx_vmcmpv4sf3:
34099 case CODE_FOR_avx_cmpv2df3:
34100 case CODE_FOR_avx_cmpv4sf3:
34101 case CODE_FOR_avx_cmpv4df3:
34102 case CODE_FOR_avx_cmpv8sf3:
34103 case CODE_FOR_avx512f_cmpv8df3_mask:
34104 case CODE_FOR_avx512f_cmpv16sf3_mask:
34105 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34106 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34107 error ("the last argument must be a 5-bit immediate");
34108 return const0_rtx;
34109
34110 default:
34111 switch (nargs_constant)
34112 {
34113 case 2:
34114 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34115 || (!mask_pos && (nargs - i) == nargs_constant))
34116 {
34117 error ("the next to last argument must be an 8-bit immediate");
34118 break;
34119 }
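/* FALLTHRU */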
34120 case 1:
34121 error ("the last argument must be an 8-bit immediate");
34122 break;
34123 default:
34124 gcc_unreachable ();
34125 }
34126 return const0_rtx;
34127 }
34128 }
34129 else
34130 {
34131 if (VECTOR_MODE_P (mode))
34132 op = safe_vector_operand (op, mode);
34133
34134 /* If we aren't optimizing, only allow one memory operand to
34135 be generated. */
34136 if (memory_operand (op, mode))
34137 num_memory++;
34138
34139 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34140 {
34141 if (optimize || !match || num_memory > 1)
34142 op = copy_to_mode_reg (mode, op);
34143 }
34144 else
34145 {
34146 op = copy_to_reg (op);
34147 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34148 }
34149 }
34150
34151 args[i].op = op;
34152 args[i].mode = mode;
34153 }
34154
34155 switch (nargs)
34156 {
34157 case 1:
34158 pat = GEN_FCN (icode) (real_target, args[0].op);
34159 break;
34160 case 2:
34161 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34162 break;
34163 case 3:
34164 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34165 args[2].op);
34166 break;
34167 case 4:
34168 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34169 args[2].op, args[3].op);
34170 break;
34171 case 5:
34172 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34173 args[2].op, args[3].op, args[4].op);
break;
34174 case 6:
34175 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34176 args[2].op, args[3].op, args[4].op,
34177 args[5].op);
34178 break;
34179 default:
34180 gcc_unreachable ();
34181 }
34182
34183 if (! pat)
34184 return 0;
34185
34186 emit_insn (pat);
34187 return target;
34188 }
34189
34190 /* Transform a pattern of the following layout:
34191 (parallel [
34192 (set A B)
34193 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34194 ])
34195 into:
34196 (set A B)
34197
34198 Or:
34199 (parallel [ A B
34200 ...
34201 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34202 ...
34203 ])
34204 into:
34205 (parallel [ A B ... ]) */
34206
34207 static rtx
34208 ix86_erase_embedded_rounding (rtx pat)
34209 {
34210 if (GET_CODE (pat) == INSN)
34211 pat = PATTERN (pat);
34212
34213 gcc_assert (GET_CODE (pat) == PARALLEL);
34214
34215 if (XVECLEN (pat, 0) == 2)
34216 {
34217 rtx p0 = XVECEXP (pat, 0, 0);
34218 rtx p1 = XVECEXP (pat, 0, 1);
34219
34220 gcc_assert (GET_CODE (p0) == SET
34221 && GET_CODE (p1) == UNSPEC
34222 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34223
34224 return p0;
34225 }
34226 else
34227 {
34228 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34229 int i = 0;
34230 int j = 0;
34231
34232 for (; i < XVECLEN (pat, 0); ++i)
34233 {
34234 rtx elem = XVECEXP (pat, 0, i);
34235 if (GET_CODE (elem) != UNSPEC
34236 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34237 res [j++] = elem;
34238 }
34239
34240 /* No more than 1 occurrence was removed. */
34241 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34242
34243 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34244 }
34245 }
34246
34247 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34248 with rounding. */
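/* A sketch of how this is reached (assuming the avx512fintrin.h wrappers):
int r = _mm_comi_round_ss (a, b, _CMP_GE_OS, _MM_FROUND_NO_EXC);
The third argument selects an entry in comi_comparisons[] below and the
fourth is the embedded rounding/SAE operand. */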
34249 static rtx
34250 ix86_expand_sse_comi_round (const struct builtin_description *d,
34251 tree exp, rtx target)
34252 {
34253 rtx pat, set_dst;
34254 tree arg0 = CALL_EXPR_ARG (exp, 0);
34255 tree arg1 = CALL_EXPR_ARG (exp, 1);
34256 tree arg2 = CALL_EXPR_ARG (exp, 2);
34257 tree arg3 = CALL_EXPR_ARG (exp, 3);
34258 rtx op0 = expand_normal (arg0);
34259 rtx op1 = expand_normal (arg1);
34260 rtx op2 = expand_normal (arg2);
34261 rtx op3 = expand_normal (arg3);
34262 enum insn_code icode = d->icode;
34263 const struct insn_data_d *insn_p = &insn_data[icode];
34264 enum machine_mode mode0 = insn_p->operand[0].mode;
34265 enum machine_mode mode1 = insn_p->operand[1].mode;
34266 enum rtx_code comparison = UNEQ;
34267 bool need_ucomi = false;
34268
34269 /* See avxintrin.h for values. */
34270 enum rtx_code comi_comparisons[32] =
34271 {
34272 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34273 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34274 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34275 };
34276 bool need_ucomi_values[32] =
34277 {
34278 true, false, false, true, true, false, false, true,
34279 true, false, false, true, true, false, false, true,
34280 false, true, true, false, false, true, true, false,
34281 false, true, true, false, false, true, true, false
34282 };
34283
34284 if (!CONST_INT_P (op2))
34285 {
34286 error ("the third argument must be a comparison constant");
34287 return const0_rtx;
34288 }
34289 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34290 {
34291 error ("incorect comparison mode");
34292 return const0_rtx;
34293 }
34294
34295 if (!insn_p->operand[2].predicate (op3, SImode))
34296 {
34297 error ("incorrect rounding operand");
34298 return const0_rtx;
34299 }
34300
34301 comparison = comi_comparisons[INTVAL (op2)];
34302 need_ucomi = need_ucomi_values[INTVAL (op2)];
34303
34304 if (VECTOR_MODE_P (mode0))
34305 op0 = safe_vector_operand (op0, mode0);
34306 if (VECTOR_MODE_P (mode1))
34307 op1 = safe_vector_operand (op1, mode1);
34308
34309 target = gen_reg_rtx (SImode);
34310 emit_move_insn (target, const0_rtx);
34311 target = gen_rtx_SUBREG (QImode, target, 0);
34312
34313 if ((optimize && !register_operand (op0, mode0))
34314 || !insn_p->operand[0].predicate (op0, mode0))
34315 op0 = copy_to_mode_reg (mode0, op0);
34316 if ((optimize && !register_operand (op1, mode1))
34317 || !insn_p->operand[1].predicate (op1, mode1))
34318 op1 = copy_to_mode_reg (mode1, op1);
34319
34320 if (need_ucomi)
34321 icode = icode == CODE_FOR_sse_comi_round
34322 ? CODE_FOR_sse_ucomi_round
34323 : CODE_FOR_sse2_ucomi_round;
34324
34325 pat = GEN_FCN (icode) (op0, op1, op3);
34326 if (! pat)
34327 return 0;
34328
34329 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34330 if (INTVAL (op3) == NO_ROUND)
34331 {
34332 pat = ix86_erase_embedded_rounding (pat);
34333 if (! pat)
34334 return 0;
34335
34336 set_dst = SET_DEST (pat);
34337 }
34338 else
34339 {
34340 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34341 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34342 }
34343
34344 emit_insn (pat);
34345 emit_insn (gen_rtx_SET (VOIDmode,
34346 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34347 gen_rtx_fmt_ee (comparison, QImode,
34348 set_dst,
34349 const0_rtx)));
34350
34351 return SUBREG_REG (target);
34352 }
34353
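/* Subroutine of ix86_expand_builtin to take care of insns with embedded
rounding, i.e. builtins whose last argument is the rounding-mode/SAE
immediate. */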
34354 static rtx
34355 ix86_expand_round_builtin (const struct builtin_description *d,
34356 tree exp, rtx target)
34357 {
34358 rtx pat;
34359 unsigned int i, nargs;
34360 struct
34361 {
34362 rtx op;
34363 enum machine_mode mode;
34364 } args[6];
34365 enum insn_code icode = d->icode;
34366 const struct insn_data_d *insn_p = &insn_data[icode];
34367 enum machine_mode tmode = insn_p->operand[0].mode;
34368 unsigned int nargs_constant = 0;
34369 unsigned int redundant_embed_rnd = 0;
34370
34371 switch ((enum ix86_builtin_func_type) d->flag)
34372 {
34373 case UINT64_FTYPE_V2DF_INT:
34374 case UINT64_FTYPE_V4SF_INT:
34375 case UINT_FTYPE_V2DF_INT:
34376 case UINT_FTYPE_V4SF_INT:
34377 case INT64_FTYPE_V2DF_INT:
34378 case INT64_FTYPE_V4SF_INT:
34379 case INT_FTYPE_V2DF_INT:
34380 case INT_FTYPE_V4SF_INT:
34381 nargs = 2;
34382 break;
34383 case V4SF_FTYPE_V4SF_UINT_INT:
34384 case V4SF_FTYPE_V4SF_UINT64_INT:
34385 case V2DF_FTYPE_V2DF_UINT64_INT:
34386 case V4SF_FTYPE_V4SF_INT_INT:
34387 case V4SF_FTYPE_V4SF_INT64_INT:
34388 case V2DF_FTYPE_V2DF_INT64_INT:
34389 case V4SF_FTYPE_V4SF_V4SF_INT:
34390 case V2DF_FTYPE_V2DF_V2DF_INT:
34391 case V4SF_FTYPE_V4SF_V2DF_INT:
34392 case V2DF_FTYPE_V2DF_V4SF_INT:
34393 nargs = 3;
34394 break;
34395 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34396 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34397 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34398 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34399 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34400 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34401 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34402 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34403 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34404 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34405 nargs = 4;
34406 break;
34407 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34408 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34409 nargs_constant = 2;
34410 nargs = 4;
34411 break;
34412 case INT_FTYPE_V4SF_V4SF_INT_INT:
34413 case INT_FTYPE_V2DF_V2DF_INT_INT:
34414 return ix86_expand_sse_comi_round (d, exp, target);
34415 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34416 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34417 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34418 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34419 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34420 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34421 nargs = 5;
34422 break;
34423 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34424 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34425 nargs_constant = 4;
34426 nargs = 5;
34427 break;
34428 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34429 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34430 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34431 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34432 nargs_constant = 3;
34433 nargs = 5;
34434 break;
34435 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34436 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34437 nargs = 6;
34438 nargs_constant = 4;
34439 break;
34440 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34441 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34442 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34443 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34444 nargs = 6;
34445 nargs_constant = 3;
34446 break;
34447 default:
34448 gcc_unreachable ();
34449 }
34450 gcc_assert (nargs <= ARRAY_SIZE (args));
34451
34452 if (optimize
34453 || target == 0
34454 || GET_MODE (target) != tmode
34455 || !insn_p->operand[0].predicate (target, tmode))
34456 target = gen_reg_rtx (tmode);
34457
34458 for (i = 0; i < nargs; i++)
34459 {
34460 tree arg = CALL_EXPR_ARG (exp, i);
34461 rtx op = expand_normal (arg);
34462 enum machine_mode mode = insn_p->operand[i + 1].mode;
34463 bool match = insn_p->operand[i + 1].predicate (op, mode);
34464
34465 if (i == nargs - nargs_constant)
34466 {
34467 if (!match)
34468 {
34469 switch (icode)
34470 {
34471 case CODE_FOR_avx512f_getmantv8df_mask_round:
34472 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34473 case CODE_FOR_avx512f_getmantv2df_round:
34474 case CODE_FOR_avx512f_getmantv4sf_round:
34475 error ("the immediate argument must be a 4-bit immediate");
34476 return const0_rtx;
34477 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34478 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34479 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34480 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34481 error ("the immediate argument must be a 5-bit immediate");
34482 return const0_rtx;
34483 default:
34484 error ("the immediate argument must be an 8-bit immediate");
34485 return const0_rtx;
34486 }
34487 }
34488 }
34489 else if (i == nargs - 1)
34490 {
34491 if (!insn_p->operand[nargs].predicate (op, SImode))
34492 {
34493 error ("incorrect rounding operand");
34494 return const0_rtx;
34495 }
34496
34497 /* If there is no rounding, use the normal version of the pattern. */
34498 if (INTVAL (op) == NO_ROUND)
34499 redundant_embed_rnd = 1;
34500 }
34501 else
34502 {
34503 if (VECTOR_MODE_P (mode))
34504 op = safe_vector_operand (op, mode);
34505
34506 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34507 {
34508 if (optimize || !match)
34509 op = copy_to_mode_reg (mode, op);
34510 }
34511 else
34512 {
34513 op = copy_to_reg (op);
34514 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34515 }
34516 }
34517
34518 args[i].op = op;
34519 args[i].mode = mode;
34520 }
34521
34522 switch (nargs)
34523 {
34524 case 1:
34525 pat = GEN_FCN (icode) (target, args[0].op);
34526 break;
34527 case 2:
34528 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34529 break;
34530 case 3:
34531 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34532 args[2].op);
34533 break;
34534 case 4:
34535 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34536 args[2].op, args[3].op);
34537 break;
34538 case 5:
34539 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34540 args[2].op, args[3].op, args[4].op);
break;
34541 case 6:
34542 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34543 args[2].op, args[3].op, args[4].op,
34544 args[5].op);
34545 break;
34546 default:
34547 gcc_unreachable ();
34548 }
34549
34550 if (!pat)
34551 return 0;
34552
34553 if (redundant_embed_rnd)
34554 pat = ix86_erase_embedded_rounding (pat);
34555
34556 emit_insn (pat);
34557 return target;
34558 }
34559
34560 /* Subroutine of ix86_expand_builtin to take care of special insns
34561 with variable number of operands. */
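/* For example (a sketch, assuming the usual intrinsic wrappers):
_mm_stream_ps (p, v) is a "store"-class special builtin of type
VOID_FTYPE_PFLOAT_V4SF, while _mm_lddqu_si128 (p) is a "load"-class one of
type V16QI_FTYPE_PCCHAR. */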
34562
34563 static rtx
34564 ix86_expand_special_args_builtin (const struct builtin_description *d,
34565 tree exp, rtx target)
34566 {
34567 tree arg;
34568 rtx pat, op;
34569 unsigned int i, nargs, arg_adjust, memory;
34570 bool aligned_mem = false;
34571 struct
34572 {
34573 rtx op;
34574 enum machine_mode mode;
34575 } args[3];
34576 enum insn_code icode = d->icode;
34577 bool last_arg_constant = false;
34578 const struct insn_data_d *insn_p = &insn_data[icode];
34579 enum machine_mode tmode = insn_p->operand[0].mode;
34580 enum { load, store } klass;
34581
34582 switch ((enum ix86_builtin_func_type) d->flag)
34583 {
34584 case VOID_FTYPE_VOID:
34585 emit_insn (GEN_FCN (icode) (target));
34586 return 0;
34587 case VOID_FTYPE_UINT64:
34588 case VOID_FTYPE_UNSIGNED:
34589 nargs = 0;
34590 klass = store;
34591 memory = 0;
34592 break;
34593
34594 case INT_FTYPE_VOID:
34595 case UINT64_FTYPE_VOID:
34596 case UNSIGNED_FTYPE_VOID:
34597 nargs = 0;
34598 klass = load;
34599 memory = 0;
34600 break;
34601 case UINT64_FTYPE_PUNSIGNED:
34602 case V2DI_FTYPE_PV2DI:
34603 case V4DI_FTYPE_PV4DI:
34604 case V32QI_FTYPE_PCCHAR:
34605 case V16QI_FTYPE_PCCHAR:
34606 case V8SF_FTYPE_PCV4SF:
34607 case V8SF_FTYPE_PCFLOAT:
34608 case V4SF_FTYPE_PCFLOAT:
34609 case V4DF_FTYPE_PCV2DF:
34610 case V4DF_FTYPE_PCDOUBLE:
34611 case V2DF_FTYPE_PCDOUBLE:
34612 case VOID_FTYPE_PVOID:
34613 case V16SI_FTYPE_PV4SI:
34614 case V16SF_FTYPE_PV4SF:
34615 case V8DI_FTYPE_PV4DI:
34616 case V8DI_FTYPE_PV8DI:
34617 case V8DF_FTYPE_PV4DF:
34618 nargs = 1;
34619 klass = load;
34620 memory = 0;
34621 switch (icode)
34622 {
34623 case CODE_FOR_sse4_1_movntdqa:
34624 case CODE_FOR_avx2_movntdqa:
34625 case CODE_FOR_avx512f_movntdqa:
34626 aligned_mem = true;
34627 break;
34628 default:
34629 break;
34630 }
34631 break;
34632 case VOID_FTYPE_PV2SF_V4SF:
34633 case VOID_FTYPE_PV8DI_V8DI:
34634 case VOID_FTYPE_PV4DI_V4DI:
34635 case VOID_FTYPE_PV2DI_V2DI:
34636 case VOID_FTYPE_PCHAR_V32QI:
34637 case VOID_FTYPE_PCHAR_V16QI:
34638 case VOID_FTYPE_PFLOAT_V16SF:
34639 case VOID_FTYPE_PFLOAT_V8SF:
34640 case VOID_FTYPE_PFLOAT_V4SF:
34641 case VOID_FTYPE_PDOUBLE_V8DF:
34642 case VOID_FTYPE_PDOUBLE_V4DF:
34643 case VOID_FTYPE_PDOUBLE_V2DF:
34644 case VOID_FTYPE_PLONGLONG_LONGLONG:
34645 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34646 case VOID_FTYPE_PINT_INT:
34647 nargs = 1;
34648 klass = store;
34649 /* Reserve memory operand for target. */
34650 memory = ARRAY_SIZE (args);
34651 switch (icode)
34652 {
34653 /* These builtins and instructions require the memory
34654 to be properly aligned. */
34655 case CODE_FOR_avx_movntv4di:
34656 case CODE_FOR_sse2_movntv2di:
34657 case CODE_FOR_avx_movntv8sf:
34658 case CODE_FOR_sse_movntv4sf:
34659 case CODE_FOR_sse4a_vmmovntv4sf:
34660 case CODE_FOR_avx_movntv4df:
34661 case CODE_FOR_sse2_movntv2df:
34662 case CODE_FOR_sse4a_vmmovntv2df:
34663 case CODE_FOR_sse2_movntidi:
34664 case CODE_FOR_sse_movntq:
34665 case CODE_FOR_sse2_movntisi:
34666 case CODE_FOR_avx512f_movntv16sf:
34667 case CODE_FOR_avx512f_movntv8df:
34668 case CODE_FOR_avx512f_movntv8di:
34669 aligned_mem = true;
34670 break;
34671 default:
34672 break;
34673 }
34674 break;
34675 case V4SF_FTYPE_V4SF_PCV2SF:
34676 case V2DF_FTYPE_V2DF_PCDOUBLE:
34677 nargs = 2;
34678 klass = load;
34679 memory = 1;
34680 break;
34681 case V8SF_FTYPE_PCV8SF_V8SI:
34682 case V4DF_FTYPE_PCV4DF_V4DI:
34683 case V4SF_FTYPE_PCV4SF_V4SI:
34684 case V2DF_FTYPE_PCV2DF_V2DI:
34685 case V8SI_FTYPE_PCV8SI_V8SI:
34686 case V4DI_FTYPE_PCV4DI_V4DI:
34687 case V4SI_FTYPE_PCV4SI_V4SI:
34688 case V2DI_FTYPE_PCV2DI_V2DI:
34689 nargs = 2;
34690 klass = load;
34691 memory = 0;
34692 break;
34693 case VOID_FTYPE_PV8DF_V8DF_QI:
34694 case VOID_FTYPE_PV16SF_V16SF_HI:
34695 case VOID_FTYPE_PV8DI_V8DI_QI:
34696 case VOID_FTYPE_PV16SI_V16SI_HI:
34697 switch (icode)
34698 {
34699 /* These builtins and instructions require the memory
34700 to be properly aligned. */
34701 case CODE_FOR_avx512f_storev16sf_mask:
34702 case CODE_FOR_avx512f_storev16si_mask:
34703 case CODE_FOR_avx512f_storev8df_mask:
34704 case CODE_FOR_avx512f_storev8di_mask:
34705 aligned_mem = true;
34706 break;
34707 default:
34708 break;
34709 }
34710 /* FALLTHRU */
34711 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34712 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34713 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34714 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34715 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34716 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34717 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34718 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34719 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34720 case VOID_FTYPE_PFLOAT_V4SF_QI:
34721 case VOID_FTYPE_PV8SI_V8DI_QI:
34722 case VOID_FTYPE_PV8HI_V8DI_QI:
34723 case VOID_FTYPE_PV16HI_V16SI_HI:
34724 case VOID_FTYPE_PV16QI_V8DI_QI:
34725 case VOID_FTYPE_PV16QI_V16SI_HI:
34726 nargs = 2;
34727 klass = store;
34728 /* Reserve memory operand for target. */
34729 memory = ARRAY_SIZE (args);
34730 break;
34731 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34732 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34733 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34734 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34735 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34736 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34737 nargs = 3;
34738 klass = load;
34739 memory = 0;
34740 switch (icode)
34741 {
34742 /* These builtins and instructions require the memory
34743 to be properly aligned. */
34744 case CODE_FOR_avx512f_loadv16sf_mask:
34745 case CODE_FOR_avx512f_loadv16si_mask:
34746 case CODE_FOR_avx512f_loadv8df_mask:
34747 case CODE_FOR_avx512f_loadv8di_mask:
34748 aligned_mem = true;
34749 break;
34750 default:
34751 break;
34752 }
34753 break;
34754 case VOID_FTYPE_UINT_UINT_UINT:
34755 case VOID_FTYPE_UINT64_UINT_UINT:
34756 case UCHAR_FTYPE_UINT_UINT_UINT:
34757 case UCHAR_FTYPE_UINT64_UINT_UINT:
34758 nargs = 3;
34759 klass = load;
34760 memory = ARRAY_SIZE (args);
34761 last_arg_constant = true;
34762 break;
34763 default:
34764 gcc_unreachable ();
34765 }
34766
34767 gcc_assert (nargs <= ARRAY_SIZE (args));
34768
34769 if (klass == store)
34770 {
34771 arg = CALL_EXPR_ARG (exp, 0);
34772 op = expand_normal (arg);
34773 gcc_assert (target == 0);
34774 if (memory)
34775 {
34776 op = ix86_zero_extend_to_Pmode (op);
34777 target = gen_rtx_MEM (tmode, op);
34778 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34779 on it. Try to improve it using get_pointer_alignment,
34780 and if the special builtin is one that requires strict
34781 mode alignment, also from its GET_MODE_ALIGNMENT.
34782 Failure to do so could lead to ix86_legitimate_combined_insn
34783 rejecting all changes to such insns. */
34784 unsigned int align = get_pointer_alignment (arg);
34785 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34786 align = GET_MODE_ALIGNMENT (tmode);
34787 if (MEM_ALIGN (target) < align)
34788 set_mem_align (target, align);
34789 }
34790 else
34791 target = force_reg (tmode, op);
34792 arg_adjust = 1;
34793 }
34794 else
34795 {
34796 arg_adjust = 0;
34797 if (optimize
34798 || target == 0
34799 || !register_operand (target, tmode)
34800 || GET_MODE (target) != tmode)
34801 target = gen_reg_rtx (tmode);
34802 }
34803
34804 for (i = 0; i < nargs; i++)
34805 {
34806 enum machine_mode mode = insn_p->operand[i + 1].mode;
34807 bool match;
34808
34809 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34810 op = expand_normal (arg);
34811 match = insn_p->operand[i + 1].predicate (op, mode);
34812
34813 if (last_arg_constant && (i + 1) == nargs)
34814 {
34815 if (!match)
34816 {
34817 if (icode == CODE_FOR_lwp_lwpvalsi3
34818 || icode == CODE_FOR_lwp_lwpinssi3
34819 || icode == CODE_FOR_lwp_lwpvaldi3
34820 || icode == CODE_FOR_lwp_lwpinsdi3)
34821 error ("the last argument must be a 32-bit immediate");
34822 else
34823 error ("the last argument must be an 8-bit immediate");
34824 return const0_rtx;
34825 }
34826 }
34827 else
34828 {
34829 if (i == memory)
34830 {
34831 /* This must be the memory operand. */
34832 op = ix86_zero_extend_to_Pmode (op);
34833 op = gen_rtx_MEM (mode, op);
34834 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34835 on it. Try to improve it using get_pointer_alignment,
34836 and if the special builtin is one that requires strict
34837 mode alignment, also from its GET_MODE_ALIGNMENT.
34838 Failure to do so could lead to ix86_legitimate_combined_insn
34839 rejecting all changes to such insns. */
34840 unsigned int align = get_pointer_alignment (arg);
34841 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34842 align = GET_MODE_ALIGNMENT (mode);
34843 if (MEM_ALIGN (op) < align)
34844 set_mem_align (op, align);
34845 }
34846 else
34847 {
34848 /* This must be a register. */
34849 if (VECTOR_MODE_P (mode))
34850 op = safe_vector_operand (op, mode);
34851
34852 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34853 op = copy_to_mode_reg (mode, op);
34854 else
34855 {
34856 op = copy_to_reg (op);
34857 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34858 }
34859 }
34860 }
34861
34862 args[i].op = op;
34863 args[i].mode = mode;
34864 }
34865
34866 switch (nargs)
34867 {
34868 case 0:
34869 pat = GEN_FCN (icode) (target);
34870 break;
34871 case 1:
34872 pat = GEN_FCN (icode) (target, args[0].op);
34873 break;
34874 case 2:
34875 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34876 break;
34877 case 3:
34878 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34879 break;
34880 default:
34881 gcc_unreachable ();
34882 }
34883
34884 if (! pat)
34885 return 0;
34886 emit_insn (pat);
34887 return klass == store ? 0 : target;
34888 }
34889
34890 /* Return the integer constant in ARG. Constrain it to be in the range
34891 of the subparts of VEC_TYPE; issue an error if not. */
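/* For example, a selector of 4 for a 4-element vector triggers the
"range 0..3" error below and 0 is returned so expansion can continue. */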
34892
34893 static int
34894 get_element_number (tree vec_type, tree arg)
34895 {
34896 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34897
34898 if (!tree_fits_uhwi_p (arg)
34899 || (elt = tree_to_uhwi (arg), elt > max))
34900 {
34901 error ("selector must be an integer constant in the range 0..%wi", max);
34902 return 0;
34903 }
34904
34905 return elt;
34906 }
34907
34908 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34909 ix86_expand_vector_init. We DO have language-level syntax for this, in
34910 the form of (type){ init-list }. Except that since we can't place emms
34911 instructions from inside the compiler, we can't allow the use of MMX
34912 registers unless the user explicitly asks for it. So we do *not* define
34913 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34914 we have builtins invoked by mmintrin.h that give us license to emit
34915 these sorts of instructions. */
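/* For example (a sketch, assuming the usual mmintrin.h wrapper):
__m64 v = _mm_set_pi32 (hi, lo);
reaches this routine through IX86_BUILTIN_VEC_INIT_V2SI and is expanded by
ix86_expand_vector_init on the two scalar operands. */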
34916
34917 static rtx
34918 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34919 {
34920 enum machine_mode tmode = TYPE_MODE (type);
34921 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34922 int i, n_elt = GET_MODE_NUNITS (tmode);
34923 rtvec v = rtvec_alloc (n_elt);
34924
34925 gcc_assert (VECTOR_MODE_P (tmode));
34926 gcc_assert (call_expr_nargs (exp) == n_elt);
34927
34928 for (i = 0; i < n_elt; ++i)
34929 {
34930 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34931 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34932 }
34933
34934 if (!target || !register_operand (target, tmode))
34935 target = gen_reg_rtx (tmode);
34936
34937 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34938 return target;
34939 }
34940
34941 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34942 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34943 had a language-level syntax for referencing vector elements. */
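/* For example (a sketch, assuming the usual xmmintrin.h wrapper):
int e = _mm_extract_pi16 (v, 2);
reaches this routine through IX86_BUILTIN_VEC_EXT_V4HI; the selector is
validated by get_element_number above. */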
34944
34945 static rtx
34946 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34947 {
34948 enum machine_mode tmode, mode0;
34949 tree arg0, arg1;
34950 int elt;
34951 rtx op0;
34952
34953 arg0 = CALL_EXPR_ARG (exp, 0);
34954 arg1 = CALL_EXPR_ARG (exp, 1);
34955
34956 op0 = expand_normal (arg0);
34957 elt = get_element_number (TREE_TYPE (arg0), arg1);
34958
34959 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34960 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34961 gcc_assert (VECTOR_MODE_P (mode0));
34962
34963 op0 = force_reg (mode0, op0);
34964
34965 if (optimize || !target || !register_operand (target, tmode))
34966 target = gen_reg_rtx (tmode);
34967
34968 ix86_expand_vector_extract (true, target, op0, elt);
34969
34970 return target;
34971 }
34972
34973 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34974 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34975 a language-level syntax for referencing vector elements. */
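/* For example (a sketch, assuming the usual xmmintrin.h wrapper):
__m64 r = _mm_insert_pi16 (v, 99, 1);
reaches this routine through IX86_BUILTIN_VEC_SET_V4HI; the source vector is
copied first so the builtin's first argument is left unmodified. */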
34976
34977 static rtx
34978 ix86_expand_vec_set_builtin (tree exp)
34979 {
34980 enum machine_mode tmode, mode1;
34981 tree arg0, arg1, arg2;
34982 int elt;
34983 rtx op0, op1, target;
34984
34985 arg0 = CALL_EXPR_ARG (exp, 0);
34986 arg1 = CALL_EXPR_ARG (exp, 1);
34987 arg2 = CALL_EXPR_ARG (exp, 2);
34988
34989 tmode = TYPE_MODE (TREE_TYPE (arg0));
34990 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34991 gcc_assert (VECTOR_MODE_P (tmode));
34992
34993 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34994 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34995 elt = get_element_number (TREE_TYPE (arg0), arg2);
34996
34997 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34998 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34999
35000 op0 = force_reg (tmode, op0);
35001 op1 = force_reg (mode1, op1);
35002
35003 /* OP0 is the source of these builtin functions and shouldn't be
35004 modified. Create a copy, use it, and return it as the target. */
35005 target = gen_reg_rtx (tmode);
35006 emit_move_insn (target, op0);
35007 ix86_expand_vector_set (true, target, op1, elt);
35008
35009 return target;
35010 }
35011
35012 /* Expand an expression EXP that calls a built-in function,
35013 with result going to TARGET if that's convenient
35014 (and in mode MODE if that's convenient).
35015 SUBTARGET may be used as the target for computing one of EXP's operands.
35016 IGNORE is nonzero if the value is to be ignored. */
35017
35018 static rtx
35019 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35020 enum machine_mode mode, int ignore)
35021 {
35022 const struct builtin_description *d;
35023 size_t i;
35024 enum insn_code icode;
35025 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35026 tree arg0, arg1, arg2, arg3, arg4;
35027 rtx op0, op1, op2, op3, op4, pat, insn;
35028 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35029 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35030
35031 /* For CPU builtins that can be folded, fold first and expand the fold. */
35032 switch (fcode)
35033 {
35034 case IX86_BUILTIN_CPU_INIT:
35035 {
35036 /* Make it call __cpu_indicator_init in libgcc. */
35037 tree call_expr, fndecl, type;
35038 type = build_function_type_list (integer_type_node, NULL_TREE);
35039 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35040 call_expr = build_call_expr (fndecl, 0);
35041 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35042 }
35043 case IX86_BUILTIN_CPU_IS:
35044 case IX86_BUILTIN_CPU_SUPPORTS:
35045 {
35046 tree arg0 = CALL_EXPR_ARG (exp, 0);
35047 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35048 gcc_assert (fold_expr != NULL_TREE);
35049 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35050 }
35051 }
35052
35053 /* Determine whether the builtin function is available under the current ISA.
35054 Originally the builtin was not created if it wasn't applicable to the
35055 current ISA based on the command line switches. With function specific
35056 options, we need to check in the context of the function making the call
35057 whether it is supported. */
35058 if (ix86_builtins_isa[fcode].isa
35059 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35060 {
35061 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35062 NULL, (enum fpmath_unit) 0, false);
35063
35064 if (!opts)
35065 error ("%qE needs unknown isa option", fndecl);
35066 else
35067 {
35068 gcc_assert (opts != NULL);
35069 error ("%qE needs isa option %s", fndecl, opts);
35070 free (opts);
35071 }
35072 return const0_rtx;
35073 }
35074
35075 switch (fcode)
35076 {
35077 case IX86_BUILTIN_MASKMOVQ:
35078 case IX86_BUILTIN_MASKMOVDQU:
35079 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35080 ? CODE_FOR_mmx_maskmovq
35081 : CODE_FOR_sse2_maskmovdqu);
35082 /* Note the arg order is different from the operand order. */
35083 arg1 = CALL_EXPR_ARG (exp, 0);
35084 arg2 = CALL_EXPR_ARG (exp, 1);
35085 arg0 = CALL_EXPR_ARG (exp, 2);
35086 op0 = expand_normal (arg0);
35087 op1 = expand_normal (arg1);
35088 op2 = expand_normal (arg2);
35089 mode0 = insn_data[icode].operand[0].mode;
35090 mode1 = insn_data[icode].operand[1].mode;
35091 mode2 = insn_data[icode].operand[2].mode;
35092
35093 op0 = ix86_zero_extend_to_Pmode (op0);
35094 op0 = gen_rtx_MEM (mode1, op0);
35095
35096 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35097 op0 = copy_to_mode_reg (mode0, op0);
35098 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35099 op1 = copy_to_mode_reg (mode1, op1);
35100 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35101 op2 = copy_to_mode_reg (mode2, op2);
35102 pat = GEN_FCN (icode) (op0, op1, op2);
35103 if (! pat)
35104 return 0;
35105 emit_insn (pat);
35106 return 0;
35107
35108 case IX86_BUILTIN_LDMXCSR:
35109 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35110 target = assign_386_stack_local (SImode, SLOT_TEMP);
35111 emit_move_insn (target, op0);
35112 emit_insn (gen_sse_ldmxcsr (target));
35113 return 0;
35114
35115 case IX86_BUILTIN_STMXCSR:
35116 target = assign_386_stack_local (SImode, SLOT_TEMP);
35117 emit_insn (gen_sse_stmxcsr (target));
35118 return copy_to_mode_reg (SImode, target);
35119
35120 case IX86_BUILTIN_CLFLUSH:
35121 arg0 = CALL_EXPR_ARG (exp, 0);
35122 op0 = expand_normal (arg0);
35123 icode = CODE_FOR_sse2_clflush;
35124 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35125 op0 = ix86_zero_extend_to_Pmode (op0);
35126
35127 emit_insn (gen_sse2_clflush (op0));
35128 return 0;
35129
35130 case IX86_BUILTIN_CLFLUSHOPT:
35131 arg0 = CALL_EXPR_ARG (exp, 0);
35132 op0 = expand_normal (arg0);
35133 icode = CODE_FOR_clflushopt;
35134 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35135 op0 = ix86_zero_extend_to_Pmode (op0);
35136
35137 emit_insn (gen_clflushopt (op0));
35138 return 0;
35139
35140 case IX86_BUILTIN_MONITOR:
35141 arg0 = CALL_EXPR_ARG (exp, 0);
35142 arg1 = CALL_EXPR_ARG (exp, 1);
35143 arg2 = CALL_EXPR_ARG (exp, 2);
35144 op0 = expand_normal (arg0);
35145 op1 = expand_normal (arg1);
35146 op2 = expand_normal (arg2);
35147 if (!REG_P (op0))
35148 op0 = ix86_zero_extend_to_Pmode (op0);
35149 if (!REG_P (op1))
35150 op1 = copy_to_mode_reg (SImode, op1);
35151 if (!REG_P (op2))
35152 op2 = copy_to_mode_reg (SImode, op2);
35153 emit_insn (ix86_gen_monitor (op0, op1, op2));
35154 return 0;
35155
35156 case IX86_BUILTIN_MWAIT:
35157 arg0 = CALL_EXPR_ARG (exp, 0);
35158 arg1 = CALL_EXPR_ARG (exp, 1);
35159 op0 = expand_normal (arg0);
35160 op1 = expand_normal (arg1);
35161 if (!REG_P (op0))
35162 op0 = copy_to_mode_reg (SImode, op0);
35163 if (!REG_P (op1))
35164 op1 = copy_to_mode_reg (SImode, op1);
35165 emit_insn (gen_sse3_mwait (op0, op1));
35166 return 0;
35167
35168 case IX86_BUILTIN_VEC_INIT_V2SI:
35169 case IX86_BUILTIN_VEC_INIT_V4HI:
35170 case IX86_BUILTIN_VEC_INIT_V8QI:
35171 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35172
35173 case IX86_BUILTIN_VEC_EXT_V2DF:
35174 case IX86_BUILTIN_VEC_EXT_V2DI:
35175 case IX86_BUILTIN_VEC_EXT_V4SF:
35176 case IX86_BUILTIN_VEC_EXT_V4SI:
35177 case IX86_BUILTIN_VEC_EXT_V8HI:
35178 case IX86_BUILTIN_VEC_EXT_V2SI:
35179 case IX86_BUILTIN_VEC_EXT_V4HI:
35180 case IX86_BUILTIN_VEC_EXT_V16QI:
35181 return ix86_expand_vec_ext_builtin (exp, target);
35182
35183 case IX86_BUILTIN_VEC_SET_V2DI:
35184 case IX86_BUILTIN_VEC_SET_V4SF:
35185 case IX86_BUILTIN_VEC_SET_V4SI:
35186 case IX86_BUILTIN_VEC_SET_V8HI:
35187 case IX86_BUILTIN_VEC_SET_V4HI:
35188 case IX86_BUILTIN_VEC_SET_V16QI:
35189 return ix86_expand_vec_set_builtin (exp);
35190
35191 case IX86_BUILTIN_INFQ:
35192 case IX86_BUILTIN_HUGE_VALQ:
35193 {
35194 REAL_VALUE_TYPE inf;
35195 rtx tmp;
35196
35197 real_inf (&inf);
35198 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35199
35200 tmp = validize_mem (force_const_mem (mode, tmp));
35201
35202 if (target == 0)
35203 target = gen_reg_rtx (mode);
35204
35205 emit_move_insn (target, tmp);
35206 return target;
35207 }
35208
35209 case IX86_BUILTIN_RDPMC:
35210 case IX86_BUILTIN_RDTSC:
35211 case IX86_BUILTIN_RDTSCP:
35212
35213 op0 = gen_reg_rtx (DImode);
35214 op1 = gen_reg_rtx (DImode);
35215
35216 if (fcode == IX86_BUILTIN_RDPMC)
35217 {
35218 arg0 = CALL_EXPR_ARG (exp, 0);
35219 op2 = expand_normal (arg0);
35220 if (!register_operand (op2, SImode))
35221 op2 = copy_to_mode_reg (SImode, op2);
35222
35223 insn = (TARGET_64BIT
35224 ? gen_rdpmc_rex64 (op0, op1, op2)
35225 : gen_rdpmc (op0, op2));
35226 emit_insn (insn);
35227 }
35228 else if (fcode == IX86_BUILTIN_RDTSC)
35229 {
35230 insn = (TARGET_64BIT
35231 ? gen_rdtsc_rex64 (op0, op1)
35232 : gen_rdtsc (op0));
35233 emit_insn (insn);
35234 }
35235 else
35236 {
35237 op2 = gen_reg_rtx (SImode);
35238
35239 insn = (TARGET_64BIT
35240 ? gen_rdtscp_rex64 (op0, op1, op2)
35241 : gen_rdtscp (op0, op2));
35242 emit_insn (insn);
35243
35244 arg0 = CALL_EXPR_ARG (exp, 0);
35245 op4 = expand_normal (arg0);
35246 if (!address_operand (op4, VOIDmode))
35247 {
35248 op4 = convert_memory_address (Pmode, op4);
35249 op4 = copy_addr_to_reg (op4);
35250 }
35251 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35252 }
35253
35254 if (target == 0)
35255 {
35256 /* mode is VOIDmode if __builtin_rd* has been called
35257 without lhs. */
35258 if (mode == VOIDmode)
35259 return target;
35260 target = gen_reg_rtx (mode);
35261 }
35262
35263 if (TARGET_64BIT)
35264 {
35265 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35266 op1, 1, OPTAB_DIRECT);
35267 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35268 op0, 1, OPTAB_DIRECT);
35269 }
35270
35271 emit_move_insn (target, op0);
35272 return target;
35273
35274 case IX86_BUILTIN_FXSAVE:
35275 case IX86_BUILTIN_FXRSTOR:
35276 case IX86_BUILTIN_FXSAVE64:
35277 case IX86_BUILTIN_FXRSTOR64:
35278 case IX86_BUILTIN_FNSTENV:
35279 case IX86_BUILTIN_FLDENV:
35280 case IX86_BUILTIN_FNSTSW:
35281 mode0 = BLKmode;
35282 switch (fcode)
35283 {
35284 case IX86_BUILTIN_FXSAVE:
35285 icode = CODE_FOR_fxsave;
35286 break;
35287 case IX86_BUILTIN_FXRSTOR:
35288 icode = CODE_FOR_fxrstor;
35289 break;
35290 case IX86_BUILTIN_FXSAVE64:
35291 icode = CODE_FOR_fxsave64;
35292 break;
35293 case IX86_BUILTIN_FXRSTOR64:
35294 icode = CODE_FOR_fxrstor64;
35295 break;
35296 case IX86_BUILTIN_FNSTENV:
35297 icode = CODE_FOR_fnstenv;
35298 break;
35299 case IX86_BUILTIN_FLDENV:
35300 icode = CODE_FOR_fldenv;
35301 break;
35302 case IX86_BUILTIN_FNSTSW:
35303 icode = CODE_FOR_fnstsw;
35304 mode0 = HImode;
35305 break;
35306 default:
35307 gcc_unreachable ();
35308 }
35309
35310 arg0 = CALL_EXPR_ARG (exp, 0);
35311 op0 = expand_normal (arg0);
35312
35313 if (!address_operand (op0, VOIDmode))
35314 {
35315 op0 = convert_memory_address (Pmode, op0);
35316 op0 = copy_addr_to_reg (op0);
35317 }
35318 op0 = gen_rtx_MEM (mode0, op0);
35319
35320 pat = GEN_FCN (icode) (op0);
35321 if (pat)
35322 emit_insn (pat);
35323 return 0;
35324
35325 case IX86_BUILTIN_XSAVE:
35326 case IX86_BUILTIN_XRSTOR:
35327 case IX86_BUILTIN_XSAVE64:
35328 case IX86_BUILTIN_XRSTOR64:
35329 case IX86_BUILTIN_XSAVEOPT:
35330 case IX86_BUILTIN_XSAVEOPT64:
35331 case IX86_BUILTIN_XSAVES:
35332 case IX86_BUILTIN_XRSTORS:
35333 case IX86_BUILTIN_XSAVES64:
35334 case IX86_BUILTIN_XRSTORS64:
35335 case IX86_BUILTIN_XSAVEC:
35336 case IX86_BUILTIN_XSAVEC64:
35337 arg0 = CALL_EXPR_ARG (exp, 0);
35338 arg1 = CALL_EXPR_ARG (exp, 1);
35339 op0 = expand_normal (arg0);
35340 op1 = expand_normal (arg1);
35341
35342 if (!address_operand (op0, VOIDmode))
35343 {
35344 op0 = convert_memory_address (Pmode, op0);
35345 op0 = copy_addr_to_reg (op0);
35346 }
35347 op0 = gen_rtx_MEM (BLKmode, op0);
35348
35349 op1 = force_reg (DImode, op1);
35350
35351 if (TARGET_64BIT)
35352 {
35353 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35354 NULL, 1, OPTAB_DIRECT);
35355 switch (fcode)
35356 {
35357 case IX86_BUILTIN_XSAVE:
35358 icode = CODE_FOR_xsave_rex64;
35359 break;
35360 case IX86_BUILTIN_XRSTOR:
35361 icode = CODE_FOR_xrstor_rex64;
35362 break;
35363 case IX86_BUILTIN_XSAVE64:
35364 icode = CODE_FOR_xsave64;
35365 break;
35366 case IX86_BUILTIN_XRSTOR64:
35367 icode = CODE_FOR_xrstor64;
35368 break;
35369 case IX86_BUILTIN_XSAVEOPT:
35370 icode = CODE_FOR_xsaveopt_rex64;
35371 break;
35372 case IX86_BUILTIN_XSAVEOPT64:
35373 icode = CODE_FOR_xsaveopt64;
35374 break;
35375 case IX86_BUILTIN_XSAVES:
35376 icode = CODE_FOR_xsaves_rex64;
35377 break;
35378 case IX86_BUILTIN_XRSTORS:
35379 icode = CODE_FOR_xrstors_rex64;
35380 break;
35381 case IX86_BUILTIN_XSAVES64:
35382 icode = CODE_FOR_xsaves64;
35383 break;
35384 case IX86_BUILTIN_XRSTORS64:
35385 icode = CODE_FOR_xrstors64;
35386 break;
35387 case IX86_BUILTIN_XSAVEC:
35388 icode = CODE_FOR_xsavec_rex64;
35389 break;
35390 case IX86_BUILTIN_XSAVEC64:
35391 icode = CODE_FOR_xsavec64;
35392 break;
35393 default:
35394 gcc_unreachable ();
35395 }
35396
35397 op2 = gen_lowpart (SImode, op2);
35398 op1 = gen_lowpart (SImode, op1);
35399 pat = GEN_FCN (icode) (op0, op1, op2);
35400 }
35401 else
35402 {
35403 switch (fcode)
35404 {
35405 case IX86_BUILTIN_XSAVE:
35406 icode = CODE_FOR_xsave;
35407 break;
35408 case IX86_BUILTIN_XRSTOR:
35409 icode = CODE_FOR_xrstor;
35410 break;
35411 case IX86_BUILTIN_XSAVEOPT:
35412 icode = CODE_FOR_xsaveopt;
35413 break;
35414 case IX86_BUILTIN_XSAVES:
35415 icode = CODE_FOR_xsaves;
35416 break;
35417 case IX86_BUILTIN_XRSTORS:
35418 icode = CODE_FOR_xrstors;
35419 break;
35420 case IX86_BUILTIN_XSAVEC:
35421 icode = CODE_FOR_xsavec;
35422 break;
35423 default:
35424 gcc_unreachable ();
35425 }
35426 pat = GEN_FCN (icode) (op0, op1);
35427 }
35428
35429 if (pat)
35430 emit_insn (pat);
35431 return 0;
35432
35433 case IX86_BUILTIN_LLWPCB:
35434 arg0 = CALL_EXPR_ARG (exp, 0);
35435 op0 = expand_normal (arg0);
35436 icode = CODE_FOR_lwp_llwpcb;
35437 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35438 op0 = ix86_zero_extend_to_Pmode (op0);
35439 emit_insn (gen_lwp_llwpcb (op0));
35440 return 0;
35441
35442 case IX86_BUILTIN_SLWPCB:
35443 icode = CODE_FOR_lwp_slwpcb;
35444 if (!target
35445 || !insn_data[icode].operand[0].predicate (target, Pmode))
35446 target = gen_reg_rtx (Pmode);
35447 emit_insn (gen_lwp_slwpcb (target));
35448 return target;
35449
35450 case IX86_BUILTIN_BEXTRI32:
35451 case IX86_BUILTIN_BEXTRI64:
35452 arg0 = CALL_EXPR_ARG (exp, 0);
35453 arg1 = CALL_EXPR_ARG (exp, 1);
35454 op0 = expand_normal (arg0);
35455 op1 = expand_normal (arg1);
35456 icode = (fcode == IX86_BUILTIN_BEXTRI32
35457 ? CODE_FOR_tbm_bextri_si
35458 : CODE_FOR_tbm_bextri_di);
35459 if (!CONST_INT_P (op1))
35460 {
35461 error ("last argument must be an immediate");
35462 return const0_rtx;
35463 }
35464 else
35465 {
35466 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35467 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35468 op1 = GEN_INT (length);
35469 op2 = GEN_INT (lsb_index);
35470 pat = GEN_FCN (icode) (target, op0, op1, op2);
35471 if (pat)
35472 emit_insn (pat);
35473 return target;
35474 }
35475
35476 case IX86_BUILTIN_RDRAND16_STEP:
35477 icode = CODE_FOR_rdrandhi_1;
35478 mode0 = HImode;
35479 goto rdrand_step;
35480
35481 case IX86_BUILTIN_RDRAND32_STEP:
35482 icode = CODE_FOR_rdrandsi_1;
35483 mode0 = SImode;
35484 goto rdrand_step;
35485
35486 case IX86_BUILTIN_RDRAND64_STEP:
35487 icode = CODE_FOR_rdranddi_1;
35488 mode0 = DImode;
35489
35490 rdrand_step:
35491 op0 = gen_reg_rtx (mode0);
35492 emit_insn (GEN_FCN (icode) (op0));
35493
35494 arg0 = CALL_EXPR_ARG (exp, 0);
35495 op1 = expand_normal (arg0);
35496 if (!address_operand (op1, VOIDmode))
35497 {
35498 op1 = convert_memory_address (Pmode, op1);
35499 op1 = copy_addr_to_reg (op1);
35500 }
35501 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35502
35503 op1 = gen_reg_rtx (SImode);
35504 emit_move_insn (op1, CONST1_RTX (SImode));
35505
35506 /* Emit SImode conditional move. */
35507 if (mode0 == HImode)
35508 {
35509 op2 = gen_reg_rtx (SImode);
35510 emit_insn (gen_zero_extendhisi2 (op2, op0));
35511 }
35512 else if (mode0 == SImode)
35513 op2 = op0;
35514 else
35515 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35516
35517 if (target == 0
35518 || !register_operand (target, SImode))
35519 target = gen_reg_rtx (SImode);
35520
35521 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35522 const0_rtx);
35523 emit_insn (gen_rtx_SET (VOIDmode, target,
35524 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35525 return target;
35526
35527 case IX86_BUILTIN_RDSEED16_STEP:
35528 icode = CODE_FOR_rdseedhi_1;
35529 mode0 = HImode;
35530 goto rdseed_step;
35531
35532 case IX86_BUILTIN_RDSEED32_STEP:
35533 icode = CODE_FOR_rdseedsi_1;
35534 mode0 = SImode;
35535 goto rdseed_step;
35536
35537 case IX86_BUILTIN_RDSEED64_STEP:
35538 icode = CODE_FOR_rdseeddi_1;
35539 mode0 = DImode;
35540
35541 rdseed_step:
35542 op0 = gen_reg_rtx (mode0);
35543 emit_insn (GEN_FCN (icode) (op0));
35544
35545 arg0 = CALL_EXPR_ARG (exp, 0);
35546 op1 = expand_normal (arg0);
35547 if (!address_operand (op1, VOIDmode))
35548 {
35549 op1 = convert_memory_address (Pmode, op1);
35550 op1 = copy_addr_to_reg (op1);
35551 }
35552 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35553
35554 op2 = gen_reg_rtx (QImode);
35555
35556 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35557 const0_rtx);
35558 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35559
35560 if (target == 0
35561 || !register_operand (target, SImode))
35562 target = gen_reg_rtx (SImode);
35563
35564 emit_insn (gen_zero_extendqisi2 (target, op2));
35565 return target;
35566
35567 case IX86_BUILTIN_ADDCARRYX32:
35568 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35569 mode0 = SImode;
35570 goto addcarryx;
35571
35572 case IX86_BUILTIN_ADDCARRYX64:
35573 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35574 mode0 = DImode;
35575
35576 addcarryx:
35577 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35578 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35579 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35580 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35581
35582 op0 = gen_reg_rtx (QImode);
35583
35584 /* Generate CF from input operand. */
35585 op1 = expand_normal (arg0);
35586 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35587 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35588
35589 /* Generate an ADCX (or ADC) instruction to compute X + Y + CF. */
35590 op2 = expand_normal (arg1);
35591 op3 = expand_normal (arg2);
35592
35593 if (!REG_P (op2))
35594 op2 = copy_to_mode_reg (mode0, op2);
35595 if (!REG_P (op3))
35596 op3 = copy_to_mode_reg (mode0, op3);
35597
35598 op0 = gen_reg_rtx (mode0);
35599
35600 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35601 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35602 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35603
35604 /* Store the result. */
35605 op4 = expand_normal (arg3);
35606 if (!address_operand (op4, VOIDmode))
35607 {
35608 op4 = convert_memory_address (Pmode, op4);
35609 op4 = copy_addr_to_reg (op4);
35610 }
35611 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35612
35613 /* Return current CF value. */
35614 if (target == 0)
35615 target = gen_reg_rtx (QImode);
35616
35617 PUT_MODE (pat, QImode);
35618 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35619 return target;
35620
35621 case IX86_BUILTIN_READ_FLAGS:
35622 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35623
35624 if (optimize
35625 || target == NULL_RTX
35626 || !nonimmediate_operand (target, word_mode)
35627 || GET_MODE (target) != word_mode)
35628 target = gen_reg_rtx (word_mode);
35629
35630 emit_insn (gen_pop (target));
35631 return target;
35632
35633 case IX86_BUILTIN_WRITE_FLAGS:
35634
35635 arg0 = CALL_EXPR_ARG (exp, 0);
35636 op0 = expand_normal (arg0);
35637 if (!general_no_elim_operand (op0, word_mode))
35638 op0 = copy_to_mode_reg (word_mode, op0);
35639
35640 emit_insn (gen_push (op0));
35641 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35642 return 0;
35643
35644 case IX86_BUILTIN_KORTESTC16:
35645 icode = CODE_FOR_kortestchi;
35646 mode0 = HImode;
35647 mode1 = CCCmode;
35648 goto kortest;
35649
35650 case IX86_BUILTIN_KORTESTZ16:
35651 icode = CODE_FOR_kortestzhi;
35652 mode0 = HImode;
35653 mode1 = CCZmode;
35654
35655 kortest:
35656 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35657 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35658 op0 = expand_normal (arg0);
35659 op1 = expand_normal (arg1);
35660
35661 op0 = copy_to_reg (op0);
35662 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35663 op1 = copy_to_reg (op1);
35664 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35665
35666 target = gen_reg_rtx (QImode);
35667 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35668
35669 /* Emit kortest. */
35670 emit_insn (GEN_FCN (icode) (op0, op1));
35671 /* And use setcc to return result from flags. */
35672 ix86_expand_setcc (target, EQ,
35673 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35674 return target;
35675
35676 case IX86_BUILTIN_GATHERSIV2DF:
35677 icode = CODE_FOR_avx2_gathersiv2df;
35678 goto gather_gen;
35679 case IX86_BUILTIN_GATHERSIV4DF:
35680 icode = CODE_FOR_avx2_gathersiv4df;
35681 goto gather_gen;
35682 case IX86_BUILTIN_GATHERDIV2DF:
35683 icode = CODE_FOR_avx2_gatherdiv2df;
35684 goto gather_gen;
35685 case IX86_BUILTIN_GATHERDIV4DF:
35686 icode = CODE_FOR_avx2_gatherdiv4df;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERSIV4SF:
35689 icode = CODE_FOR_avx2_gathersiv4sf;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERSIV8SF:
35692 icode = CODE_FOR_avx2_gathersiv8sf;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERDIV4SF:
35695 icode = CODE_FOR_avx2_gatherdiv4sf;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERDIV8SF:
35698 icode = CODE_FOR_avx2_gatherdiv8sf;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERSIV2DI:
35701 icode = CODE_FOR_avx2_gathersiv2di;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERSIV4DI:
35704 icode = CODE_FOR_avx2_gathersiv4di;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERDIV2DI:
35707 icode = CODE_FOR_avx2_gatherdiv2di;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERDIV4DI:
35710 icode = CODE_FOR_avx2_gatherdiv4di;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERSIV4SI:
35713 icode = CODE_FOR_avx2_gathersiv4si;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERSIV8SI:
35716 icode = CODE_FOR_avx2_gathersiv8si;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERDIV4SI:
35719 icode = CODE_FOR_avx2_gatherdiv4si;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERDIV8SI:
35722 icode = CODE_FOR_avx2_gatherdiv8si;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERALTSIV4DF:
35725 icode = CODE_FOR_avx2_gathersiv4df;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERALTDIV8SF:
35728 icode = CODE_FOR_avx2_gatherdiv8sf;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERALTSIV4DI:
35731 icode = CODE_FOR_avx2_gathersiv4di;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERALTDIV8SI:
35734 icode = CODE_FOR_avx2_gatherdiv8si;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHER3SIV16SF:
35737 icode = CODE_FOR_avx512f_gathersiv16sf;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHER3SIV8DF:
35740 icode = CODE_FOR_avx512f_gathersiv8df;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHER3DIV16SF:
35743 icode = CODE_FOR_avx512f_gatherdiv16sf;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3DIV8DF:
35746 icode = CODE_FOR_avx512f_gatherdiv8df;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3SIV16SI:
35749 icode = CODE_FOR_avx512f_gathersiv16si;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3SIV8DI:
35752 icode = CODE_FOR_avx512f_gathersiv8di;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3DIV16SI:
35755 icode = CODE_FOR_avx512f_gatherdiv16si;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3DIV8DI:
35758 icode = CODE_FOR_avx512f_gatherdiv8di;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35761 icode = CODE_FOR_avx512f_gathersiv8df;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35764 icode = CODE_FOR_avx512f_gatherdiv16sf;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35767 icode = CODE_FOR_avx512f_gathersiv8di;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35770 icode = CODE_FOR_avx512f_gatherdiv16si;
35771 goto gather_gen;
35772 case IX86_BUILTIN_SCATTERSIV16SF:
35773 icode = CODE_FOR_avx512f_scattersiv16sf;
35774 goto scatter_gen;
35775 case IX86_BUILTIN_SCATTERSIV8DF:
35776 icode = CODE_FOR_avx512f_scattersiv8df;
35777 goto scatter_gen;
35778 case IX86_BUILTIN_SCATTERDIV16SF:
35779 icode = CODE_FOR_avx512f_scatterdiv16sf;
35780 goto scatter_gen;
35781 case IX86_BUILTIN_SCATTERDIV8DF:
35782 icode = CODE_FOR_avx512f_scatterdiv8df;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERSIV16SI:
35785 icode = CODE_FOR_avx512f_scattersiv16si;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERSIV8DI:
35788 icode = CODE_FOR_avx512f_scattersiv8di;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERDIV16SI:
35791 icode = CODE_FOR_avx512f_scatterdiv16si;
35792 goto scatter_gen;
35793 case IX86_BUILTIN_SCATTERDIV8DI:
35794 icode = CODE_FOR_avx512f_scatterdiv8di;
35795 goto scatter_gen;
35796
35797 case IX86_BUILTIN_GATHERPFDPD:
35798 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35799 goto vec_prefetch_gen;
35800 case IX86_BUILTIN_GATHERPFDPS:
35801 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35802 goto vec_prefetch_gen;
35803 case IX86_BUILTIN_GATHERPFQPD:
35804 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35805 goto vec_prefetch_gen;
35806 case IX86_BUILTIN_GATHERPFQPS:
35807 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_SCATTERPFDPD:
35810 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_SCATTERPFDPS:
35813 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_SCATTERPFQPD:
35816 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35817 goto vec_prefetch_gen;
35818 case IX86_BUILTIN_SCATTERPFQPS:
35819 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35820 goto vec_prefetch_gen;
35821
35822 gather_gen:
35823 rtx half;
35824 rtx (*gen) (rtx, rtx);
35825
35826 arg0 = CALL_EXPR_ARG (exp, 0);
35827 arg1 = CALL_EXPR_ARG (exp, 1);
35828 arg2 = CALL_EXPR_ARG (exp, 2);
35829 arg3 = CALL_EXPR_ARG (exp, 3);
35830 arg4 = CALL_EXPR_ARG (exp, 4);
35831 op0 = expand_normal (arg0);
35832 op1 = expand_normal (arg1);
35833 op2 = expand_normal (arg2);
35834 op3 = expand_normal (arg3);
35835 op4 = expand_normal (arg4);
35836 /* Note the arg order is different from the operand order. */
35837 mode0 = insn_data[icode].operand[1].mode;
35838 mode2 = insn_data[icode].operand[3].mode;
35839 mode3 = insn_data[icode].operand[4].mode;
35840 mode4 = insn_data[icode].operand[5].mode;
35841
35842 if (target == NULL_RTX
35843 || GET_MODE (target) != insn_data[icode].operand[0].mode
35844 || !insn_data[icode].operand[0].predicate (target,
35845 GET_MODE (target)))
35846 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35847 else
35848 subtarget = target;
35849
35850 switch (fcode)
35851 {
35852 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35853 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35854 half = gen_reg_rtx (V8SImode);
35855 if (!nonimmediate_operand (op2, V16SImode))
35856 op2 = copy_to_mode_reg (V16SImode, op2);
35857 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35858 op2 = half;
35859 break;
35860 case IX86_BUILTIN_GATHERALTSIV4DF:
35861 case IX86_BUILTIN_GATHERALTSIV4DI:
35862 half = gen_reg_rtx (V4SImode);
35863 if (!nonimmediate_operand (op2, V8SImode))
35864 op2 = copy_to_mode_reg (V8SImode, op2);
35865 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35866 op2 = half;
35867 break;
35868 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35869 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35870 half = gen_reg_rtx (mode0);
35871 if (mode0 == V8SFmode)
35872 gen = gen_vec_extract_lo_v16sf;
35873 else
35874 gen = gen_vec_extract_lo_v16si;
35875 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35876 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35877 emit_insn (gen (half, op0));
35878 op0 = half;
35879 if (GET_MODE (op3) != VOIDmode)
35880 {
35881 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35882 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35883 emit_insn (gen (half, op3));
35884 op3 = half;
35885 }
35886 break;
35887 case IX86_BUILTIN_GATHERALTDIV8SF:
35888 case IX86_BUILTIN_GATHERALTDIV8SI:
35889 half = gen_reg_rtx (mode0);
35890 if (mode0 == V4SFmode)
35891 gen = gen_vec_extract_lo_v8sf;
35892 else
35893 gen = gen_vec_extract_lo_v8si;
35894 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35895 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35896 emit_insn (gen (half, op0));
35897 op0 = half;
35898 if (GET_MODE (op3) != VOIDmode)
35899 {
35900 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35901 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35902 emit_insn (gen (half, op3));
35903 op3 = half;
35904 }
35905 break;
35906 default:
35907 break;
35908 }
35909
35910 /* Force the memory operand to use only a base register here. But we
35911 don't want to do this to the memory operands of other builtin
35912 functions. */
35913 op1 = ix86_zero_extend_to_Pmode (op1);
35914
35915 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35916 op0 = copy_to_mode_reg (mode0, op0);
35917 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35918 op1 = copy_to_mode_reg (Pmode, op1);
35919 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35920 op2 = copy_to_mode_reg (mode2, op2);
35921 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35922 {
35923 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35924 op3 = copy_to_mode_reg (mode3, op3);
35925 }
35926 else
35927 {
35928 op3 = copy_to_reg (op3);
35929 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35930 }
35931 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35932 {
35933 error ("the last argument must be scale 1, 2, 4, 8");
35934 return const0_rtx;
35935 }
35936
35937 /* Optimize. If mask is known to have all high bits set,
35938 replace op0 with pc_rtx to signal that the instruction
35939 overwrites the whole destination and doesn't use its
35940 previous contents. */
35941 if (optimize)
35942 {
35943 if (TREE_CODE (arg3) == INTEGER_CST)
35944 {
35945 if (integer_all_onesp (arg3))
35946 op0 = pc_rtx;
35947 }
35948 else if (TREE_CODE (arg3) == VECTOR_CST)
35949 {
35950 unsigned int negative = 0;
35951 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35952 {
35953 tree cst = VECTOR_CST_ELT (arg3, i);
35954 if (TREE_CODE (cst) == INTEGER_CST
35955 && tree_int_cst_sign_bit (cst))
35956 negative++;
35957 else if (TREE_CODE (cst) == REAL_CST
35958 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35959 negative++;
35960 }
35961 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35962 op0 = pc_rtx;
35963 }
35964 else if (TREE_CODE (arg3) == SSA_NAME
35965 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35966 {
35967 /* Recognize also when mask is like:
35968 __v2df src = _mm_setzero_pd ();
35969 __v2df mask = _mm_cmpeq_pd (src, src);
35970 or
35971 __v8sf src = _mm256_setzero_ps ();
35972 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35973 as that is a cheaper way to load all ones into
35974 a register than having to load a constant from
35975 memory. */
35976 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35977 if (is_gimple_call (def_stmt))
35978 {
35979 tree fndecl = gimple_call_fndecl (def_stmt);
35980 if (fndecl
35981 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35982 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35983 {
35984 case IX86_BUILTIN_CMPPD:
35985 case IX86_BUILTIN_CMPPS:
35986 case IX86_BUILTIN_CMPPD256:
35987 case IX86_BUILTIN_CMPPS256:
35988 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35989 break;
35990 /* FALLTHRU */
35991 case IX86_BUILTIN_CMPEQPD:
35992 case IX86_BUILTIN_CMPEQPS:
35993 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35994 && initializer_zerop (gimple_call_arg (def_stmt,
35995 1)))
35996 op0 = pc_rtx;
35997 break;
35998 default:
35999 break;
36000 }
36001 }
36002 }
36003 }
36004
36005 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36006 if (! pat)
36007 return const0_rtx;
36008 emit_insn (pat);
36009
36010 switch (fcode)
36011 {
36012 case IX86_BUILTIN_GATHER3DIV16SF:
36013 if (target == NULL_RTX)
36014 target = gen_reg_rtx (V8SFmode);
36015 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36016 break;
36017 case IX86_BUILTIN_GATHER3DIV16SI:
36018 if (target == NULL_RTX)
36019 target = gen_reg_rtx (V8SImode);
36020 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36021 break;
36022 case IX86_BUILTIN_GATHERDIV8SF:
36023 if (target == NULL_RTX)
36024 target = gen_reg_rtx (V4SFmode);
36025 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36026 break;
36027 case IX86_BUILTIN_GATHERDIV8SI:
36028 if (target == NULL_RTX)
36029 target = gen_reg_rtx (V4SImode);
36030 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36031 break;
36032 default:
36033 target = subtarget;
36034 break;
36035 }
36036 return target;
36037
36038 scatter_gen:
36039 arg0 = CALL_EXPR_ARG (exp, 0);
36040 arg1 = CALL_EXPR_ARG (exp, 1);
36041 arg2 = CALL_EXPR_ARG (exp, 2);
36042 arg3 = CALL_EXPR_ARG (exp, 3);
36043 arg4 = CALL_EXPR_ARG (exp, 4);
36044 op0 = expand_normal (arg0);
36045 op1 = expand_normal (arg1);
36046 op2 = expand_normal (arg2);
36047 op3 = expand_normal (arg3);
36048 op4 = expand_normal (arg4);
36049 mode1 = insn_data[icode].operand[1].mode;
36050 mode2 = insn_data[icode].operand[2].mode;
36051 mode3 = insn_data[icode].operand[3].mode;
36052 mode4 = insn_data[icode].operand[4].mode;
36053
36054 /* Force the memory operand to use only a base register here; we
36055 don't want to do this to the memory operands of other builtin
36056 functions. */
36057 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36058
36059 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36060 op0 = copy_to_mode_reg (Pmode, op0);
36061
36062 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36063 {
36064 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36065 op1 = copy_to_mode_reg (mode1, op1);
36066 }
36067 else
36068 {
36069 op1 = copy_to_reg (op1);
36070 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36071 }
36072
36073 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36074 op2 = copy_to_mode_reg (mode2, op2);
36075
36076 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36077 op3 = copy_to_mode_reg (mode3, op3);
36078
36079 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36080 {
36081 error ("the last argument must be scale 1, 2, 4, 8");
36082 return const0_rtx;
36083 }
36084
36085 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36086 if (! pat)
36087 return const0_rtx;
36088
36089 emit_insn (pat);
36090 return 0;
36091
36092 vec_prefetch_gen:
36093 arg0 = CALL_EXPR_ARG (exp, 0);
36094 arg1 = CALL_EXPR_ARG (exp, 1);
36095 arg2 = CALL_EXPR_ARG (exp, 2);
36096 arg3 = CALL_EXPR_ARG (exp, 3);
36097 arg4 = CALL_EXPR_ARG (exp, 4);
36098 op0 = expand_normal (arg0);
36099 op1 = expand_normal (arg1);
36100 op2 = expand_normal (arg2);
36101 op3 = expand_normal (arg3);
36102 op4 = expand_normal (arg4);
36103 mode0 = insn_data[icode].operand[0].mode;
36104 mode1 = insn_data[icode].operand[1].mode;
36105 mode3 = insn_data[icode].operand[3].mode;
36106 mode4 = insn_data[icode].operand[4].mode;
36107
36108 if (GET_MODE (op0) == mode0
36109 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36110 {
36111 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36112 op0 = copy_to_mode_reg (mode0, op0);
36113 }
36114 else if (op0 != constm1_rtx)
36115 {
36116 op0 = copy_to_reg (op0);
36117 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36118 }
36119
36120 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36121 op1 = copy_to_mode_reg (mode1, op1);
36122
36123 /* Force the memory operand to use only a base register here; we
36124 don't want to do this to the memory operands of other builtin
36125 functions. */
36126 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36127
36128 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36129 op2 = copy_to_mode_reg (Pmode, op2);
36130
36131 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36132 {
36133 error ("the forth argument must be scale 1, 2, 4, 8");
36134 return const0_rtx;
36135 }
36136
36137 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36138 {
36139 error ("incorrect hint operand");
36140 return const0_rtx;
36141 }
36142
36143 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36144 if (! pat)
36145 return const0_rtx;
36146
36147 emit_insn (pat);
36148
36149 return 0;
36150
36151 case IX86_BUILTIN_XABORT:
36152 icode = CODE_FOR_xabort;
36153 arg0 = CALL_EXPR_ARG (exp, 0);
36154 op0 = expand_normal (arg0);
36155 mode0 = insn_data[icode].operand[0].mode;
36156 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36157 {
36158 error ("the xabort's argument must be an 8-bit immediate");
36159 return const0_rtx;
36160 }
36161 emit_insn (gen_xabort (op0));
36162 return 0;
36163
36164 default:
36165 break;
36166 }
36167
36168 for (i = 0, d = bdesc_special_args;
36169 i < ARRAY_SIZE (bdesc_special_args);
36170 i++, d++)
36171 if (d->code == fcode)
36172 return ix86_expand_special_args_builtin (d, exp, target);
36173
36174 for (i = 0, d = bdesc_args;
36175 i < ARRAY_SIZE (bdesc_args);
36176 i++, d++)
36177 if (d->code == fcode)
36178 switch (fcode)
36179 {
36180 case IX86_BUILTIN_FABSQ:
36181 case IX86_BUILTIN_COPYSIGNQ:
36182 if (!TARGET_SSE)
36183 /* Emit a normal call if SSE isn't available. */
36184 return expand_call (exp, target, ignore);
36185 default:
36186 return ix86_expand_args_builtin (d, exp, target);
36187 }
36188
36189 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36190 if (d->code == fcode)
36191 return ix86_expand_sse_comi (d, exp, target);
36192
36193 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36194 if (d->code == fcode)
36195 return ix86_expand_round_builtin (d, exp, target);
36196
36197 for (i = 0, d = bdesc_pcmpestr;
36198 i < ARRAY_SIZE (bdesc_pcmpestr);
36199 i++, d++)
36200 if (d->code == fcode)
36201 return ix86_expand_sse_pcmpestr (d, exp, target);
36202
36203 for (i = 0, d = bdesc_pcmpistr;
36204 i < ARRAY_SIZE (bdesc_pcmpistr);
36205 i++, d++)
36206 if (d->code == fcode)
36207 return ix86_expand_sse_pcmpistr (d, exp, target);
36208
36209 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36210 if (d->code == fcode)
36211 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36212 (enum ix86_builtin_func_type)
36213 d->flag, d->comparison);
36214
36215 gcc_unreachable ();
36216 }
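/* Illustrative sketch (not part of the compiler): the mask optimization
   above recognizes source such as the following, where the self-compare
   yields an all-ones mask without loading a constant from memory, so the
   gather may clobber its whole destination.  Assumes AVX2 and
   <immintrin.h>.

       __m256d
       gather_all (const double *base, __m128i idx)
       {
         __m256d src = _mm256_setzero_pd ();
         __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
         return _mm256_mask_i32gather_pd (src, base, idx, mask, 8);
       }  */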
36217
36218 /* This returns the target-specific builtin with code CODE if
36219 current_function_decl has visibility on this builtin, which is checked
36220 using isa flags. Returns NULL_TREE otherwise. */
36221
36222 static tree
ix86_get_builtin (enum ix86_builtins code)
36223 {
36224 struct cl_target_option *opts;
36225 tree target_tree = NULL_TREE;
36226
36227 /* Determine the isa flags of current_function_decl. */
36228
36229 if (current_function_decl)
36230 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36231
36232 if (target_tree == NULL)
36233 target_tree = target_option_default_node;
36234
36235 opts = TREE_TARGET_OPTION (target_tree);
36236
36237 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36238 return ix86_builtin_decl (code, true);
36239 else
36240 return NULL_TREE;
36241 }
36242
36243 /* Returns a function decl for a vectorized version of the builtin function
36244 FNDECL, taking vectors of type TYPE_IN and producing vectors of type
36245 TYPE_OUT, or NULL_TREE if such a vectorized version is not available. */
36246
36247 static tree
36248 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36249 tree type_in)
36250 {
36251 enum machine_mode in_mode, out_mode;
36252 int in_n, out_n;
36253 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36254
36255 if (TREE_CODE (type_out) != VECTOR_TYPE
36256 || TREE_CODE (type_in) != VECTOR_TYPE
36257 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36258 return NULL_TREE;
36259
36260 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36261 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36262 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36263 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36264
36265 switch (fn)
36266 {
36267 case BUILT_IN_SQRT:
36268 if (out_mode == DFmode && in_mode == DFmode)
36269 {
36270 if (out_n == 2 && in_n == 2)
36271 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36272 else if (out_n == 4 && in_n == 4)
36273 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36274 else if (out_n == 8 && in_n == 8)
36275 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36276 }
36277 break;
36278
36279 case BUILT_IN_EXP2F:
36280 if (out_mode == SFmode && in_mode == SFmode)
36281 {
36282 if (out_n == 16 && in_n == 16)
36283 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36284 }
36285 break;
36286
36287 case BUILT_IN_SQRTF:
36288 if (out_mode == SFmode && in_mode == SFmode)
36289 {
36290 if (out_n == 4 && in_n == 4)
36291 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36292 else if (out_n == 8 && in_n == 8)
36293 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36294 else if (out_n == 16 && in_n == 16)
36295 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36296 }
36297 break;
36298
36299 case BUILT_IN_IFLOOR:
36300 case BUILT_IN_LFLOOR:
36301 case BUILT_IN_LLFLOOR:
36302 /* The round insn does not trap on denormals. */
36303 if (flag_trapping_math || !TARGET_ROUND)
36304 break;
36305
36306 if (out_mode == SImode && in_mode == DFmode)
36307 {
36308 if (out_n == 4 && in_n == 2)
36309 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36310 else if (out_n == 8 && in_n == 4)
36311 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36312 else if (out_n == 16 && in_n == 8)
36313 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36314 }
36315 break;
36316
36317 case BUILT_IN_IFLOORF:
36318 case BUILT_IN_LFLOORF:
36319 case BUILT_IN_LLFLOORF:
36320 /* The round insn does not trap on denormals. */
36321 if (flag_trapping_math || !TARGET_ROUND)
36322 break;
36323
36324 if (out_mode == SImode && in_mode == SFmode)
36325 {
36326 if (out_n == 4 && in_n == 4)
36327 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36328 else if (out_n == 8 && in_n == 8)
36329 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36330 }
36331 break;
36332
36333 case BUILT_IN_ICEIL:
36334 case BUILT_IN_LCEIL:
36335 case BUILT_IN_LLCEIL:
36336 /* The round insn does not trap on denormals. */
36337 if (flag_trapping_math || !TARGET_ROUND)
36338 break;
36339
36340 if (out_mode == SImode && in_mode == DFmode)
36341 {
36342 if (out_n == 4 && in_n == 2)
36343 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36344 else if (out_n == 8 && in_n == 4)
36345 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36346 else if (out_n == 16 && in_n == 8)
36347 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36348 }
36349 break;
36350
36351 case BUILT_IN_ICEILF:
36352 case BUILT_IN_LCEILF:
36353 case BUILT_IN_LLCEILF:
36354 /* The round insn does not trap on denormals. */
36355 if (flag_trapping_math || !TARGET_ROUND)
36356 break;
36357
36358 if (out_mode == SImode && in_mode == SFmode)
36359 {
36360 if (out_n == 4 && in_n == 4)
36361 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36362 else if (out_n == 8 && in_n == 8)
36363 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36364 }
36365 break;
36366
36367 case BUILT_IN_IRINT:
36368 case BUILT_IN_LRINT:
36369 case BUILT_IN_LLRINT:
36370 if (out_mode == SImode && in_mode == DFmode)
36371 {
36372 if (out_n == 4 && in_n == 2)
36373 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36374 else if (out_n == 8 && in_n == 4)
36375 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36376 }
36377 break;
36378
36379 case BUILT_IN_IRINTF:
36380 case BUILT_IN_LRINTF:
36381 case BUILT_IN_LLRINTF:
36382 if (out_mode == SImode && in_mode == SFmode)
36383 {
36384 if (out_n == 4 && in_n == 4)
36385 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36386 else if (out_n == 8 && in_n == 8)
36387 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36388 }
36389 break;
36390
36391 case BUILT_IN_IROUND:
36392 case BUILT_IN_LROUND:
36393 case BUILT_IN_LLROUND:
36394 /* The round insn does not trap on denormals. */
36395 if (flag_trapping_math || !TARGET_ROUND)
36396 break;
36397
36398 if (out_mode == SImode && in_mode == DFmode)
36399 {
36400 if (out_n == 4 && in_n == 2)
36401 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36402 else if (out_n == 8 && in_n == 4)
36403 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36404 else if (out_n == 16 && in_n == 8)
36405 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36406 }
36407 break;
36408
36409 case BUILT_IN_IROUNDF:
36410 case BUILT_IN_LROUNDF:
36411 case BUILT_IN_LLROUNDF:
36412 /* The round insn does not trap on denormals. */
36413 if (flag_trapping_math || !TARGET_ROUND)
36414 break;
36415
36416 if (out_mode == SImode && in_mode == SFmode)
36417 {
36418 if (out_n == 4 && in_n == 4)
36419 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36420 else if (out_n == 8 && in_n == 8)
36421 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36422 }
36423 break;
36424
36425 case BUILT_IN_COPYSIGN:
36426 if (out_mode == DFmode && in_mode == DFmode)
36427 {
36428 if (out_n == 2 && in_n == 2)
36429 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36430 else if (out_n == 4 && in_n == 4)
36431 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36432 else if (out_n == 8 && in_n == 8)
36433 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36434 }
36435 break;
36436
36437 case BUILT_IN_COPYSIGNF:
36438 if (out_mode == SFmode && in_mode == SFmode)
36439 {
36440 if (out_n == 4 && in_n == 4)
36441 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36442 else if (out_n == 8 && in_n == 8)
36443 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36444 else if (out_n == 16 && in_n == 16)
36445 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36446 }
36447 break;
36448
36449 case BUILT_IN_FLOOR:
36450 /* The round insn does not trap on denormals. */
36451 if (flag_trapping_math || !TARGET_ROUND)
36452 break;
36453
36454 if (out_mode == DFmode && in_mode == DFmode)
36455 {
36456 if (out_n == 2 && in_n == 2)
36457 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36458 else if (out_n == 4 && in_n == 4)
36459 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36460 }
36461 break;
36462
36463 case BUILT_IN_FLOORF:
36464 /* The round insn does not trap on denormals. */
36465 if (flag_trapping_math || !TARGET_ROUND)
36466 break;
36467
36468 if (out_mode == SFmode && in_mode == SFmode)
36469 {
36470 if (out_n == 4 && in_n == 4)
36471 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36472 else if (out_n == 8 && in_n == 8)
36473 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36474 }
36475 break;
36476
36477 case BUILT_IN_CEIL:
36478 /* The round insn does not trap on denormals. */
36479 if (flag_trapping_math || !TARGET_ROUND)
36480 break;
36481
36482 if (out_mode == DFmode && in_mode == DFmode)
36483 {
36484 if (out_n == 2 && in_n == 2)
36485 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36486 else if (out_n == 4 && in_n == 4)
36487 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36488 }
36489 break;
36490
36491 case BUILT_IN_CEILF:
36492 /* The round insn does not trap on denormals. */
36493 if (flag_trapping_math || !TARGET_ROUND)
36494 break;
36495
36496 if (out_mode == SFmode && in_mode == SFmode)
36497 {
36498 if (out_n == 4 && in_n == 4)
36499 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36500 else if (out_n == 8 && in_n == 8)
36501 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36502 }
36503 break;
36504
36505 case BUILT_IN_TRUNC:
36506 /* The round insn does not trap on denormals. */
36507 if (flag_trapping_math || !TARGET_ROUND)
36508 break;
36509
36510 if (out_mode == DFmode && in_mode == DFmode)
36511 {
36512 if (out_n == 2 && in_n == 2)
36513 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36514 else if (out_n == 4 && in_n == 4)
36515 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36516 }
36517 break;
36518
36519 case BUILT_IN_TRUNCF:
36520 /* The round insn does not trap on denormals. */
36521 if (flag_trapping_math || !TARGET_ROUND)
36522 break;
36523
36524 if (out_mode == SFmode && in_mode == SFmode)
36525 {
36526 if (out_n == 4 && in_n == 4)
36527 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36528 else if (out_n == 8 && in_n == 8)
36529 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36530 }
36531 break;
36532
36533 case BUILT_IN_RINT:
36534 /* The round insn does not trap on denormals. */
36535 if (flag_trapping_math || !TARGET_ROUND)
36536 break;
36537
36538 if (out_mode == DFmode && in_mode == DFmode)
36539 {
36540 if (out_n == 2 && in_n == 2)
36541 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36542 else if (out_n == 4 && in_n == 4)
36543 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36544 }
36545 break;
36546
36547 case BUILT_IN_RINTF:
36548 /* The round insn does not trap on denormals. */
36549 if (flag_trapping_math || !TARGET_ROUND)
36550 break;
36551
36552 if (out_mode == SFmode && in_mode == SFmode)
36553 {
36554 if (out_n == 4 && in_n == 4)
36555 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36556 else if (out_n == 8 && in_n == 8)
36557 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36558 }
36559 break;
36560
36561 case BUILT_IN_ROUND:
36562 /* The round insn does not trap on denormals. */
36563 if (flag_trapping_math || !TARGET_ROUND)
36564 break;
36565
36566 if (out_mode == DFmode && in_mode == DFmode)
36567 {
36568 if (out_n == 2 && in_n == 2)
36569 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36570 else if (out_n == 4 && in_n == 4)
36571 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36572 }
36573 break;
36574
36575 case BUILT_IN_ROUNDF:
36576 /* The round insn does not trap on denormals. */
36577 if (flag_trapping_math || !TARGET_ROUND)
36578 break;
36579
36580 if (out_mode == SFmode && in_mode == SFmode)
36581 {
36582 if (out_n == 4 && in_n == 4)
36583 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36584 else if (out_n == 8 && in_n == 8)
36585 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36586 }
36587 break;
36588
36589 case BUILT_IN_FMA:
36590 if (out_mode == DFmode && in_mode == DFmode)
36591 {
36592 if (out_n == 2 && in_n == 2)
36593 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36594 if (out_n == 4 && in_n == 4)
36595 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36596 }
36597 break;
36598
36599 case BUILT_IN_FMAF:
36600 if (out_mode == SFmode && in_mode == SFmode)
36601 {
36602 if (out_n == 4 && in_n == 4)
36603 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36604 if (out_n == 8 && in_n == 8)
36605 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36606 }
36607 break;
36608
36609 default:
36610 break;
36611 }
36612
36613 /* Dispatch to a handler for a vectorization library. */
36614 if (ix86_veclib_handler)
36615 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36616 type_in);
36617
36618 return NULL_TREE;
36619 }
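/* Illustrative sketch (not part of the compiler): a loop such as the one
   below, compiled with something like -O3 -mavx -fno-math-errno, is the
   kind of code for which this hook offers the vectorizer a vector sqrt
   builtin (here IX86_BUILTIN_SQRTPD256, as out_mode == DFmode and
   out_n == 4).

       void
       vsqrt (double *a, const double *b, int n)
       {
         for (int i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }  */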
36620
36621 /* Handler for an SVML-style interface to
36622 a library with vectorized intrinsics. */
36623
36624 static tree
36625 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36626 {
36627 char name[20];
36628 tree fntype, new_fndecl, args;
36629 unsigned arity;
36630 const char *bname;
36631 enum machine_mode el_mode, in_mode;
36632 int n, in_n;
36633
36634 /* The SVML library is suitable for unsafe math only. */
36635 if (!flag_unsafe_math_optimizations)
36636 return NULL_TREE;
36637
36638 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36639 n = TYPE_VECTOR_SUBPARTS (type_out);
36640 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36641 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36642 if (el_mode != in_mode
36643 || n != in_n)
36644 return NULL_TREE;
36645
36646 switch (fn)
36647 {
36648 case BUILT_IN_EXP:
36649 case BUILT_IN_LOG:
36650 case BUILT_IN_LOG10:
36651 case BUILT_IN_POW:
36652 case BUILT_IN_TANH:
36653 case BUILT_IN_TAN:
36654 case BUILT_IN_ATAN:
36655 case BUILT_IN_ATAN2:
36656 case BUILT_IN_ATANH:
36657 case BUILT_IN_CBRT:
36658 case BUILT_IN_SINH:
36659 case BUILT_IN_SIN:
36660 case BUILT_IN_ASINH:
36661 case BUILT_IN_ASIN:
36662 case BUILT_IN_COSH:
36663 case BUILT_IN_COS:
36664 case BUILT_IN_ACOSH:
36665 case BUILT_IN_ACOS:
36666 if (el_mode != DFmode || n != 2)
36667 return NULL_TREE;
36668 break;
36669
36670 case BUILT_IN_EXPF:
36671 case BUILT_IN_LOGF:
36672 case BUILT_IN_LOG10F:
36673 case BUILT_IN_POWF:
36674 case BUILT_IN_TANHF:
36675 case BUILT_IN_TANF:
36676 case BUILT_IN_ATANF:
36677 case BUILT_IN_ATAN2F:
36678 case BUILT_IN_ATANHF:
36679 case BUILT_IN_CBRTF:
36680 case BUILT_IN_SINHF:
36681 case BUILT_IN_SINF:
36682 case BUILT_IN_ASINHF:
36683 case BUILT_IN_ASINF:
36684 case BUILT_IN_COSHF:
36685 case BUILT_IN_COSF:
36686 case BUILT_IN_ACOSHF:
36687 case BUILT_IN_ACOSF:
36688 if (el_mode != SFmode || n != 4)
36689 return NULL_TREE;
36690 break;
36691
36692 default:
36693 return NULL_TREE;
36694 }
36695
36696 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36697
36698 if (fn == BUILT_IN_LOGF)
36699 strcpy (name, "vmlsLn4");
36700 else if (fn == BUILT_IN_LOG)
36701 strcpy (name, "vmldLn2");
36702 else if (n == 4)
36703 {
36704 sprintf (name, "vmls%s", bname+10);
36705 name[strlen (name)-1] = '4';
36706 }
36707 else
36708 sprintf (name, "vmld%s2", bname+10);
36709
36710 /* Convert to uppercase. */
36711 name[4] &= ~0x20;
36712
36713 arity = 0;
36714 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36715 args;
36716 args = TREE_CHAIN (args))
36717 arity++;
36718
36719 if (arity == 1)
36720 fntype = build_function_type_list (type_out, type_in, NULL);
36721 else
36722 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36723
36724 /* Build a function declaration for the vectorized function. */
36725 new_fndecl = build_decl (BUILTINS_LOCATION,
36726 FUNCTION_DECL, get_identifier (name), fntype);
36727 TREE_PUBLIC (new_fndecl) = 1;
36728 DECL_EXTERNAL (new_fndecl) = 1;
36729 DECL_IS_NOVOPS (new_fndecl) = 1;
36730 TREE_READONLY (new_fndecl) = 1;
36731
36732 return new_fndecl;
36733 }
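/* For example, a V4SFmode sinf call takes bname == "__builtin_sinf",
   produces "vmlssinf", rewrites the trailing character to get "vmlssin4"
   and upper-cases name[4], yielding the SVML entry point "vmlsSin4";
   the V2DFmode sin case yields "vmldSin2".  */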
36734
36735 /* Handler for an ACML-style interface to
36736 a library with vectorized intrinsics. */
36737
36738 static tree
36739 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36740 {
36741 char name[20] = "__vr.._";
36742 tree fntype, new_fndecl, args;
36743 unsigned arity;
36744 const char *bname;
36745 enum machine_mode el_mode, in_mode;
36746 int n, in_n;
36747
36748 /* The ACML library is 64-bit only and suitable for unsafe math only, as
36749 it does not correctly support parts of IEEE, such as denormals, with
36750 the required precision. */
36751 if (!TARGET_64BIT
36752 || !flag_unsafe_math_optimizations)
36753 return NULL_TREE;
36754
36755 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36756 n = TYPE_VECTOR_SUBPARTS (type_out);
36757 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36758 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36759 if (el_mode != in_mode
36760 || n != in_n)
36761 return NULL_TREE;
36762
36763 switch (fn)
36764 {
36765 case BUILT_IN_SIN:
36766 case BUILT_IN_COS:
36767 case BUILT_IN_EXP:
36768 case BUILT_IN_LOG:
36769 case BUILT_IN_LOG2:
36770 case BUILT_IN_LOG10:
36771 name[4] = 'd';
36772 name[5] = '2';
36773 if (el_mode != DFmode
36774 || n != 2)
36775 return NULL_TREE;
36776 break;
36777
36778 case BUILT_IN_SINF:
36779 case BUILT_IN_COSF:
36780 case BUILT_IN_EXPF:
36781 case BUILT_IN_POWF:
36782 case BUILT_IN_LOGF:
36783 case BUILT_IN_LOG2F:
36784 case BUILT_IN_LOG10F:
36785 name[4] = 's';
36786 name[5] = '4';
36787 if (el_mode != SFmode
36788 || n != 4)
36789 return NULL_TREE;
36790 break;
36791
36792 default:
36793 return NULL_TREE;
36794 }
36795
36796 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36797 sprintf (name + 7, "%s", bname+10);
36798
36799 arity = 0;
36800 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36801 args;
36802 args = TREE_CHAIN (args))
36803 arity++;
36804
36805 if (arity == 1)
36806 fntype = build_function_type_list (type_out, type_in, NULL);
36807 else
36808 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36809
36810 /* Build a function declaration for the vectorized function. */
36811 new_fndecl = build_decl (BUILTINS_LOCATION,
36812 FUNCTION_DECL, get_identifier (name), fntype);
36813 TREE_PUBLIC (new_fndecl) = 1;
36814 DECL_EXTERNAL (new_fndecl) = 1;
36815 DECL_IS_NOVOPS (new_fndecl) = 1;
36816 TREE_READONLY (new_fndecl) = 1;
36817
36818 return new_fndecl;
36819 }
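/* For example, for a V2DFmode sin call the template "__vr.._" becomes
   "__vrd2_" and bname + 10 ("sin") is appended, giving the ACML entry
   point "__vrd2_sin"; the V4SFmode sinf case gives "__vrs4_sinf".  */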
36820
36821 /* Returns a decl of a function that implements gather load with
36822 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36823 Return NULL_TREE if it is not available. */
36824
36825 static tree
36826 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36827 const_tree index_type, int scale)
36828 {
36829 bool si;
36830 enum ix86_builtins code;
36831
36832 if (! TARGET_AVX2)
36833 return NULL_TREE;
36834
36835 if ((TREE_CODE (index_type) != INTEGER_TYPE
36836 && !POINTER_TYPE_P (index_type))
36837 || (TYPE_MODE (index_type) != SImode
36838 && TYPE_MODE (index_type) != DImode))
36839 return NULL_TREE;
36840
36841 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36842 return NULL_TREE;
36843
36844 /* v*gather* insn sign extends index to pointer mode. */
36845 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36846 && TYPE_UNSIGNED (index_type))
36847 return NULL_TREE;
36848
36849 if (scale <= 0
36850 || scale > 8
36851 || (scale & (scale - 1)) != 0)
36852 return NULL_TREE;
36853
36854 si = TYPE_MODE (index_type) == SImode;
36855 switch (TYPE_MODE (mem_vectype))
36856 {
36857 case V2DFmode:
36858 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36859 break;
36860 case V4DFmode:
36861 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36862 break;
36863 case V2DImode:
36864 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36865 break;
36866 case V4DImode:
36867 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36868 break;
36869 case V4SFmode:
36870 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36871 break;
36872 case V8SFmode:
36873 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36874 break;
36875 case V4SImode:
36876 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36877 break;
36878 case V8SImode:
36879 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36880 break;
36881 case V8DFmode:
36882 if (TARGET_AVX512F)
36883 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36884 else
36885 return NULL_TREE;
36886 break;
36887 case V8DImode:
36888 if (TARGET_AVX512F)
36889 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36890 else
36891 return NULL_TREE;
36892 break;
36893 case V16SFmode:
36894 if (TARGET_AVX512F)
36895 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36896 else
36897 return NULL_TREE;
36898 break;
36899 case V16SImode:
36900 if (TARGET_AVX512F)
36901 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36902 else
36903 return NULL_TREE;
36904 break;
36905 default:
36906 return NULL_TREE;
36907 }
36908
36909 return ix86_get_builtin (code);
36910 }
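/* Illustrative sketch (not part of the compiler): with e.g. -O3 -mavx2
   the vectorizer queries this hook for a gather builtin when it
   vectorizes an indexed load such as:

       void
       gather_loop (double *a, const double *b, const int *idx, int n)
       {
         for (int i = 0; i < n; i++)
           a[i] = b[idx[i]];
       }  */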
36911
36912 /* Returns a decl of a target-specific builtin that implements the
36913 reciprocal of the function, or NULL_TREE if not available. */
36914
36915 static tree
36916 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36917 bool sqrt ATTRIBUTE_UNUSED)
36918 {
36919 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36920 && flag_finite_math_only && !flag_trapping_math
36921 && flag_unsafe_math_optimizations))
36922 return NULL_TREE;
36923
36924 if (md_fn)
36925 /* Machine dependent builtins. */
36926 switch (fn)
36927 {
36928 /* Vectorized version of sqrt to rsqrt conversion. */
36929 case IX86_BUILTIN_SQRTPS_NR:
36930 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36931
36932 case IX86_BUILTIN_SQRTPS_NR256:
36933 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36934
36935 default:
36936 return NULL_TREE;
36937 }
36938 else
36939 /* Normal builtins. */
36940 switch (fn)
36941 {
36942 /* Sqrt to rsqrt conversion. */
36943 case BUILT_IN_SQRTF:
36944 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36945
36946 default:
36947 return NULL_TREE;
36948 }
36949 }
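/* Illustrative sketch (not part of the compiler): under the conditions
   tested above (SSE math, finite-only, non-trapping, unsafe math, e.g.
   via -ffast-math, and not optimizing for size) a reciprocal square root
   such as

       float rsqrt (float x) { return 1.0f / __builtin_sqrtf (x); }

   may be rewritten to use the RSQRTF/RSQRTPS builtins returned here,
   followed by a Newton-Raphson correction step, instead of a real
   divide and square root.  */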
36950 \f
36951 /* Helper for avx_vpermilps256_operand et al. This is also used by
36952 the expansion functions to turn the parallel back into a mask.
36953 The return value is 0 for no match and the imm8+1 for a match. */
36954
36955 int
36956 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36957 {
36958 unsigned i, nelt = GET_MODE_NUNITS (mode);
36959 unsigned mask = 0;
36960 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36961
36962 if (XVECLEN (par, 0) != (int) nelt)
36963 return 0;
36964
36965 /* Validate that all of the elements are constants, and not totally
36966 out of range. Copy the data into an integral array to make the
36967 subsequent checks easier. */
36968 for (i = 0; i < nelt; ++i)
36969 {
36970 rtx er = XVECEXP (par, 0, i);
36971 unsigned HOST_WIDE_INT ei;
36972
36973 if (!CONST_INT_P (er))
36974 return 0;
36975 ei = INTVAL (er);
36976 if (ei >= nelt)
36977 return 0;
36978 ipar[i] = ei;
36979 }
36980
36981 switch (mode)
36982 {
36983 case V8DFmode:
36984 /* In the 512-bit DFmode case, we can only move elements within
36985 a 128-bit lane. First fill the second part of the mask,
36986 then fallthru. */
36987 for (i = 4; i < 6; ++i)
36988 {
36989 if (ipar[i] < 4 || ipar[i] >= 6)
36990 return 0;
36991 mask |= (ipar[i] - 4) << i;
36992 }
36993 for (i = 6; i < 8; ++i)
36994 {
36995 if (ipar[i] < 6)
36996 return 0;
36997 mask |= (ipar[i] - 6) << i;
36998 }
36999 /* FALLTHRU */
37000
37001 case V4DFmode:
37002 /* In the 256-bit DFmode case, we can only move elements within
37003 a 128-bit lane. */
37004 for (i = 0; i < 2; ++i)
37005 {
37006 if (ipar[i] >= 2)
37007 return 0;
37008 mask |= ipar[i] << i;
37009 }
37010 for (i = 2; i < 4; ++i)
37011 {
37012 if (ipar[i] < 2)
37013 return 0;
37014 mask |= (ipar[i] - 2) << i;
37015 }
37016 break;
37017
37018 case V16SFmode:
37019 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37020 must mirror the permutation in the lower 256 bits. */
37021 for (i = 0; i < 8; ++i)
37022 if (ipar[i] + 8 != ipar[i + 8])
37023 return 0;
37024 /* FALLTHRU */
37025
37026 case V8SFmode:
37027 /* In the 256-bit SFmode case, we have full freedom of
37028 movement within the low 128-bit lane, but the high 128-bit
37029 lane must mirror the exact same pattern. */
37030 for (i = 0; i < 4; ++i)
37031 if (ipar[i] + 4 != ipar[i + 4])
37032 return 0;
37033 nelt = 4;
37034 /* FALLTHRU */
37035
37036 case V2DFmode:
37037 case V4SFmode:
37038 /* In the 128-bit case, we have full freedom in the placement of
37039 the elements from the source operand. */
37040 for (i = 0; i < nelt; ++i)
37041 mask |= ipar[i] << (i * (nelt / 2));
37042 break;
37043
37044 default:
37045 gcc_unreachable ();
37046 }
37047
37048 /* Make sure success has a non-zero value by adding one. */
37049 return mask + 1;
37050 }
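/* Worked example: for V4SFmode a parallel selecting elements (0 3 2 1)
   gives mask = 0 | (3 << 2) | (2 << 4) | (1 << 6) = 0x6c, so the function
   returns 0x6d and the caller recovers the vpermilps immediate 0x6c by
   subtracting one.  */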
37051
37052 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37053 the expansion functions to turn the parallel back into a mask.
37054 The return value is 0 for no match and the imm8+1 for a match. */
37055
37056 int
37057 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37058 {
37059 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37060 unsigned mask = 0;
37061 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37062
37063 if (XVECLEN (par, 0) != (int) nelt)
37064 return 0;
37065
37066 /* Validate that all of the elements are constants, and not totally
37067 out of range. Copy the data into an integral array to make the
37068 subsequent checks easier. */
37069 for (i = 0; i < nelt; ++i)
37070 {
37071 rtx er = XVECEXP (par, 0, i);
37072 unsigned HOST_WIDE_INT ei;
37073
37074 if (!CONST_INT_P (er))
37075 return 0;
37076 ei = INTVAL (er);
37077 if (ei >= 2 * nelt)
37078 return 0;
37079 ipar[i] = ei;
37080 }
37081
37082 /* Validate that each half of the permute selects consecutive elements. */
37083 for (i = 0; i < nelt2 - 1; ++i)
37084 if (ipar[i] + 1 != ipar[i + 1])
37085 return 0;
37086 for (i = nelt2; i < nelt - 1; ++i)
37087 if (ipar[i] + 1 != ipar[i + 1])
37088 return 0;
37089
37090 /* Reconstruct the mask. */
37091 for (i = 0; i < 2; ++i)
37092 {
37093 unsigned e = ipar[i * nelt2];
37094 if (e % nelt2)
37095 return 0;
37096 e /= nelt2;
37097 mask |= e << (i * 4);
37098 }
37099
37100 /* Make sure success has a non-zero value by adding one. */
37101 return mask + 1;
37102 }
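/* Worked example: for V4DFmode a parallel selecting elements (2 3 4 5),
   i.e. the high lane of the first operand followed by the low lane of
   the second, gives e = 1 for the low half and e = 2 for the high half,
   so mask = 1 | (2 << 4) = 0x21 and the function returns 0x22.  */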
37103 \f
37104 /* Return a register priority for hard reg REGNO. */
37105 static int
37106 ix86_register_priority (int hard_regno)
37107 {
37108 /* ebp and r13 as the base always want a displacement, and r12 as the
37109 base always wants an index. So discourage their usage in an
37110 address. */
37111 if (hard_regno == R12_REG || hard_regno == R13_REG)
37112 return 0;
37113 if (hard_regno == BP_REG)
37114 return 1;
37115 /* New x86-64 int registers result in bigger code size. Discourage
37116 them. */
37117 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37118 return 2;
37119 /* New x86-64 SSE registers result in bigger code size. Discourage
37120 them. */
37121 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37122 return 2;
37123 /* Usage of AX register results in smaller code. Prefer it. */
37124 if (hard_regno == 0)
37125 return 4;
37126 return 3;
37127 }
37128
37129 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37130
37131 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37132 QImode must go into class Q_REGS.
37133 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37134 movdf to do mem-to-mem moves through integer regs. */
37135
37136 static reg_class_t
37137 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37138 {
37139 enum machine_mode mode = GET_MODE (x);
37140
37141 /* We're only allowed to return a subclass of CLASS. Many of the
37142 following checks fail for NO_REGS, so eliminate that early. */
37143 if (regclass == NO_REGS)
37144 return NO_REGS;
37145
37146 /* All classes can load zeros. */
37147 if (x == CONST0_RTX (mode))
37148 return regclass;
37149
37150 /* Force constants into memory if we are loading a (nonzero) constant into
37151 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37152 instructions to load from a constant. */
37153 if (CONSTANT_P (x)
37154 && (MAYBE_MMX_CLASS_P (regclass)
37155 || MAYBE_SSE_CLASS_P (regclass)
37156 || MAYBE_MASK_CLASS_P (regclass)))
37157 return NO_REGS;
37158
37159 /* Prefer SSE regs only, if we can use them for math. */
37160 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37161 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37162
37163 /* Floating-point constants need more complex checks. */
37164 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37165 {
37166 /* General regs can load everything. */
37167 if (reg_class_subset_p (regclass, GENERAL_REGS))
37168 return regclass;
37169
37170 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37171 zero above. We only want to wind up preferring 80387 registers if
37172 we plan on doing computation with them. */
37173 if (TARGET_80387
37174 && standard_80387_constant_p (x) > 0)
37175 {
37176 /* Limit class to non-sse. */
37177 if (regclass == FLOAT_SSE_REGS)
37178 return FLOAT_REGS;
37179 if (regclass == FP_TOP_SSE_REGS)
37180 return FP_TOP_REG;
37181 if (regclass == FP_SECOND_SSE_REGS)
37182 return FP_SECOND_REG;
37183 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37184 return regclass;
37185 }
37186
37187 return NO_REGS;
37188 }
37189
37190 /* Generally when we see PLUS here, it's the function invariant
37191 (plus soft-fp const_int). Which can only be computed into general
37192 regs. */
37193 if (GET_CODE (x) == PLUS)
37194 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37195
37196 /* QImode constants are easy to load, but non-constant QImode data
37197 must go into Q_REGS. */
37198 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37199 {
37200 if (reg_class_subset_p (regclass, Q_REGS))
37201 return regclass;
37202 if (reg_class_subset_p (Q_REGS, regclass))
37203 return Q_REGS;
37204 return NO_REGS;
37205 }
37206
37207 return regclass;
37208 }
37209
37210 /* Discourage putting floating-point values in SSE registers unless
37211 SSE math is being used, and likewise for the 387 registers. */
37212 static reg_class_t
37213 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37214 {
37215 enum machine_mode mode = GET_MODE (x);
37216
37217 /* Restrict the output reload class to the register bank that we are doing
37218 math on. If we would like not to return a subset of CLASS, reject this
37219 alternative: if reload cannot do this, it will still use its choice. */
37220 mode = GET_MODE (x);
37221 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37222 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37223
37224 if (X87_FLOAT_MODE_P (mode))
37225 {
37226 if (regclass == FP_TOP_SSE_REGS)
37227 return FP_TOP_REG;
37228 else if (regclass == FP_SECOND_SSE_REGS)
37229 return FP_SECOND_REG;
37230 else
37231 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37232 }
37233
37234 return regclass;
37235 }
37236
37237 static reg_class_t
37238 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37239 enum machine_mode mode, secondary_reload_info *sri)
37240 {
37241 /* Double-word spills from general registers to non-offsettable memory
37242 references (zero-extended addresses) require special handling. */
37243 if (TARGET_64BIT
37244 && MEM_P (x)
37245 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37246 && INTEGER_CLASS_P (rclass)
37247 && !offsettable_memref_p (x))
37248 {
37249 sri->icode = (in_p
37250 ? CODE_FOR_reload_noff_load
37251 : CODE_FOR_reload_noff_store);
37252 /* Add the cost of moving address to a temporary. */
37253 sri->extra_cost = 1;
37254
37255 return NO_REGS;
37256 }
37257
37258 /* QImode spills from non-QI registers require
37259 an intermediate register on 32-bit targets. */
37260 if (mode == QImode
37261 && (MAYBE_MASK_CLASS_P (rclass)
37262 || (!TARGET_64BIT && !in_p
37263 && INTEGER_CLASS_P (rclass)
37264 && MAYBE_NON_Q_CLASS_P (rclass))))
37265 {
37266 int regno;
37267
37268 if (REG_P (x))
37269 regno = REGNO (x);
37270 else
37271 regno = -1;
37272
37273 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37274 regno = true_regnum (x);
37275
37276 /* Return Q_REGS if the operand is in memory. */
37277 if (regno == -1)
37278 return Q_REGS;
37279 }
37280
37281 /* This condition handles a corner case where an expression involving
37282 pointers gets vectorized. We're trying to use the address of a
37283 stack slot as a vector initializer.
37284
37285 (set (reg:V2DI 74 [ vect_cst_.2 ])
37286 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37287
37288 Eventually frame gets turned into sp+offset like this:
37289
37290 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37291 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37292 (const_int 392 [0x188]))))
37293
37294 That later gets turned into:
37295
37296 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37297 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37298 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37299
37300 We'll have the following reload recorded:
37301
37302 Reload 0: reload_in (DI) =
37303 (plus:DI (reg/f:DI 7 sp)
37304 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37305 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37306 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37307 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37308 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37309 reload_reg_rtx: (reg:V2DI 22 xmm1)
37310
37311 Which isn't going to work since SSE instructions can't handle scalar
37312 additions. Returning GENERAL_REGS forces the addition into an integer
37313 register, and reload can handle subsequent reloads without problems. */
37314
37315 if (in_p && GET_CODE (x) == PLUS
37316 && SSE_CLASS_P (rclass)
37317 && SCALAR_INT_MODE_P (mode))
37318 return GENERAL_REGS;
37319
37320 return NO_REGS;
37321 }
37322
37323 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37324
37325 static bool
37326 ix86_class_likely_spilled_p (reg_class_t rclass)
37327 {
37328 switch (rclass)
37329 {
37330 case AREG:
37331 case DREG:
37332 case CREG:
37333 case BREG:
37334 case AD_REGS:
37335 case SIREG:
37336 case DIREG:
37337 case SSE_FIRST_REG:
37338 case FP_TOP_REG:
37339 case FP_SECOND_REG:
37340 return true;
37341
37342 default:
37343 break;
37344 }
37345
37346 return false;
37347 }
37348
37349 /* If we are copying between general and FP registers, we need a memory
37350 location. The same is true for SSE and MMX registers.
37351
37352 To optimize register_move_cost performance, allow inline variant.
37353
37354 The macro can't work reliably when one of the CLASSES is a class containing
37355 registers from multiple units (SSE, MMX, integer). We avoid this by never
37356 combining those units in a single alternative in the machine description.
37357 Ensure that this constraint holds to avoid unexpected surprises.
37358
37359 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37360 enforce these sanity checks. */
37361
37362 static inline bool
37363 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37364 enum machine_mode mode, int strict)
37365 {
37366 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37367 return false;
37368 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37369 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37370 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37371 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37372 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37373 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37374 {
37375 gcc_assert (!strict || lra_in_progress);
37376 return true;
37377 }
37378
37379 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37380 return true;
37381
37382 /* ??? This is a lie. We do have moves between mmx/general, and for
37383 mmx/sse2. But by saying we need secondary memory we discourage the
37384 register allocator from using the mmx registers unless needed. */
37385 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37386 return true;
37387
37388 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37389 {
37390 /* SSE1 doesn't have any direct moves from other classes. */
37391 if (!TARGET_SSE2)
37392 return true;
37393
37394 /* If the target says that inter-unit moves are more expensive
37395 than moving through memory, then don't generate them. */
37396 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37397 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37398 return true;
37399
37400 /* Between SSE and general, we have moves no larger than word size. */
37401 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37402 return true;
37403 }
37404
37405 return false;
37406 }
37407
37408 bool
37409 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37410 enum machine_mode mode, int strict)
37411 {
37412 return inline_secondary_memory_needed (class1, class2, mode, strict);
37413 }
37414
37415 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37416
37417 On the 80386, this is the size of MODE in words,
37418 except in the FP regs, where a single reg is always enough. */
37419
37420 static unsigned char
37421 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37422 {
37423 if (MAYBE_INTEGER_CLASS_P (rclass))
37424 {
37425 if (mode == XFmode)
37426 return (TARGET_64BIT ? 2 : 3);
37427 else if (mode == XCmode)
37428 return (TARGET_64BIT ? 4 : 6);
37429 else
37430 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37431 }
37432 else
37433 {
37434 if (COMPLEX_MODE_P (mode))
37435 return 2;
37436 else
37437 return 1;
37438 }
37439 }
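/* For example, DImode in an integer class needs two registers on 32-bit
   targets and one on 64-bit targets, XFmode needs three and two
   respectively, while any non-complex mode in an FP, SSE or MMX class is
   counted as a single register (complex modes as two).  */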
37440
37441 /* Return true if the registers in CLASS cannot represent the change from
37442 modes FROM to TO. */
37443
37444 bool
37445 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37446 enum reg_class regclass)
37447 {
37448 if (from == to)
37449 return false;
37450
37451 /* x87 registers can't do subreg at all, as all values are reformatted
37452 to extended precision. */
37453 if (MAYBE_FLOAT_CLASS_P (regclass))
37454 return true;
37455
37456 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37457 {
37458 /* Vector registers do not support QI or HImode loads. If we don't
37459 disallow a change to these modes, reload will assume it's ok to
37460 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37461 the vec_dupv4hi pattern. */
37462 if (GET_MODE_SIZE (from) < 4)
37463 return true;
37464
37465 /* Vector registers do not support subreg with nonzero offsets, which
37466 are otherwise valid for integer registers. Since we can't see
37467 whether we have a nonzero offset from here, prohibit all
37468 nonparadoxical subregs changing size. */
37469 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37470 return true;
37471 }
37472
37473 return false;
37474 }
37475
37476 /* Return the cost of moving data of mode M between a
37477 register and memory. A value of 2 is the default; this cost is
37478 relative to those in `REGISTER_MOVE_COST'.
37479
37480 This function is used extensively by register_move_cost, which is used
37481 to build tables at startup, so make it inline in this case.
37482 When IN is 2, return the maximum of the in and out move costs.
37483
37484 If moving between registers and memory is more expensive than
37485 between two registers, you should define this macro to express the
37486 relative cost.
37487
37488 Also model the increased cost of moving QImode registers in
37489 non-Q_REGS classes.
37490 */
37491 static inline int
37492 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37493 int in)
37494 {
37495 int cost;
37496 if (FLOAT_CLASS_P (regclass))
37497 {
37498 int index;
37499 switch (mode)
37500 {
37501 case SFmode:
37502 index = 0;
37503 break;
37504 case DFmode:
37505 index = 1;
37506 break;
37507 case XFmode:
37508 index = 2;
37509 break;
37510 default:
37511 return 100;
37512 }
37513 if (in == 2)
37514 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37515 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37516 }
37517 if (SSE_CLASS_P (regclass))
37518 {
37519 int index;
37520 switch (GET_MODE_SIZE (mode))
37521 {
37522 case 4:
37523 index = 0;
37524 break;
37525 case 8:
37526 index = 1;
37527 break;
37528 case 16:
37529 index = 2;
37530 break;
37531 default:
37532 return 100;
37533 }
37534 if (in == 2)
37535 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37536 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37537 }
37538 if (MMX_CLASS_P (regclass))
37539 {
37540 int index;
37541 switch (GET_MODE_SIZE (mode))
37542 {
37543 case 4:
37544 index = 0;
37545 break;
37546 case 8:
37547 index = 1;
37548 break;
37549 default:
37550 return 100;
37551 }
37552 if (in == 2)
37553 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37554 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37555 }
37556 switch (GET_MODE_SIZE (mode))
37557 {
37558 case 1:
37559 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37560 {
37561 if (!in)
37562 return ix86_cost->int_store[0];
37563 if (TARGET_PARTIAL_REG_DEPENDENCY
37564 && optimize_function_for_speed_p (cfun))
37565 cost = ix86_cost->movzbl_load;
37566 else
37567 cost = ix86_cost->int_load[0];
37568 if (in == 2)
37569 return MAX (cost, ix86_cost->int_store[0]);
37570 return cost;
37571 }
37572 else
37573 {
37574 if (in == 2)
37575 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37576 if (in)
37577 return ix86_cost->movzbl_load;
37578 else
37579 return ix86_cost->int_store[0] + 4;
37580 }
37581 break;
37582 case 2:
37583 if (in == 2)
37584 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37585 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37586 default:
37587 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
37588 if (mode == TFmode)
37589 mode = XFmode;
37590 if (in == 2)
37591 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37592 else if (in)
37593 cost = ix86_cost->int_load[2];
37594 else
37595 cost = ix86_cost->int_store[2];
37596 return (cost * (((int) GET_MODE_SIZE (mode)
37597 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37598 }
37599 }
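/* For example, a 16-byte vector in an SSE class costs sse_load[2] or
   sse_store[2]; a TFmode value kept in integer registers is moved as
   XFmode, i.e. as several word-sized moves each priced at int_load[2]
   or int_store[2].  */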
37600
37601 static int
37602 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37603 bool in)
37604 {
37605 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37606 }
37607
37608
37609 /* Return the cost of moving data from a register in class CLASS1 to
37610 one in class CLASS2.
37611
37612 It is not required that the cost always equal 2 when FROM is the same as TO;
37613 on some machines it is expensive to move between registers if they are not
37614 general registers. */
37615
37616 static int
37617 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37618 reg_class_t class2_i)
37619 {
37620 enum reg_class class1 = (enum reg_class) class1_i;
37621 enum reg_class class2 = (enum reg_class) class2_i;
37622
37623 /* In case we require secondary memory, compute cost of the store followed
37624 by load. In order to avoid bad register allocation choices, we need
37625 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37626
37627 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37628 {
37629 int cost = 1;
37630
37631 cost += inline_memory_move_cost (mode, class1, 2);
37632 cost += inline_memory_move_cost (mode, class2, 2);
37633
37634 /* In case of copying from a general purpose register we may emit multiple
37635 stores followed by a single load, causing a memory size mismatch stall.
37636 Count this as an arbitrarily high cost of 20. */
37637 if (targetm.class_max_nregs (class1, mode)
37638 > targetm.class_max_nregs (class2, mode))
37639 cost += 20;
37640
37641 /* In the case of FP/MMX moves, the registers actually overlap, and we
37642 have to switch modes in order to treat them differently. */
37643 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37644 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37645 cost += 20;
37646
37647 return cost;
37648 }
37649
37650 /* Moves between SSE/MMX and integer unit are expensive. */
37651 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37652 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37653
37654 /* ??? By keeping the returned value relatively high, we limit the number
37655 of moves between integer and MMX/SSE registers for all targets.
37656 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37657 where integer modes in MMX/SSE registers are not tieable
37658 because of missing QImode and HImode moves to, from or between
37659 MMX/SSE registers. */
37660 return MAX (8, ix86_cost->mmxsse_to_integer);
37661
37662 if (MAYBE_FLOAT_CLASS_P (class1))
37663 return ix86_cost->fp_move;
37664 if (MAYBE_SSE_CLASS_P (class1))
37665 return ix86_cost->sse_move;
37666 if (MAYBE_MMX_CLASS_P (class1))
37667 return ix86_cost->mmx_move;
37668 return 2;
37669 }
37670
37671 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37672 MODE. */
37673
37674 bool
37675 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37676 {
37677 /* The flags register, and only the flags register, can hold CCmode values. */
37678 if (CC_REGNO_P (regno))
37679 return GET_MODE_CLASS (mode) == MODE_CC;
37680 if (GET_MODE_CLASS (mode) == MODE_CC
37681 || GET_MODE_CLASS (mode) == MODE_RANDOM
37682 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37683 return false;
37684 if (STACK_REGNO_P (regno))
37685 return VALID_FP_MODE_P (mode);
37686 if (MASK_REGNO_P (regno))
37687 return VALID_MASK_REG_MODE (mode);
37688 if (SSE_REGNO_P (regno))
37689 {
37690 /* We implement the move patterns for all vector modes into and
37691 out of SSE registers, even when no operation instructions
37692 are available. */
37693
37694 /* For AVX-512 we allow, regardless of regno:
37695 - XI mode
37696 - any 512-bit wide vector mode
37697 - any scalar mode. */
37698 if (TARGET_AVX512F
37699 && (mode == XImode
37700 || VALID_AVX512F_REG_MODE (mode)
37701 || VALID_AVX512F_SCALAR_MODE (mode)))
37702 return true;
37703
37704 /* xmm16-xmm31 are only available for AVX-512. */
37705 if (EXT_REX_SSE_REGNO_P (regno))
37706 return false;
37707
37708 /* OImode and AVX modes are available only when AVX is enabled. */
37709 return ((TARGET_AVX
37710 && VALID_AVX256_REG_OR_OI_MODE (mode))
37711 || VALID_SSE_REG_MODE (mode)
37712 || VALID_SSE2_REG_MODE (mode)
37713 || VALID_MMX_REG_MODE (mode)
37714 || VALID_MMX_REG_MODE_3DNOW (mode));
37715 }
37716 if (MMX_REGNO_P (regno))
37717 {
37718 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37719 so if the register is available at all, then we can move data of
37720 the given mode into or out of it. */
37721 return (VALID_MMX_REG_MODE (mode)
37722 || VALID_MMX_REG_MODE_3DNOW (mode));
37723 }
37724
37725 if (mode == QImode)
37726 {
37727 /* Take care for QImode values - they can be in non-QI regs,
37728 but then they do cause partial register stalls. */
37729 if (ANY_QI_REGNO_P (regno))
37730 return true;
37731 if (!TARGET_PARTIAL_REG_STALL)
37732 return true;
37733 /* LRA checks if the hard register is OK for the given mode.
37734 QImode values can live in non-QI regs, so we allow all
37735 registers here. */
37736 if (lra_in_progress)
37737 return true;
37738 return !can_create_pseudo_p ();
37739 }
37740 /* We handle both integer and floats in the general purpose registers. */
37741 else if (VALID_INT_MODE_P (mode))
37742 return true;
37743 else if (VALID_FP_MODE_P (mode))
37744 return true;
37745 else if (VALID_DFP_MODE_P (mode))
37746 return true;
37747 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37748 on to use that value in smaller contexts, this can easily force a
37749 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37750 supporting DImode, allow it. */
37751 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37752 return true;
37753
37754 return false;
37755 }
37756
37757 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37758 tieable integer mode. */
37759
37760 static bool
37761 ix86_tieable_integer_mode_p (enum machine_mode mode)
37762 {
37763 switch (mode)
37764 {
37765 case HImode:
37766 case SImode:
37767 return true;
37768
37769 case QImode:
37770 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37771
37772 case DImode:
37773 return TARGET_64BIT;
37774
37775 default:
37776 return false;
37777 }
37778 }
37779
37780 /* Return true if MODE1 is accessible in a register that can hold MODE2
37781 without copying. That is, all register classes that can hold MODE2
37782 can also hold MODE1. */
37783
37784 bool
37785 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37786 {
37787 if (mode1 == mode2)
37788 return true;
37789
37790 if (ix86_tieable_integer_mode_p (mode1)
37791 && ix86_tieable_integer_mode_p (mode2))
37792 return true;
37793
37794 /* MODE2 being XFmode implies fp stack or general regs, which means we
37795 can tie any smaller floating point modes to it. Note that we do not
37796 tie this with TFmode. */
37797 if (mode2 == XFmode)
37798 return mode1 == SFmode || mode1 == DFmode;
37799
37800 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37801 that we can tie it with SFmode. */
37802 if (mode2 == DFmode)
37803 return mode1 == SFmode;
37804
37805 /* If MODE2 is only appropriate for an SSE register, then tie with
37806 any other mode acceptable to SSE registers. */
37807 if (GET_MODE_SIZE (mode2) == 32
37808 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37809 return (GET_MODE_SIZE (mode1) == 32
37810 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37811 if (GET_MODE_SIZE (mode2) == 16
37812 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37813 return (GET_MODE_SIZE (mode1) == 16
37814 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37815
37816 /* If MODE2 is appropriate for an MMX register, then tie
37817 with any other mode acceptable to MMX registers. */
37818 if (GET_MODE_SIZE (mode2) == 8
37819 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37820 return (GET_MODE_SIZE (mode1) == 8
37821 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37822
37823 return false;
37824 }
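
/* Illustrative expectations for the tieable check above (a sketch, not a
   test from the tree), assuming TARGET_64BIT and SSE2 are enabled:

     ix86_modes_tieable_p (SImode, DImode)     -> true  (both tieable ints)
     ix86_modes_tieable_p (SFmode, XFmode)     -> true  (XFmode ties SF/DF)
     ix86_modes_tieable_p (SFmode, DFmode)     -> true
     ix86_modes_tieable_p (V4SFmode, V2DImode) -> true  (both 16-byte SSE)
     ix86_modes_tieable_p (TFmode, XFmode)     -> false (TFmode is not tied)  */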
37825
37826 /* Return the cost of moving between two registers of mode MODE. */
37827
37828 static int
37829 ix86_set_reg_reg_cost (enum machine_mode mode)
37830 {
37831 unsigned int units = UNITS_PER_WORD;
37832
37833 switch (GET_MODE_CLASS (mode))
37834 {
37835 default:
37836 break;
37837
37838 case MODE_CC:
37839 units = GET_MODE_SIZE (CCmode);
37840 break;
37841
37842 case MODE_FLOAT:
37843 if ((TARGET_SSE && mode == TFmode)
37844 || (TARGET_80387 && mode == XFmode)
37845 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37846 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37847 units = GET_MODE_SIZE (mode);
37848 break;
37849
37850 case MODE_COMPLEX_FLOAT:
37851 if ((TARGET_SSE && mode == TCmode)
37852 || (TARGET_80387 && mode == XCmode)
37853 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37854 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37855 units = GET_MODE_SIZE (mode);
37856 break;
37857
37858 case MODE_VECTOR_INT:
37859 case MODE_VECTOR_FLOAT:
37860 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37861 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37862 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37863 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37864 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37865 units = GET_MODE_SIZE (mode);
37866 }
37867
37868 /* Return the cost of moving between two registers of mode MODE,
37869 assuming that the move will be in pieces of at most UNITS bytes. */
37870 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37871 }
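
/* A worked example for the cost formula above (a sketch, not from the
   tree): on x86_64, UNITS_PER_WORD is 8, so a TImode (16-byte) register
   copy that cannot use a wider unit costs
       COSTS_N_INSNS ((16 + 8 - 1) / 8) == COSTS_N_INSNS (2),
   while an SSE-capable V4SFmode copy uses units == 16 and costs
       COSTS_N_INSNS ((16 + 16 - 1) / 16) == COSTS_N_INSNS (1).  */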
37872
37873 /* Compute a (partial) cost for rtx X. Return true if the complete
37874 cost has been computed, and false if subexpressions should be
37875 scanned. In either case, *TOTAL contains the cost result. */
37876
37877 static bool
37878 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37879 bool speed)
37880 {
37881 rtx mask;
37882 enum rtx_code code = (enum rtx_code) code_i;
37883 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37884 enum machine_mode mode = GET_MODE (x);
37885 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37886
37887 switch (code)
37888 {
37889 case SET:
37890 if (register_operand (SET_DEST (x), VOIDmode)
37891 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37892 {
37893 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37894 return true;
37895 }
37896 return false;
37897
37898 case CONST_INT:
37899 case CONST:
37900 case LABEL_REF:
37901 case SYMBOL_REF:
37902 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37903 *total = 3;
37904 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37905 *total = 2;
37906 else if (flag_pic && SYMBOLIC_CONST (x)
37907 && !(TARGET_64BIT
37908 && (GET_CODE (x) == LABEL_REF
37909 || (GET_CODE (x) == SYMBOL_REF
37910 && SYMBOL_REF_LOCAL_P (x)))))
37911 *total = 1;
37912 else
37913 *total = 0;
37914 return true;
37915
37916 case CONST_DOUBLE:
37917 if (mode == VOIDmode)
37918 {
37919 *total = 0;
37920 return true;
37921 }
37922 switch (standard_80387_constant_p (x))
37923 {
37924 case 1: /* 0.0 */
37925 *total = 1;
37926 return true;
37927 default: /* Other constants */
37928 *total = 2;
37929 return true;
37930 case 0:
37931 case -1:
37932 break;
37933 }
37934 if (SSE_FLOAT_MODE_P (mode))
37935 {
37936 case CONST_VECTOR:
37937 switch (standard_sse_constant_p (x))
37938 {
37939 case 0:
37940 break;
37941 case 1: /* 0: xor eliminates false dependency */
37942 *total = 0;
37943 return true;
37944 default: /* -1: cmp contains false dependency */
37945 *total = 1;
37946 return true;
37947 }
37948 }
37949 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37950 it'll probably end up. Add a penalty for size. */
37951 *total = (COSTS_N_INSNS (1)
37952 + (flag_pic != 0 && !TARGET_64BIT)
37953 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37954 return true;
37955
37956 case ZERO_EXTEND:
37957 /* Zero extension is often completely free on x86_64, so make
37958 it as cheap as possible. */
37959 if (TARGET_64BIT && mode == DImode
37960 && GET_MODE (XEXP (x, 0)) == SImode)
37961 *total = 1;
37962 else if (TARGET_ZERO_EXTEND_WITH_AND)
37963 *total = cost->add;
37964 else
37965 *total = cost->movzx;
37966 return false;
37967
37968 case SIGN_EXTEND:
37969 *total = cost->movsx;
37970 return false;
37971
37972 case ASHIFT:
37973 if (SCALAR_INT_MODE_P (mode)
37974 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37975 && CONST_INT_P (XEXP (x, 1)))
37976 {
37977 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37978 if (value == 1)
37979 {
37980 *total = cost->add;
37981 return false;
37982 }
37983 if ((value == 2 || value == 3)
37984 && cost->lea <= cost->shift_const)
37985 {
37986 *total = cost->lea;
37987 return false;
37988 }
37989 }
37990 /* FALLTHRU */
37991
37992 case ROTATE:
37993 case ASHIFTRT:
37994 case LSHIFTRT:
37995 case ROTATERT:
37996 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37997 {
37998 /* ??? Should be SSE vector operation cost. */
37999 /* At least for published AMD latencies, this really is the same
38000 as the latency for a simple fpu operation like fabs. */
38001 /* V*QImode is emulated with 1-11 insns. */
38002 if (mode == V16QImode || mode == V32QImode)
38003 {
38004 int count = 11;
38005 if (TARGET_XOP && mode == V16QImode)
38006 {
38007 /* For XOP we use vpshab, which requires a broadcast of the
38008 value to the variable shift insn. For constants this
38009 means a V16QI constant in memory; even when we can perform
38010 the shift with one insn, set the cost to prefer paddb. */
38011 if (CONSTANT_P (XEXP (x, 1)))
38012 {
38013 *total = (cost->fabs
38014 + rtx_cost (XEXP (x, 0), code, 0, speed)
38015 + (speed ? 2 : COSTS_N_BYTES (16)));
38016 return true;
38017 }
38018 count = 3;
38019 }
38020 else if (TARGET_SSSE3)
38021 count = 7;
38022 *total = cost->fabs * count;
38023 }
38024 else
38025 *total = cost->fabs;
38026 }
38027 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38028 {
38029 if (CONST_INT_P (XEXP (x, 1)))
38030 {
38031 if (INTVAL (XEXP (x, 1)) > 32)
38032 *total = cost->shift_const + COSTS_N_INSNS (2);
38033 else
38034 *total = cost->shift_const * 2;
38035 }
38036 else
38037 {
38038 if (GET_CODE (XEXP (x, 1)) == AND)
38039 *total = cost->shift_var * 2;
38040 else
38041 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38042 }
38043 }
38044 else
38045 {
38046 if (CONST_INT_P (XEXP (x, 1)))
38047 *total = cost->shift_const;
38048 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38049 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38050 {
38051 /* Return the cost after shift-and truncation. */
38052 *total = cost->shift_var;
38053 return true;
38054 }
38055 else
38056 *total = cost->shift_var;
38057 }
38058 return false;
38059
38060 case FMA:
38061 {
38062 rtx sub;
38063
38064 gcc_assert (FLOAT_MODE_P (mode));
38065 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38066
38067 /* ??? SSE scalar/vector cost should be used here. */
38068 /* ??? Bald assumption that fma has the same cost as fmul. */
38069 *total = cost->fmul;
38070 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38071
38072 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38073 sub = XEXP (x, 0);
38074 if (GET_CODE (sub) == NEG)
38075 sub = XEXP (sub, 0);
38076 *total += rtx_cost (sub, FMA, 0, speed);
38077
38078 sub = XEXP (x, 2);
38079 if (GET_CODE (sub) == NEG)
38080 sub = XEXP (sub, 0);
38081 *total += rtx_cost (sub, FMA, 2, speed);
38082 return true;
38083 }
38084
38085 case MULT:
38086 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38087 {
38088 /* ??? SSE scalar cost should be used here. */
38089 *total = cost->fmul;
38090 return false;
38091 }
38092 else if (X87_FLOAT_MODE_P (mode))
38093 {
38094 *total = cost->fmul;
38095 return false;
38096 }
38097 else if (FLOAT_MODE_P (mode))
38098 {
38099 /* ??? SSE vector cost should be used here. */
38100 *total = cost->fmul;
38101 return false;
38102 }
38103 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38104 {
38105 /* V*QImode is emulated with 7-13 insns. */
38106 if (mode == V16QImode || mode == V32QImode)
38107 {
38108 int extra = 11;
38109 if (TARGET_XOP && mode == V16QImode)
38110 extra = 5;
38111 else if (TARGET_SSSE3)
38112 extra = 6;
38113 *total = cost->fmul * 2 + cost->fabs * extra;
38114 }
38115 /* V*DImode is emulated with 5-8 insns. */
38116 else if (mode == V2DImode || mode == V4DImode)
38117 {
38118 if (TARGET_XOP && mode == V2DImode)
38119 *total = cost->fmul * 2 + cost->fabs * 3;
38120 else
38121 *total = cost->fmul * 3 + cost->fabs * 5;
38122 }
38123 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38124 insns, including two PMULUDQ. */
38125 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38126 *total = cost->fmul * 2 + cost->fabs * 5;
38127 else
38128 *total = cost->fmul;
38129 return false;
38130 }
38131 else
38132 {
38133 rtx op0 = XEXP (x, 0);
38134 rtx op1 = XEXP (x, 1);
38135 int nbits;
38136 if (CONST_INT_P (XEXP (x, 1)))
38137 {
38138 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38139 for (nbits = 0; value != 0; value &= value - 1)
38140 nbits++;
38141 }
38142 else
38143 /* This is arbitrary. */
38144 nbits = 7;
38145
38146 /* Compute costs correctly for widening multiplication. */
38147 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38148 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38149 == GET_MODE_SIZE (mode))
38150 {
38151 int is_mulwiden = 0;
38152 enum machine_mode inner_mode = GET_MODE (op0);
38153
38154 if (GET_CODE (op0) == GET_CODE (op1))
38155 is_mulwiden = 1, op1 = XEXP (op1, 0);
38156 else if (CONST_INT_P (op1))
38157 {
38158 if (GET_CODE (op0) == SIGN_EXTEND)
38159 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38160 == INTVAL (op1);
38161 else
38162 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38163 }
38164
38165 if (is_mulwiden)
38166 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38167 }
38168
38169 *total = (cost->mult_init[MODE_INDEX (mode)]
38170 + nbits * cost->mult_bit
38171 + rtx_cost (op0, outer_code, opno, speed)
38172 + rtx_cost (op1, outer_code, opno, speed));
38173
38174 return true;
38175 }
38176
38177 case DIV:
38178 case UDIV:
38179 case MOD:
38180 case UMOD:
38181 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38182 /* ??? SSE cost should be used here. */
38183 *total = cost->fdiv;
38184 else if (X87_FLOAT_MODE_P (mode))
38185 *total = cost->fdiv;
38186 else if (FLOAT_MODE_P (mode))
38187 /* ??? SSE vector cost should be used here. */
38188 *total = cost->fdiv;
38189 else
38190 *total = cost->divide[MODE_INDEX (mode)];
38191 return false;
38192
38193 case PLUS:
38194 if (GET_MODE_CLASS (mode) == MODE_INT
38195 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38196 {
38197 if (GET_CODE (XEXP (x, 0)) == PLUS
38198 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38199 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38200 && CONSTANT_P (XEXP (x, 1)))
38201 {
38202 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38203 if (val == 2 || val == 4 || val == 8)
38204 {
38205 *total = cost->lea;
38206 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38207 outer_code, opno, speed);
38208 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38209 outer_code, opno, speed);
38210 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38211 return true;
38212 }
38213 }
38214 else if (GET_CODE (XEXP (x, 0)) == MULT
38215 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38216 {
38217 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38218 if (val == 2 || val == 4 || val == 8)
38219 {
38220 *total = cost->lea;
38221 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38222 outer_code, opno, speed);
38223 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38224 return true;
38225 }
38226 }
38227 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38228 {
38229 *total = cost->lea;
38230 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38231 outer_code, opno, speed);
38232 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38233 outer_code, opno, speed);
38234 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38235 return true;
38236 }
38237 }
38238 /* FALLTHRU */
38239
38240 case MINUS:
38241 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38242 {
38243 /* ??? SSE cost should be used here. */
38244 *total = cost->fadd;
38245 return false;
38246 }
38247 else if (X87_FLOAT_MODE_P (mode))
38248 {
38249 *total = cost->fadd;
38250 return false;
38251 }
38252 else if (FLOAT_MODE_P (mode))
38253 {
38254 /* ??? SSE vector cost should be used here. */
38255 *total = cost->fadd;
38256 return false;
38257 }
38258 /* FALLTHRU */
38259
38260 case AND:
38261 case IOR:
38262 case XOR:
38263 if (GET_MODE_CLASS (mode) == MODE_INT
38264 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38265 {
38266 *total = (cost->add * 2
38267 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38268 << (GET_MODE (XEXP (x, 0)) != DImode))
38269 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38270 << (GET_MODE (XEXP (x, 1)) != DImode)));
38271 return true;
38272 }
38273 /* FALLTHRU */
38274
38275 case NEG:
38276 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38277 {
38278 /* ??? SSE cost should be used here. */
38279 *total = cost->fchs;
38280 return false;
38281 }
38282 else if (X87_FLOAT_MODE_P (mode))
38283 {
38284 *total = cost->fchs;
38285 return false;
38286 }
38287 else if (FLOAT_MODE_P (mode))
38288 {
38289 /* ??? SSE vector cost should be used here. */
38290 *total = cost->fchs;
38291 return false;
38292 }
38293 /* FALLTHRU */
38294
38295 case NOT:
38296 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38297 {
38298 /* ??? Should be SSE vector operation cost. */
38299 /* At least for published AMD latencies, this really is the same
38300 as the latency for a simple fpu operation like fabs. */
38301 *total = cost->fabs;
38302 }
38303 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38304 *total = cost->add * 2;
38305 else
38306 *total = cost->add;
38307 return false;
38308
38309 case COMPARE:
38310 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38311 && XEXP (XEXP (x, 0), 1) == const1_rtx
38312 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38313 && XEXP (x, 1) == const0_rtx)
38314 {
38315 /* This kind of construct is implemented using test[bwl].
38316 Treat it as if we had an AND. */
38317 *total = (cost->add
38318 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38319 + rtx_cost (const1_rtx, outer_code, opno, speed));
38320 return true;
38321 }
38322 return false;
38323
38324 case FLOAT_EXTEND:
38325 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38326 *total = 0;
38327 return false;
38328
38329 case ABS:
38330 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38331 /* ??? SSE cost should be used here. */
38332 *total = cost->fabs;
38333 else if (X87_FLOAT_MODE_P (mode))
38334 *total = cost->fabs;
38335 else if (FLOAT_MODE_P (mode))
38336 /* ??? SSE vector cost should be used here. */
38337 *total = cost->fabs;
38338 return false;
38339
38340 case SQRT:
38341 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38342 /* ??? SSE cost should be used here. */
38343 *total = cost->fsqrt;
38344 else if (X87_FLOAT_MODE_P (mode))
38345 *total = cost->fsqrt;
38346 else if (FLOAT_MODE_P (mode))
38347 /* ??? SSE vector cost should be used here. */
38348 *total = cost->fsqrt;
38349 return false;
38350
38351 case UNSPEC:
38352 if (XINT (x, 1) == UNSPEC_TP)
38353 *total = 0;
38354 return false;
38355
38356 case VEC_SELECT:
38357 case VEC_CONCAT:
38358 case VEC_DUPLICATE:
38359 /* ??? Assume all of these vector manipulation patterns are
38360 recognizable, in which case they all pretty much have the
38361 same cost. */
38362 *total = cost->fabs;
38363 return true;
38364 case VEC_MERGE:
38365 mask = XEXP (x, 2);
38366 /* This is a masked instruction; assume the same cost
38367 as the non-masked variant. */
38368 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38369 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38370 else
38371 *total = cost->fabs;
38372 return true;
38373
38374 default:
38375 return false;
38376 }
38377 }
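
/* Stand-alone sketch (not GCC code) of the bit counting used in the MULT
   case above: Kernighan's trick clears the lowest set bit per iteration,
   so a constant multiplier of 10 (binary 1010) yields nbits == 2, and the
   multiply is costed as mult_init[...] + 2 * mult_bit plus operand costs.  */
static int
ix86_popcount_sketch (unsigned long long value)
{
  int nbits = 0;
  while (value != 0)
    {
      value &= value - 1;	/* clear the lowest set bit */
      nbits++;
    }
  return nbits;
}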
38378
38379 #if TARGET_MACHO
38380
38381 static int current_machopic_label_num;
38382
38383 /* Given a symbol name and its associated stub, write out the
38384 definition of the stub. */
38385
38386 void
38387 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38388 {
38389 unsigned int length;
38390 char *binder_name, *symbol_name, lazy_ptr_name[32];
38391 int label = ++current_machopic_label_num;
38392
38393 /* For 64-bit we shouldn't get here. */
38394 gcc_assert (!TARGET_64BIT);
38395
38396 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38397 symb = targetm.strip_name_encoding (symb);
38398
38399 length = strlen (stub);
38400 binder_name = XALLOCAVEC (char, length + 32);
38401 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38402
38403 length = strlen (symb);
38404 symbol_name = XALLOCAVEC (char, length + 32);
38405 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38406
38407 sprintf (lazy_ptr_name, "L%d$lz", label);
38408
38409 if (MACHOPIC_ATT_STUB)
38410 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38411 else if (MACHOPIC_PURE)
38412 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38413 else
38414 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38415
38416 fprintf (file, "%s:\n", stub);
38417 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38418
38419 if (MACHOPIC_ATT_STUB)
38420 {
38421 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38422 }
38423 else if (MACHOPIC_PURE)
38424 {
38425 /* PIC stub. */
38426 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38427 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38428 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38429 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38430 label, lazy_ptr_name, label);
38431 fprintf (file, "\tjmp\t*%%ecx\n");
38432 }
38433 else
38434 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38435
38436 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38437 it needs no stub-binding-helper. */
38438 if (MACHOPIC_ATT_STUB)
38439 return;
38440
38441 fprintf (file, "%s:\n", binder_name);
38442
38443 if (MACHOPIC_PURE)
38444 {
38445 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38446 fprintf (file, "\tpushl\t%%ecx\n");
38447 }
38448 else
38449 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38450
38451 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38452
38453 /* N.B. Keep the correspondence of these
38454 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38455 old-pic/new-pic/non-pic stubs; altering this will break
38456 compatibility with existing dylibs. */
38457 if (MACHOPIC_PURE)
38458 {
38459 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38460 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38461 }
38462 else
38463 /* 16-byte -mdynamic-no-pic stub. */
38464 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38465
38466 fprintf (file, "%s:\n", lazy_ptr_name);
38467 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38468 fprintf (file, ASM_LONG "%s\n", binder_name);
38469 }
38470 #endif /* TARGET_MACHO */
38471
38472 /* Order the registers for the register allocator. */
38473
38474 void
38475 x86_order_regs_for_local_alloc (void)
38476 {
38477 int pos = 0;
38478 int i;
38479
38480 /* First allocate the local general purpose registers. */
38481 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38482 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38483 reg_alloc_order [pos++] = i;
38484
38485 /* Global general purpose registers. */
38486 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38487 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38488 reg_alloc_order [pos++] = i;
38489
38490 /* x87 registers come first in case we are doing FP math
38491 using them. */
38492 if (!TARGET_SSE_MATH)
38493 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38494 reg_alloc_order [pos++] = i;
38495
38496 /* SSE registers. */
38497 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38498 reg_alloc_order [pos++] = i;
38499 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38500 reg_alloc_order [pos++] = i;
38501
38502 /* Extended REX SSE registers. */
38503 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38504 reg_alloc_order [pos++] = i;
38505
38506 /* Mask registers. */
38507 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38508 reg_alloc_order [pos++] = i;
38509
38510 /* x87 registers. */
38511 if (TARGET_SSE_MATH)
38512 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38513 reg_alloc_order [pos++] = i;
38514
38515 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38516 reg_alloc_order [pos++] = i;
38517
38518 /* Initialize the rest of the array, as we do not allocate some registers
38519 at all. */
38520 while (pos < FIRST_PSEUDO_REGISTER)
38521 reg_alloc_order [pos++] = 0;
38522 }
38523
38524 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38525 in struct attribute_spec handler. */
38526 static tree
38527 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38528 tree args,
38529 int flags ATTRIBUTE_UNUSED,
38530 bool *no_add_attrs)
38531 {
38532 if (TREE_CODE (*node) != FUNCTION_TYPE
38533 && TREE_CODE (*node) != METHOD_TYPE
38534 && TREE_CODE (*node) != FIELD_DECL
38535 && TREE_CODE (*node) != TYPE_DECL)
38536 {
38537 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38538 name);
38539 *no_add_attrs = true;
38540 return NULL_TREE;
38541 }
38542 if (TARGET_64BIT)
38543 {
38544 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38545 name);
38546 *no_add_attrs = true;
38547 return NULL_TREE;
38548 }
38549 if (is_attribute_p ("callee_pop_aggregate_return", name))
38550 {
38551 tree cst;
38552
38553 cst = TREE_VALUE (args);
38554 if (TREE_CODE (cst) != INTEGER_CST)
38555 {
38556 warning (OPT_Wattributes,
38557 "%qE attribute requires an integer constant argument",
38558 name);
38559 *no_add_attrs = true;
38560 }
38561 else if (compare_tree_int (cst, 0) != 0
38562 && compare_tree_int (cst, 1) != 0)
38563 {
38564 warning (OPT_Wattributes,
38565 "argument to %qE attribute is neither zero, nor one",
38566 name);
38567 *no_add_attrs = true;
38568 }
38569
38570 return NULL_TREE;
38571 }
38572
38573 return NULL_TREE;
38574 }
38575
38576 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38577 struct attribute_spec.handler. */
38578 static tree
38579 ix86_handle_abi_attribute (tree *node, tree name,
38580 tree args ATTRIBUTE_UNUSED,
38581 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38582 {
38583 if (TREE_CODE (*node) != FUNCTION_TYPE
38584 && TREE_CODE (*node) != METHOD_TYPE
38585 && TREE_CODE (*node) != FIELD_DECL
38586 && TREE_CODE (*node) != TYPE_DECL)
38587 {
38588 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38589 name);
38590 *no_add_attrs = true;
38591 return NULL_TREE;
38592 }
38593
38594 /* Can combine regparm with all attributes but fastcall. */
38595 if (is_attribute_p ("ms_abi", name))
38596 {
38597 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38598 {
38599 error ("ms_abi and sysv_abi attributes are not compatible");
38600 }
38601
38602 return NULL_TREE;
38603 }
38604 else if (is_attribute_p ("sysv_abi", name))
38605 {
38606 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38607 {
38608 error ("ms_abi and sysv_abi attributes are not compatible");
38609 }
38610
38611 return NULL_TREE;
38612 }
38613
38614 return NULL_TREE;
38615 }
38616
38617 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38618 struct attribute_spec.handler. */
38619 static tree
38620 ix86_handle_struct_attribute (tree *node, tree name,
38621 tree args ATTRIBUTE_UNUSED,
38622 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38623 {
38624 tree *type = NULL;
38625 if (DECL_P (*node))
38626 {
38627 if (TREE_CODE (*node) == TYPE_DECL)
38628 type = &TREE_TYPE (*node);
38629 }
38630 else
38631 type = node;
38632
38633 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38634 {
38635 warning (OPT_Wattributes, "%qE attribute ignored",
38636 name);
38637 *no_add_attrs = true;
38638 }
38639
38640 else if ((is_attribute_p ("ms_struct", name)
38641 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38642 || ((is_attribute_p ("gcc_struct", name)
38643 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38644 {
38645 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38646 name);
38647 *no_add_attrs = true;
38648 }
38649
38650 return NULL_TREE;
38651 }
38652
38653 static tree
38654 ix86_handle_fndecl_attribute (tree *node, tree name,
38655 tree args ATTRIBUTE_UNUSED,
38656 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38657 {
38658 if (TREE_CODE (*node) != FUNCTION_DECL)
38659 {
38660 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38661 name);
38662 *no_add_attrs = true;
38663 }
38664 return NULL_TREE;
38665 }
38666
38667 static bool
38668 ix86_ms_bitfield_layout_p (const_tree record_type)
38669 {
38670 return ((TARGET_MS_BITFIELD_LAYOUT
38671 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38672 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38673 }
38674
38675 /* Returns an expression indicating where the this parameter is
38676 located on entry to the FUNCTION. */
38677
38678 static rtx
38679 x86_this_parameter (tree function)
38680 {
38681 tree type = TREE_TYPE (function);
38682 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38683 int nregs;
38684
38685 if (TARGET_64BIT)
38686 {
38687 const int *parm_regs;
38688
38689 if (ix86_function_type_abi (type) == MS_ABI)
38690 parm_regs = x86_64_ms_abi_int_parameter_registers;
38691 else
38692 parm_regs = x86_64_int_parameter_registers;
38693 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38694 }
38695
38696 nregs = ix86_function_regparm (type, function);
38697
38698 if (nregs > 0 && !stdarg_p (type))
38699 {
38700 int regno;
38701 unsigned int ccvt = ix86_get_callcvt (type);
38702
38703 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38704 regno = aggr ? DX_REG : CX_REG;
38705 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38706 {
38707 regno = CX_REG;
38708 if (aggr)
38709 return gen_rtx_MEM (SImode,
38710 plus_constant (Pmode, stack_pointer_rtx, 4));
38711 }
38712 else
38713 {
38714 regno = AX_REG;
38715 if (aggr)
38716 {
38717 regno = DX_REG;
38718 if (nregs == 1)
38719 return gen_rtx_MEM (SImode,
38720 plus_constant (Pmode,
38721 stack_pointer_rtx, 4));
38722 }
38723 }
38724 return gen_rtx_REG (SImode, regno);
38725 }
38726
38727 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38728 aggr ? 8 : 4));
38729 }
38730
38731 /* Determine whether x86_output_mi_thunk can succeed. */
38732
38733 static bool
38734 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38735 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38736 HOST_WIDE_INT vcall_offset, const_tree function)
38737 {
38738 /* 64-bit can handle anything. */
38739 if (TARGET_64BIT)
38740 return true;
38741
38742 /* For 32-bit, everything's fine if we have one free register. */
38743 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38744 return true;
38745
38746 /* Need a free register for vcall_offset. */
38747 if (vcall_offset)
38748 return false;
38749
38750 /* Need a free register for GOT references. */
38751 if (flag_pic && !targetm.binds_local_p (function))
38752 return false;
38753
38754 /* Otherwise ok. */
38755 return true;
38756 }
38757
38758 /* Output the assembler code for a thunk function. THUNK_DECL is the
38759 declaration for the thunk function itself, FUNCTION is the decl for
38760 the target function. DELTA is an immediate constant offset to be
38761 added to THIS. If VCALL_OFFSET is nonzero, the word at
38762 *(*this + vcall_offset) should be added to THIS. */
38763
38764 static void
38765 x86_output_mi_thunk (FILE *file,
38766 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38767 HOST_WIDE_INT vcall_offset, tree function)
38768 {
38769 rtx this_param = x86_this_parameter (function);
38770 rtx this_reg, tmp, fnaddr;
38771 unsigned int tmp_regno;
38772
38773 if (TARGET_64BIT)
38774 tmp_regno = R10_REG;
38775 else
38776 {
38777 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38778 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38779 tmp_regno = AX_REG;
38780 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38781 tmp_regno = DX_REG;
38782 else
38783 tmp_regno = CX_REG;
38784 }
38785
38786 emit_note (NOTE_INSN_PROLOGUE_END);
38787
38788 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38789 pull it in now and let DELTA benefit. */
38790 if (REG_P (this_param))
38791 this_reg = this_param;
38792 else if (vcall_offset)
38793 {
38794 /* Put the this parameter into %eax. */
38795 this_reg = gen_rtx_REG (Pmode, AX_REG);
38796 emit_move_insn (this_reg, this_param);
38797 }
38798 else
38799 this_reg = NULL_RTX;
38800
38801 /* Adjust the this parameter by a fixed constant. */
38802 if (delta)
38803 {
38804 rtx delta_rtx = GEN_INT (delta);
38805 rtx delta_dst = this_reg ? this_reg : this_param;
38806
38807 if (TARGET_64BIT)
38808 {
38809 if (!x86_64_general_operand (delta_rtx, Pmode))
38810 {
38811 tmp = gen_rtx_REG (Pmode, tmp_regno);
38812 emit_move_insn (tmp, delta_rtx);
38813 delta_rtx = tmp;
38814 }
38815 }
38816
38817 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38818 }
38819
38820 /* Adjust the this parameter by a value stored in the vtable. */
38821 if (vcall_offset)
38822 {
38823 rtx vcall_addr, vcall_mem, this_mem;
38824
38825 tmp = gen_rtx_REG (Pmode, tmp_regno);
38826
38827 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38828 if (Pmode != ptr_mode)
38829 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38830 emit_move_insn (tmp, this_mem);
38831
38832 /* Adjust the this parameter. */
38833 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38834 if (TARGET_64BIT
38835 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38836 {
38837 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38838 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38839 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38840 }
38841
38842 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38843 if (Pmode != ptr_mode)
38844 emit_insn (gen_addsi_1_zext (this_reg,
38845 gen_rtx_REG (ptr_mode,
38846 REGNO (this_reg)),
38847 vcall_mem));
38848 else
38849 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38850 }
38851
38852 /* If necessary, drop THIS back to its stack slot. */
38853 if (this_reg && this_reg != this_param)
38854 emit_move_insn (this_param, this_reg);
38855
38856 fnaddr = XEXP (DECL_RTL (function), 0);
38857 if (TARGET_64BIT)
38858 {
38859 if (!flag_pic || targetm.binds_local_p (function)
38860 || TARGET_PECOFF)
38861 ;
38862 else
38863 {
38864 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38865 tmp = gen_rtx_CONST (Pmode, tmp);
38866 fnaddr = gen_const_mem (Pmode, tmp);
38867 }
38868 }
38869 else
38870 {
38871 if (!flag_pic || targetm.binds_local_p (function))
38872 ;
38873 #if TARGET_MACHO
38874 else if (TARGET_MACHO)
38875 {
38876 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38877 fnaddr = XEXP (fnaddr, 0);
38878 }
38879 #endif /* TARGET_MACHO */
38880 else
38881 {
38882 tmp = gen_rtx_REG (Pmode, CX_REG);
38883 output_set_got (tmp, NULL_RTX);
38884
38885 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38886 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38887 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38888 fnaddr = gen_const_mem (Pmode, fnaddr);
38889 }
38890 }
38891
38892 /* Our sibling call patterns do not allow memories, because we have no
38893 predicate that can distinguish between frame and non-frame memory.
38894 For our purposes here, we can get away with (ab)using a jump pattern,
38895 because we're going to do no optimization. */
38896 if (MEM_P (fnaddr))
38897 {
38898 if (sibcall_insn_operand (fnaddr, word_mode))
38899 {
38900 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38901 tmp = emit_call_insn (tmp);
38902 SIBLING_CALL_P (tmp) = 1;
38903 }
38904 else
38905 emit_jump_insn (gen_indirect_jump (fnaddr));
38906 }
38907 else
38908 {
38909 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38910 fnaddr = legitimize_pic_address (fnaddr,
38911 gen_rtx_REG (Pmode, tmp_regno));
38912
38913 if (!sibcall_insn_operand (fnaddr, word_mode))
38914 {
38915 tmp = gen_rtx_REG (word_mode, tmp_regno);
38916 if (GET_MODE (fnaddr) != word_mode)
38917 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38918 emit_move_insn (tmp, fnaddr);
38919 fnaddr = tmp;
38920 }
38921
38922 tmp = gen_rtx_MEM (QImode, fnaddr);
38923 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38924 tmp = emit_call_insn (tmp);
38925 SIBLING_CALL_P (tmp) = 1;
38926 }
38927 emit_barrier ();
38928
38929 /* Emit just enough of rest_of_compilation to get the insns emitted.
38930 Note that use_thunk calls assemble_start_function et al. */
38931 tmp = get_insns ();
38932 shorten_branches (tmp);
38933 final_start_function (tmp, file, 1);
38934 final (tmp, file, 1);
38935 final_end_function ();
38936 }
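
/* Conceptual sketch (not GCC code) of what the emitted thunk computes
   before tail-calling FUNCTION; the parameter and callee names below are
   purely illustrative:

     static void
     thunk_sketch (char *this_ptr, long delta, long vcall_offset)
     {
       this_ptr += delta;                             // DELTA adjustment
       if (vcall_offset)
         this_ptr += *(long *) (*(char **) this_ptr   // word at
                                + vcall_offset);      // *this + VCALL_OFFSET
       real_method (this_ptr);                        // sibling call
     }  */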
38937
38938 static void
38939 x86_file_start (void)
38940 {
38941 default_file_start ();
38942 if (TARGET_16BIT)
38943 fputs ("\t.code16gcc\n", asm_out_file);
38944 #if TARGET_MACHO
38945 darwin_file_start ();
38946 #endif
38947 if (X86_FILE_START_VERSION_DIRECTIVE)
38948 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38949 if (X86_FILE_START_FLTUSED)
38950 fputs ("\t.global\t__fltused\n", asm_out_file);
38951 if (ix86_asm_dialect == ASM_INTEL)
38952 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38953 }
38954
38955 int
38956 x86_field_alignment (tree field, int computed)
38957 {
38958 enum machine_mode mode;
38959 tree type = TREE_TYPE (field);
38960
38961 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38962 return computed;
38963 mode = TYPE_MODE (strip_array_types (type));
38964 if (mode == DFmode || mode == DCmode
38965 || GET_MODE_CLASS (mode) == MODE_INT
38966 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38967 return MIN (32, computed);
38968 return computed;
38969 }
38970
38971 /* Output assembler code to FILE to increment profiler label # LABELNO
38972 for profiling a function entry. */
38973 void
38974 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38975 {
38976 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38977 : MCOUNT_NAME);
38978
38979 if (TARGET_64BIT)
38980 {
38981 #ifndef NO_PROFILE_COUNTERS
38982 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38983 #endif
38984
38985 if (!TARGET_PECOFF && flag_pic)
38986 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38987 else
38988 fprintf (file, "\tcall\t%s\n", mcount_name);
38989 }
38990 else if (flag_pic)
38991 {
38992 #ifndef NO_PROFILE_COUNTERS
38993 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38994 LPREFIX, labelno);
38995 #endif
38996 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38997 }
38998 else
38999 {
39000 #ifndef NO_PROFILE_COUNTERS
39001 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39002 LPREFIX, labelno);
39003 #endif
39004 fprintf (file, "\tcall\t%s\n", mcount_name);
39005 }
39006 }
39007
39008 /* We don't have exact information about the insn sizes, but we may assume
39009 quite safely that we are informed about all 1-byte insns and memory
39010 address sizes. This is enough to eliminate unnecessary padding in
39011 99% of cases. */
39012
39013 static int
39014 min_insn_size (rtx insn)
39015 {
39016 int l = 0, len;
39017
39018 if (!INSN_P (insn) || !active_insn_p (insn))
39019 return 0;
39020
39021 /* Discard alignments we've emitted, and jump instructions. */
39022 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39023 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39024 return 0;
39025
39026 /* Important case - calls are always 5 bytes.
39027 It is common to have many calls in a row. */
39028 if (CALL_P (insn)
39029 && symbolic_reference_mentioned_p (PATTERN (insn))
39030 && !SIBLING_CALL_P (insn))
39031 return 5;
39032 len = get_attr_length (insn);
39033 if (len <= 1)
39034 return 1;
39035
39036 /* For normal instructions we rely on get_attr_length being exact,
39037 with a few exceptions. */
39038 if (!JUMP_P (insn))
39039 {
39040 enum attr_type type = get_attr_type (insn);
39041
39042 switch (type)
39043 {
39044 case TYPE_MULTI:
39045 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39046 || asm_noperands (PATTERN (insn)) >= 0)
39047 return 0;
39048 break;
39049 case TYPE_OTHER:
39050 case TYPE_FCMP:
39051 break;
39052 default:
39053 /* Otherwise trust get_attr_length. */
39054 return len;
39055 }
39056
39057 l = get_attr_length_address (insn);
39058 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39059 l = 4;
39060 }
39061 if (l)
39062 return 1+l;
39063 else
39064 return 2;
39065 }
39066
39067 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39068
39069 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39070 16-byte window. */
39071
39072 static void
39073 ix86_avoid_jump_mispredicts (void)
39074 {
39075 rtx insn, start = get_insns ();
39076 int nbytes = 0, njumps = 0;
39077 int isjump = 0;
39078
39079 /* Look for all minimal intervals of instructions containing 4 jumps.
39080 The intervals are bounded by START and INSN. NBYTES is the total
39081 size of instructions in the interval including INSN and not including
39082 START. When NBYTES is smaller than 16 bytes, it is possible
39083 that the end of START and INSN end up in the same 16-byte page.
39084
39085 The smallest offset in the page at which INSN can start is when START
39086 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39087 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39088
39089 Don't consider an asm goto as a jump; while it can contain a jump, it
39090 doesn't have to, since control can reach its label(s) through other
39091 means, and we also estimate the minimum length of all asm stmts as 0. */
39092 for (insn = start; insn; insn = NEXT_INSN (insn))
39093 {
39094 int min_size;
39095
39096 if (LABEL_P (insn))
39097 {
39098 int align = label_to_alignment (insn);
39099 int max_skip = label_to_max_skip (insn);
39100
39101 if (max_skip > 15)
39102 max_skip = 15;
39103 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39104 already in the current 16 byte page, because otherwise
39105 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39106 bytes to reach 16 byte boundary. */
39107 if (align <= 0
39108 || (align <= 3 && max_skip != (1 << align) - 1))
39109 max_skip = 0;
39110 if (dump_file)
39111 fprintf (dump_file, "Label %i with max_skip %i\n",
39112 INSN_UID (insn), max_skip);
39113 if (max_skip)
39114 {
39115 while (nbytes + max_skip >= 16)
39116 {
39117 start = NEXT_INSN (start);
39118 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39119 || CALL_P (start))
39120 njumps--, isjump = 1;
39121 else
39122 isjump = 0;
39123 nbytes -= min_insn_size (start);
39124 }
39125 }
39126 continue;
39127 }
39128
39129 min_size = min_insn_size (insn);
39130 nbytes += min_size;
39131 if (dump_file)
39132 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39133 INSN_UID (insn), min_size);
39134 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39135 || CALL_P (insn))
39136 njumps++;
39137 else
39138 continue;
39139
39140 while (njumps > 3)
39141 {
39142 start = NEXT_INSN (start);
39143 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39144 || CALL_P (start))
39145 njumps--, isjump = 1;
39146 else
39147 isjump = 0;
39148 nbytes -= min_insn_size (start);
39149 }
39150 gcc_assert (njumps >= 0);
39151 if (dump_file)
39152 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39153 INSN_UID (start), INSN_UID (insn), nbytes);
39154
39155 if (njumps == 3 && isjump && nbytes < 16)
39156 {
39157 int padsize = 15 - nbytes + min_insn_size (insn);
39158
39159 if (dump_file)
39160 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39161 INSN_UID (insn), padsize);
39162 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39163 }
39164 }
39165 }
39166 #endif
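
/* A worked example of the padding math above (a sketch, not from the
   tree): if the current window holds nbytes == 12, of which the fourth
   jump INSN accounts for 5 bytes, the emitted pad allows skipping up to
   15 - 12 + 5 == 8 bytes, enough to push INSN out of any 16-byte window
   it might otherwise share with the three earlier jumps.  */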
39167
39168 /* AMD Athlon works faster
39169 when RET is not the destination of a conditional jump or directly preceded
39170 by another jump instruction. We avoid the penalty by inserting a NOP just
39171 before the RET instruction in such cases. */
39172 static void
39173 ix86_pad_returns (void)
39174 {
39175 edge e;
39176 edge_iterator ei;
39177
39178 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39179 {
39180 basic_block bb = e->src;
39181 rtx ret = BB_END (bb);
39182 rtx prev;
39183 bool replace = false;
39184
39185 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39186 || optimize_bb_for_size_p (bb))
39187 continue;
39188 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39189 if (active_insn_p (prev) || LABEL_P (prev))
39190 break;
39191 if (prev && LABEL_P (prev))
39192 {
39193 edge e;
39194 edge_iterator ei;
39195
39196 FOR_EACH_EDGE (e, ei, bb->preds)
39197 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39198 && !(e->flags & EDGE_FALLTHRU))
39199 {
39200 replace = true;
39201 break;
39202 }
39203 }
39204 if (!replace)
39205 {
39206 prev = prev_active_insn (ret);
39207 if (prev
39208 && ((JUMP_P (prev) && any_condjump_p (prev))
39209 || CALL_P (prev)))
39210 replace = true;
39211 /* Empty functions get a branch mispredict even when
39212 the jump destination is not visible to us. */
39213 if (!prev && !optimize_function_for_size_p (cfun))
39214 replace = true;
39215 }
39216 if (replace)
39217 {
39218 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39219 delete_insn (ret);
39220 }
39221 }
39222 }
39223
39224 /* Count the minimum number of instructions in BB. Return 4 if the
39225 number of instructions >= 4. */
39226
39227 static int
39228 ix86_count_insn_bb (basic_block bb)
39229 {
39230 rtx insn;
39231 int insn_count = 0;
39232
39233 /* Count number of instructions in this block. Return 4 if the number
39234 of instructions >= 4. */
39235 FOR_BB_INSNS (bb, insn)
39236 {
39237 /* This only happens in exit blocks. */
39238 if (JUMP_P (insn)
39239 && ANY_RETURN_P (PATTERN (insn)))
39240 break;
39241
39242 if (NONDEBUG_INSN_P (insn)
39243 && GET_CODE (PATTERN (insn)) != USE
39244 && GET_CODE (PATTERN (insn)) != CLOBBER)
39245 {
39246 insn_count++;
39247 if (insn_count >= 4)
39248 return insn_count;
39249 }
39250 }
39251
39252 return insn_count;
39253 }
39254
39255
39256 /* Count the minimum number of instructions in code path in BB.
39257 Return 4 if the number of instructions >= 4. */
39258
39259 static int
39260 ix86_count_insn (basic_block bb)
39261 {
39262 edge e;
39263 edge_iterator ei;
39264 int min_prev_count;
39265
39266 /* Only bother counting instructions along paths with no
39267 more than 2 basic blocks between entry and exit. Given
39268 that BB has an edge to exit, determine if a predecessor
39269 of BB has an edge from entry. If so, compute the number
39270 of instructions in the predecessor block. If there
39271 happen to be multiple such blocks, compute the minimum. */
39272 min_prev_count = 4;
39273 FOR_EACH_EDGE (e, ei, bb->preds)
39274 {
39275 edge prev_e;
39276 edge_iterator prev_ei;
39277
39278 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39279 {
39280 min_prev_count = 0;
39281 break;
39282 }
39283 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39284 {
39285 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39286 {
39287 int count = ix86_count_insn_bb (e->src);
39288 if (count < min_prev_count)
39289 min_prev_count = count;
39290 break;
39291 }
39292 }
39293 }
39294
39295 if (min_prev_count < 4)
39296 min_prev_count += ix86_count_insn_bb (bb);
39297
39298 return min_prev_count;
39299 }
39300
39301 /* Pad a short function to 4 instructions. */
39302
39303 static void
39304 ix86_pad_short_function (void)
39305 {
39306 edge e;
39307 edge_iterator ei;
39308
39309 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39310 {
39311 rtx ret = BB_END (e->src);
39312 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39313 {
39314 int insn_count = ix86_count_insn (e->src);
39315
39316 /* Pad short function. */
39317 if (insn_count < 4)
39318 {
39319 rtx insn = ret;
39320
39321 /* Find epilogue. */
39322 while (insn
39323 && (!NOTE_P (insn)
39324 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39325 insn = PREV_INSN (insn);
39326
39327 if (!insn)
39328 insn = ret;
39329
39330 /* Two NOPs count as one instruction. */
39331 insn_count = 2 * (4 - insn_count);
39332 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39333 }
39334 }
39335 }
39336 }
39337
39338 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39339 the epilogue, the Windows system unwinder will apply epilogue logic and
39340 produce incorrect offsets. This can be avoided by adding a nop between
39341 the last insn that can throw and the first insn of the epilogue. */
39342
39343 static void
39344 ix86_seh_fixup_eh_fallthru (void)
39345 {
39346 edge e;
39347 edge_iterator ei;
39348
39349 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39350 {
39351 rtx insn, next;
39352
39353 /* Find the beginning of the epilogue. */
39354 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39355 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39356 break;
39357 if (insn == NULL)
39358 continue;
39359
39360 /* We only care about preceding insns that can throw. */
39361 insn = prev_active_insn (insn);
39362 if (insn == NULL || !can_throw_internal (insn))
39363 continue;
39364
39365 /* Do not separate calls from their debug information. */
39366 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39367 if (NOTE_P (next)
39368 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39369 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39370 insn = next;
39371 else
39372 break;
39373
39374 emit_insn_after (gen_nops (const1_rtx), insn);
39375 }
39376 }
39377
39378 /* Implement machine-specific optimizations. We implement padding of returns
39379 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39380 static void
39381 ix86_reorg (void)
39382 {
39383 /* We are freeing block_for_insn in the toplev to keep compatibility
39384 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39385 compute_bb_for_insn ();
39386
39387 if (TARGET_SEH && current_function_has_exception_handlers ())
39388 ix86_seh_fixup_eh_fallthru ();
39389
39390 if (optimize && optimize_function_for_speed_p (cfun))
39391 {
39392 if (TARGET_PAD_SHORT_FUNCTION)
39393 ix86_pad_short_function ();
39394 else if (TARGET_PAD_RETURNS)
39395 ix86_pad_returns ();
39396 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39397 if (TARGET_FOUR_JUMP_LIMIT)
39398 ix86_avoid_jump_mispredicts ();
39399 #endif
39400 }
39401 }
39402
39403 /* Return nonzero when a QImode register that must be represented via a REX
39404 prefix is used. */
39405 bool
39406 x86_extended_QIreg_mentioned_p (rtx insn)
39407 {
39408 int i;
39409 extract_insn_cached (insn);
39410 for (i = 0; i < recog_data.n_operands; i++)
39411 if (GENERAL_REG_P (recog_data.operand[i])
39412 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39413 return true;
39414 return false;
39415 }
39416
39417 /* Return nonzero when P points to a register encoded via a REX prefix.
39418 Called via for_each_rtx. */
39419 static int
39420 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39421 {
39422 unsigned int regno;
39423 if (!REG_P (*p))
39424 return 0;
39425 regno = REGNO (*p);
39426 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39427 }
39428
39429 /* Return true when INSN mentions a register that must be encoded using a
39430 REX prefix. */
39431 bool
39432 x86_extended_reg_mentioned_p (rtx insn)
39433 {
39434 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39435 extended_reg_mentioned_1, NULL);
39436 }
39437
39438 /* If profitable, negate (without causing overflow) integer constant
39439 of mode MODE at location LOC. Return true in this case. */
39440 bool
39441 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39442 {
39443 HOST_WIDE_INT val;
39444
39445 if (!CONST_INT_P (*loc))
39446 return false;
39447
39448 switch (mode)
39449 {
39450 case DImode:
39451 /* DImode x86_64 constants must fit in 32 bits. */
39452 gcc_assert (x86_64_immediate_operand (*loc, mode));
39453
39454 mode = SImode;
39455 break;
39456
39457 case SImode:
39458 case HImode:
39459 case QImode:
39460 break;
39461
39462 default:
39463 gcc_unreachable ();
39464 }
39465
39466 /* Avoid overflows. */
39467 if (mode_signbit_p (mode, *loc))
39468 return false;
39469
39470 val = INTVAL (*loc);
39471
39472 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
39473 Exception: -128 encodes smaller than 128, so swap sign and op. */
39474 if ((val < 0 && val != -128)
39475 || val == 128)
39476 {
39477 *loc = GEN_INT (-val);
39478 return true;
39479 }
39480
39481 return false;
39482 }
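
/* Stand-alone sketch (not GCC code) of the encoding rule the function
   above relies on: x86 add/sub immediates in [-128, 127] fit the short
   sign-extended imm8 form, anything else needs the long imm32 form.
   Negation is mostly cosmetic for small negatives (addl $-4 becomes
   subl $4), is a real size win for exactly 128 (addl $128 becomes
   subl $-128, imm32 -> imm8), and would be a size loss for -128, which
   already fits imm8 while 128 does not.  */
static int
imm_fits_imm8_sketch (long long val)
{
  return val >= -128 && val <= 127;
}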
39483
39484 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39485 optabs would emit if we didn't have TFmode patterns. */
39486
39487 void
39488 x86_emit_floatuns (rtx operands[2])
39489 {
39490 rtx neglab, donelab, i0, i1, f0, in, out;
39491 enum machine_mode mode, inmode;
39492
39493 inmode = GET_MODE (operands[1]);
39494 gcc_assert (inmode == SImode || inmode == DImode);
39495
39496 out = operands[0];
39497 in = force_reg (inmode, operands[1]);
39498 mode = GET_MODE (out);
39499 neglab = gen_label_rtx ();
39500 donelab = gen_label_rtx ();
39501 f0 = gen_reg_rtx (mode);
39502
39503 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39504
39505 expand_float (out, in, 0);
39506
39507 emit_jump_insn (gen_jump (donelab));
39508 emit_barrier ();
39509
39510 emit_label (neglab);
39511
39512 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39513 1, OPTAB_DIRECT);
39514 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39515 1, OPTAB_DIRECT);
39516 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39517
39518 expand_float (f0, i0, 0);
39519
39520 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39521
39522 emit_label (donelab);
39523 }
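
/* A minimal stand-alone sketch (not GCC code) of the same conversion for
   uint64 -> double, assuming IEEE double, round-to-nearest, and the usual
   two's-complement behaviour of the unsigned-to-signed cast: when the top
   bit is set, halve the value while folding the dropped bit into bit 0 so
   that the final doubling rounds the same way the exact value would.  */
static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fits the signed range */

  unsigned long long half = (u >> 1) | (u & 1);
  double d = (double) (long long) half;
  return d + d;
}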
39524 \f
39525 /* AVX512F does support 64-byte integer vector operations,
39526 thus the longest vector we are faced with is V64QImode. */
39527 #define MAX_VECT_LEN 64
39528
39529 struct expand_vec_perm_d
39530 {
39531 rtx target, op0, op1;
39532 unsigned char perm[MAX_VECT_LEN];
39533 enum machine_mode vmode;
39534 unsigned char nelt;
39535 bool one_operand_p;
39536 bool testing_p;
39537 };
39538
39539 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39540 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39541 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39542
39543 /* Get a vector mode of the same size as the original but with elements
39544 twice as wide. This is only guaranteed to apply to integral vectors. */
39545
39546 static inline enum machine_mode
39547 get_mode_wider_vector (enum machine_mode o)
39548 {
39549 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39550 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39551 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39552 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39553 return n;
39554 }
39555
39556 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39557 fill target with val via vec_duplicate. */
39558
39559 static bool
39560 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39561 {
39562 bool ok;
39563 rtx insn, dup;
39564
39565 /* First attempt to recognize VAL as-is. */
39566 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39567 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39568 if (recog_memoized (insn) < 0)
39569 {
39570 rtx seq;
39571 /* If that fails, force VAL into a register. */
39572
39573 start_sequence ();
39574 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39575 seq = get_insns ();
39576 end_sequence ();
39577 if (seq)
39578 emit_insn_before (seq, insn);
39579
39580 ok = recog_memoized (insn) >= 0;
39581 gcc_assert (ok);
39582 }
39583 return true;
39584 }
39585
39586 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39587 with all elements equal to VAR. Return true if successful. */
39588
39589 static bool
39590 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39591 rtx target, rtx val)
39592 {
39593 bool ok;
39594
39595 switch (mode)
39596 {
39597 case V2SImode:
39598 case V2SFmode:
39599 if (!mmx_ok)
39600 return false;
39601 /* FALLTHRU */
39602
39603 case V4DFmode:
39604 case V4DImode:
39605 case V8SFmode:
39606 case V8SImode:
39607 case V2DFmode:
39608 case V2DImode:
39609 case V4SFmode:
39610 case V4SImode:
39611 case V16SImode:
39612 case V8DImode:
39613 case V16SFmode:
39614 case V8DFmode:
39615 return ix86_vector_duplicate_value (mode, target, val);
39616
39617 case V4HImode:
39618 if (!mmx_ok)
39619 return false;
39620 if (TARGET_SSE || TARGET_3DNOW_A)
39621 {
39622 rtx x;
39623
39624 val = gen_lowpart (SImode, val);
39625 x = gen_rtx_TRUNCATE (HImode, val);
39626 x = gen_rtx_VEC_DUPLICATE (mode, x);
39627 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39628 return true;
39629 }
39630 goto widen;
39631
39632 case V8QImode:
39633 if (!mmx_ok)
39634 return false;
39635 goto widen;
39636
39637 case V8HImode:
39638 if (TARGET_SSE2)
39639 {
39640 struct expand_vec_perm_d dperm;
39641 rtx tmp1, tmp2;
39642
39643 permute:
39644 memset (&dperm, 0, sizeof (dperm));
39645 dperm.target = target;
39646 dperm.vmode = mode;
39647 dperm.nelt = GET_MODE_NUNITS (mode);
39648 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39649 dperm.one_operand_p = true;
39650
39651 /* Extend to SImode using a paradoxical SUBREG. */
39652 tmp1 = gen_reg_rtx (SImode);
39653 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39654
39655 /* Insert the SImode value as low element of a V4SImode vector. */
39656 tmp2 = gen_reg_rtx (V4SImode);
39657 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39658 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39659
39660 ok = (expand_vec_perm_1 (&dperm)
39661 || expand_vec_perm_broadcast_1 (&dperm));
39662 gcc_assert (ok);
39663 return ok;
39664 }
39665 goto widen;
39666
39667 case V16QImode:
39668 if (TARGET_SSE2)
39669 goto permute;
39670 goto widen;
39671
39672 widen:
39673 /* Replicate the value once into the next wider mode and recurse. */
39674 {
39675 enum machine_mode smode, wsmode, wvmode;
39676 rtx x;
39677
39678 smode = GET_MODE_INNER (mode);
39679 wvmode = get_mode_wider_vector (mode);
39680 wsmode = GET_MODE_INNER (wvmode);
39681
39682 val = convert_modes (wsmode, smode, val, true);
39683 x = expand_simple_binop (wsmode, ASHIFT, val,
39684 GEN_INT (GET_MODE_BITSIZE (smode)),
39685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39686 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39687
39688 x = gen_reg_rtx (wvmode);
39689 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39690 gcc_assert (ok);
39691 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39692 return ok;
39693 }
39694
39695 case V16HImode:
39696 case V32QImode:
39697 {
39698 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39699 rtx x = gen_reg_rtx (hvmode);
39700
39701 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39702 gcc_assert (ok);
39703
39704 x = gen_rtx_VEC_CONCAT (mode, x, x);
39705 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39706 }
39707 return true;
39708
39709 default:
39710 return false;
39711 }
39712 }
39713
39714 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39715 whose ONE_VAR element is VAR, and other elements are zero. Return true
39716 if successful. */
39717
39718 static bool
39719 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39720 rtx target, rtx var, int one_var)
39721 {
39722 enum machine_mode vsimode;
39723 rtx new_target;
39724 rtx x, tmp;
39725 bool use_vector_set = false;
39726
39727 switch (mode)
39728 {
39729 case V2DImode:
39730 /* For SSE4.1, we normally use vector set. But if the second
39731 element is zero and inter-unit moves are OK, we use movq
39732 instead. */
39733 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39734 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39735 && one_var == 0));
39736 break;
39737 case V16QImode:
39738 case V4SImode:
39739 case V4SFmode:
39740 use_vector_set = TARGET_SSE4_1;
39741 break;
39742 case V8HImode:
39743 use_vector_set = TARGET_SSE2;
39744 break;
39745 case V4HImode:
39746 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39747 break;
39748 case V32QImode:
39749 case V16HImode:
39750 case V8SImode:
39751 case V8SFmode:
39752 case V4DFmode:
39753 use_vector_set = TARGET_AVX;
39754 break;
39755 case V4DImode:
39756 /* Use ix86_expand_vector_set in 64bit mode only. */
39757 use_vector_set = TARGET_AVX && TARGET_64BIT;
39758 break;
39759 default:
39760 break;
39761 }
39762
39763 if (use_vector_set)
39764 {
39765 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39766 var = force_reg (GET_MODE_INNER (mode), var);
39767 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39768 return true;
39769 }
39770
39771 switch (mode)
39772 {
39773 case V2SFmode:
39774 case V2SImode:
39775 if (!mmx_ok)
39776 return false;
39777 /* FALLTHRU */
39778
39779 case V2DFmode:
39780 case V2DImode:
39781 if (one_var != 0)
39782 return false;
39783 var = force_reg (GET_MODE_INNER (mode), var);
39784 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39785 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39786 return true;
39787
39788 case V4SFmode:
39789 case V4SImode:
39790 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39791 new_target = gen_reg_rtx (mode);
39792 else
39793 new_target = target;
39794 var = force_reg (GET_MODE_INNER (mode), var);
39795 x = gen_rtx_VEC_DUPLICATE (mode, var);
39796 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39797 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39798 if (one_var != 0)
39799 {
39800 /* We need to shuffle the value to the correct position, so
39801 create a new pseudo to store the intermediate result. */
39802
39803 /* With SSE2, we can use the integer shuffle insns. */
39804 if (mode != V4SFmode && TARGET_SSE2)
39805 {
39806 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39807 const1_rtx,
39808 GEN_INT (one_var == 1 ? 0 : 1),
39809 GEN_INT (one_var == 2 ? 0 : 1),
39810 GEN_INT (one_var == 3 ? 0 : 1)));
39811 if (target != new_target)
39812 emit_move_insn (target, new_target);
39813 return true;
39814 }
39815
39816 /* Otherwise convert the intermediate result to V4SFmode and
39817 use the SSE1 shuffle instructions. */
39818 if (mode != V4SFmode)
39819 {
39820 tmp = gen_reg_rtx (V4SFmode);
39821 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39822 }
39823 else
39824 tmp = new_target;
39825
39826 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39827 const1_rtx,
39828 GEN_INT (one_var == 1 ? 0 : 1),
39829 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39830 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39831
39832 if (mode != V4SFmode)
39833 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39834 else if (tmp != target)
39835 emit_move_insn (target, tmp);
39836 }
39837 else if (target != new_target)
39838 emit_move_insn (target, new_target);
39839 return true;
39840
39841 case V8HImode:
39842 case V16QImode:
39843 vsimode = V4SImode;
39844 goto widen;
39845 case V4HImode:
39846 case V8QImode:
39847 if (!mmx_ok)
39848 return false;
39849 vsimode = V2SImode;
39850 goto widen;
39851 widen:
39852 if (one_var != 0)
39853 return false;
39854
39855 /* Zero extend the variable element to SImode and recurse. */
39856 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39857
39858 x = gen_reg_rtx (vsimode);
39859 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39860 var, one_var))
39861 gcc_unreachable ();
39862
39863 emit_move_insn (target, gen_lowpart (mode, x));
39864 return true;
39865
39866 default:
39867 return false;
39868 }
39869 }
39870
39871 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39872 consisting of the values in VALS. It is known that all elements
39873 except ONE_VAR are constants. Return true if successful. */
39874
39875 static bool
39876 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39877 rtx target, rtx vals, int one_var)
39878 {
39879 rtx var = XVECEXP (vals, 0, one_var);
39880 enum machine_mode wmode;
39881 rtx const_vec, x;
39882
39883 const_vec = copy_rtx (vals);
39884 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39885 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39886
39887 switch (mode)
39888 {
39889 case V2DFmode:
39890 case V2DImode:
39891 case V2SFmode:
39892 case V2SImode:
39893 /* For the two element vectors, it's just as easy to use
39894 the general case. */
39895 return false;
39896
39897 case V4DImode:
39898 /* Use ix86_expand_vector_set in 64bit mode only. */
39899 if (!TARGET_64BIT)
39900 return false;
39901 case V4DFmode:
39902 case V8SFmode:
39903 case V8SImode:
39904 case V16HImode:
39905 case V32QImode:
39906 case V4SFmode:
39907 case V4SImode:
39908 case V8HImode:
39909 case V4HImode:
39910 break;
39911
39912 case V16QImode:
39913 if (TARGET_SSE4_1)
39914 break;
39915 wmode = V8HImode;
39916 goto widen;
39917 case V8QImode:
39918 wmode = V4HImode;
39919 goto widen;
39920 widen:
39921 /* There's no way to set one QImode entry easily. Combine
39922 the variable value with its adjacent constant value, and
39923 promote to an HImode set. */
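/* For example, with one_var == 5 the adjacent constant is element 4;
on this little-endian layout the variable byte lands in the high half,
giving the HImode value (var << 8) | (c & 0xff), which is then
inserted at HImode position one_var >> 1 == 2. */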
39924 x = XVECEXP (vals, 0, one_var ^ 1);
39925 if (one_var & 1)
39926 {
39927 var = convert_modes (HImode, QImode, var, true);
39928 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39929 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39930 x = GEN_INT (INTVAL (x) & 0xff);
39931 }
39932 else
39933 {
39934 var = convert_modes (HImode, QImode, var, true);
39935 x = gen_int_mode (INTVAL (x) << 8, HImode);
39936 }
39937 if (x != const0_rtx)
39938 var = expand_simple_binop (HImode, IOR, var, x, var,
39939 1, OPTAB_LIB_WIDEN);
39940
39941 x = gen_reg_rtx (wmode);
39942 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39943 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39944
39945 emit_move_insn (target, gen_lowpart (mode, x));
39946 return true;
39947
39948 default:
39949 return false;
39950 }
39951
39952 emit_move_insn (target, const_vec);
39953 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39954 return true;
39955 }
39956
39957 /* A subroutine of ix86_expand_vector_init_general. Use vector
39958 concatenate to handle the most general case: all values variable,
39959 and none identical. */
39960
39961 static void
39962 ix86_expand_vector_init_concat (enum machine_mode mode,
39963 rtx target, rtx *ops, int n)
39964 {
39965 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39966 rtx first[16], second[8], third[4];
39967 rtvec v;
39968 int i, j;
39969
39970 switch (n)
39971 {
39972 case 2:
39973 switch (mode)
39974 {
39975 case V16SImode:
39976 cmode = V8SImode;
39977 break;
39978 case V16SFmode:
39979 cmode = V8SFmode;
39980 break;
39981 case V8DImode:
39982 cmode = V4DImode;
39983 break;
39984 case V8DFmode:
39985 cmode = V4DFmode;
39986 break;
39987 case V8SImode:
39988 cmode = V4SImode;
39989 break;
39990 case V8SFmode:
39991 cmode = V4SFmode;
39992 break;
39993 case V4DImode:
39994 cmode = V2DImode;
39995 break;
39996 case V4DFmode:
39997 cmode = V2DFmode;
39998 break;
39999 case V4SImode:
40000 cmode = V2SImode;
40001 break;
40002 case V4SFmode:
40003 cmode = V2SFmode;
40004 break;
40005 case V2DImode:
40006 cmode = DImode;
40007 break;
40008 case V2SImode:
40009 cmode = SImode;
40010 break;
40011 case V2DFmode:
40012 cmode = DFmode;
40013 break;
40014 case V2SFmode:
40015 cmode = SFmode;
40016 break;
40017 default:
40018 gcc_unreachable ();
40019 }
40020
40021 if (!register_operand (ops[1], cmode))
40022 ops[1] = force_reg (cmode, ops[1]);
40023 if (!register_operand (ops[0], cmode))
40024 ops[0] = force_reg (cmode, ops[0]);
40025 emit_insn (gen_rtx_SET (VOIDmode, target,
40026 gen_rtx_VEC_CONCAT (mode, ops[0],
40027 ops[1])));
40028 break;
40029
40030 case 4:
40031 switch (mode)
40032 {
40033 case V4DImode:
40034 cmode = V2DImode;
40035 break;
40036 case V4DFmode:
40037 cmode = V2DFmode;
40038 break;
40039 case V4SImode:
40040 cmode = V2SImode;
40041 break;
40042 case V4SFmode:
40043 cmode = V2SFmode;
40044 break;
40045 default:
40046 gcc_unreachable ();
40047 }
40048 goto half;
40049
40050 case 8:
40051 switch (mode)
40052 {
40053 case V8DImode:
40054 cmode = V2DImode;
40055 hmode = V4DImode;
40056 break;
40057 case V8DFmode:
40058 cmode = V2DFmode;
40059 hmode = V4DFmode;
40060 break;
40061 case V8SImode:
40062 cmode = V2SImode;
40063 hmode = V4SImode;
40064 break;
40065 case V8SFmode:
40066 cmode = V2SFmode;
40067 hmode = V4SFmode;
40068 break;
40069 default:
40070 gcc_unreachable ();
40071 }
40072 goto half;
40073
40074 case 16:
40075 switch (mode)
40076 {
40077 case V16SImode:
40078 cmode = V2SImode;
40079 hmode = V4SImode;
40080 gmode = V8SImode;
40081 break;
40082 case V16SFmode:
40083 cmode = V2SFmode;
40084 hmode = V4SFmode;
40085 gmode = V8SFmode;
40086 break;
40087 default:
40088 gcc_unreachable ();
40089 }
40090 goto half;
40091
40092 half:
40093 /* FIXME: We process inputs backward to help RA. PR 36222. */
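/* E.g. building a V8SImode vector from eight SImode operands: the loop
below concatenates them pairwise into four V2SImode registers, which
are then concatenated into two V4SImode registers and finally into the
V8SImode target. */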
40094 i = n - 1;
40095 j = (n >> 1) - 1;
40096 for (; i > 0; i -= 2, j--)
40097 {
40098 first[j] = gen_reg_rtx (cmode);
40099 v = gen_rtvec (2, ops[i - 1], ops[i]);
40100 ix86_expand_vector_init (false, first[j],
40101 gen_rtx_PARALLEL (cmode, v));
40102 }
40103
40104 n >>= 1;
40105 if (n > 4)
40106 {
40107 gcc_assert (hmode != VOIDmode);
40108 gcc_assert (gmode != VOIDmode);
40109 for (i = j = 0; i < n; i += 2, j++)
40110 {
40111 second[j] = gen_reg_rtx (hmode);
40112 ix86_expand_vector_init_concat (hmode, second [j],
40113 &first [i], 2);
40114 }
40115 n >>= 1;
40116 for (i = j = 0; i < n; i += 2, j++)
40117 {
40118 third[j] = gen_reg_rtx (gmode);
40119 ix86_expand_vector_init_concat (gmode, third[j],
40120 &second[i], 2);
40121 }
40122 n >>= 1;
40123 ix86_expand_vector_init_concat (mode, target, third, n);
40124 }
40125 else if (n > 2)
40126 {
40127 gcc_assert (hmode != VOIDmode);
40128 for (i = j = 0; i < n; i += 2, j++)
40129 {
40130 second[j] = gen_reg_rtx (hmode);
40131 ix86_expand_vector_init_concat (hmode, second [j],
40132 &first [i], 2);
40133 }
40134 n >>= 1;
40135 ix86_expand_vector_init_concat (mode, target, second, n);
40136 }
40137 else
40138 ix86_expand_vector_init_concat (mode, target, first, n);
40139 break;
40140
40141 default:
40142 gcc_unreachable ();
40143 }
40144 }
40145
40146 /* A subroutine of ix86_expand_vector_init_general. Use vector
40147 interleave to handle the most general case: all values variable,
40148 and none identical. */
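/* The strategy: pack pairs of scalar elements into vector registers,
then repeatedly interleave the low parts, doubling the element width at
each step, until the whole vector has been assembled. */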
40149
40150 static void
40151 ix86_expand_vector_init_interleave (enum machine_mode mode,
40152 rtx target, rtx *ops, int n)
40153 {
40154 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40155 int i, j;
40156 rtx op0, op1;
40157 rtx (*gen_load_even) (rtx, rtx, rtx);
40158 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40159 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40160
40161 switch (mode)
40162 {
40163 case V8HImode:
40164 gen_load_even = gen_vec_setv8hi;
40165 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40166 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40167 inner_mode = HImode;
40168 first_imode = V4SImode;
40169 second_imode = V2DImode;
40170 third_imode = VOIDmode;
40171 break;
40172 case V16QImode:
40173 gen_load_even = gen_vec_setv16qi;
40174 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40175 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40176 inner_mode = QImode;
40177 first_imode = V8HImode;
40178 second_imode = V4SImode;
40179 third_imode = V2DImode;
40180 break;
40181 default:
40182 gcc_unreachable ();
40183 }
40184
40185 for (i = 0; i < n; i++)
40186 {
40187 /* Extend the odd element to SImode using a paradoxical SUBREG. */
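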
40188 op0 = gen_reg_rtx (SImode);
40189 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40190
40191 /* Insert the SImode value as low element of V4SImode vector. */
40192 op1 = gen_reg_rtx (V4SImode);
40193 op0 = gen_rtx_VEC_MERGE (V4SImode,
40194 gen_rtx_VEC_DUPLICATE (V4SImode,
40195 op0),
40196 CONST0_RTX (V4SImode),
40197 const1_rtx);
40198 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40199
40200 /* Cast the V4SImode vector back to a vector in the original mode. */
40201 op0 = gen_reg_rtx (mode);
40202 emit_move_insn (op0, gen_lowpart (mode, op1));
40203
40204 /* Load even elements into the second position. */
40205 emit_insn (gen_load_even (op0,
40206 force_reg (inner_mode,
40207 ops [i + i + 1]),
40208 const1_rtx));
40209
40210 /* Cast vector to FIRST_IMODE vector. */
40211 ops[i] = gen_reg_rtx (first_imode);
40212 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40213 }
40214
40215 /* Interleave low FIRST_IMODE vectors. */
40216 for (i = j = 0; i < n; i += 2, j++)
40217 {
40218 op0 = gen_reg_rtx (first_imode);
40219 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40220
40221 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40222 ops[j] = gen_reg_rtx (second_imode);
40223 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40224 }
40225
40226 /* Interleave low SECOND_IMODE vectors. */
40227 switch (second_imode)
40228 {
40229 case V4SImode:
40230 for (i = j = 0; i < n / 2; i += 2, j++)
40231 {
40232 op0 = gen_reg_rtx (second_imode);
40233 emit_insn (gen_interleave_second_low (op0, ops[i],
40234 ops[i + 1]));
40235
40236 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40237 vector. */
40238 ops[j] = gen_reg_rtx (third_imode);
40239 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40240 }
40241 second_imode = V2DImode;
40242 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40243 /* FALLTHRU */
40244
40245 case V2DImode:
40246 op0 = gen_reg_rtx (second_imode);
40247 emit_insn (gen_interleave_second_low (op0, ops[0],
40248 ops[1]));
40249
40250 /* Cast the SECOND_IMODE vector back to a vector in the original
40251 mode. */
40252 emit_insn (gen_rtx_SET (VOIDmode, target,
40253 gen_lowpart (mode, op0)));
40254 break;
40255
40256 default:
40257 gcc_unreachable ();
40258 }
40259 }
40260
40261 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40262 all values variable, and none identical. */
40263
40264 static void
40265 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40266 rtx target, rtx vals)
40267 {
40268 rtx ops[64], op0, op1;
40269 enum machine_mode half_mode = VOIDmode;
40270 int n, i;
40271
40272 switch (mode)
40273 {
40274 case V2SFmode:
40275 case V2SImode:
40276 if (!mmx_ok && !TARGET_SSE)
40277 break;
40278 /* FALLTHRU */
40279
40280 case V16SImode:
40281 case V16SFmode:
40282 case V8DFmode:
40283 case V8DImode:
40284 case V8SFmode:
40285 case V8SImode:
40286 case V4DFmode:
40287 case V4DImode:
40288 case V4SFmode:
40289 case V4SImode:
40290 case V2DFmode:
40291 case V2DImode:
40292 n = GET_MODE_NUNITS (mode);
40293 for (i = 0; i < n; i++)
40294 ops[i] = XVECEXP (vals, 0, i);
40295 ix86_expand_vector_init_concat (mode, target, ops, n);
40296 return;
40297
40298 case V32QImode:
40299 half_mode = V16QImode;
40300 goto half;
40301
40302 case V16HImode:
40303 half_mode = V8HImode;
40304 goto half;
40305
40306 half:
40307 n = GET_MODE_NUNITS (mode);
40308 for (i = 0; i < n; i++)
40309 ops[i] = XVECEXP (vals, 0, i);
40310 op0 = gen_reg_rtx (half_mode);
40311 op1 = gen_reg_rtx (half_mode);
40312 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40313 n >> 2);
40314 ix86_expand_vector_init_interleave (half_mode, op1,
40315 &ops [n >> 1], n >> 2);
40316 emit_insn (gen_rtx_SET (VOIDmode, target,
40317 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40318 return;
40319
40320 case V16QImode:
40321 if (!TARGET_SSE4_1)
40322 break;
40323 /* FALLTHRU */
40324
40325 case V8HImode:
40326 if (!TARGET_SSE2)
40327 break;
40328
40329 /* Don't use ix86_expand_vector_init_interleave if we can't
40330 move from GPR to SSE register directly. */
40331 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40332 break;
40333
40334 n = GET_MODE_NUNITS (mode);
40335 for (i = 0; i < n; i++)
40336 ops[i] = XVECEXP (vals, 0, i);
40337 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40338 return;
40339
40340 case V4HImode:
40341 case V8QImode:
40342 break;
40343
40344 default:
40345 gcc_unreachable ();
40346 }
40347
40348 {
40349 int i, j, n_elts, n_words, n_elt_per_word;
40350 enum machine_mode inner_mode;
40351 rtx words[4], shift;
40352
40353 inner_mode = GET_MODE_INNER (mode);
40354 n_elts = GET_MODE_NUNITS (mode);
40355 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40356 n_elt_per_word = n_elts / n_words;
40357 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
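/* Pack the elements of each word-sized chunk into an integer word,
lowest-indexed element in the least significant bits. E.g. two HImode
elements A (index 0) and B (index 1) become the SImode word
(B << 16) | A. */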
40358
40359 for (i = 0; i < n_words; ++i)
40360 {
40361 rtx word = NULL_RTX;
40362
40363 for (j = 0; j < n_elt_per_word; ++j)
40364 {
40365 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40366 elt = convert_modes (word_mode, inner_mode, elt, true);
40367
40368 if (j == 0)
40369 word = elt;
40370 else
40371 {
40372 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40373 word, 1, OPTAB_LIB_WIDEN);
40374 word = expand_simple_binop (word_mode, IOR, word, elt,
40375 word, 1, OPTAB_LIB_WIDEN);
40376 }
40377 }
40378
40379 words[i] = word;
40380 }
40381
40382 if (n_words == 1)
40383 emit_move_insn (target, gen_lowpart (mode, words[0]));
40384 else if (n_words == 2)
40385 {
40386 rtx tmp = gen_reg_rtx (mode);
40387 emit_clobber (tmp);
40388 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40389 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40390 emit_move_insn (target, tmp);
40391 }
40392 else if (n_words == 4)
40393 {
40394 rtx tmp = gen_reg_rtx (V4SImode);
40395 gcc_assert (word_mode == SImode);
40396 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40397 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40398 emit_move_insn (target, gen_lowpart (mode, tmp));
40399 }
40400 else
40401 gcc_unreachable ();
40402 }
40403 }
40404
40405 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40406 instructions unless MMX_OK is true. */
40407
40408 void
40409 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40410 {
40411 enum machine_mode mode = GET_MODE (target);
40412 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40413 int n_elts = GET_MODE_NUNITS (mode);
40414 int n_var = 0, one_var = -1;
40415 bool all_same = true, all_const_zero = true;
40416 int i;
40417 rtx x;
40418
40419 for (i = 0; i < n_elts; ++i)
40420 {
40421 x = XVECEXP (vals, 0, i);
40422 if (!(CONST_INT_P (x)
40423 || GET_CODE (x) == CONST_DOUBLE
40424 || GET_CODE (x) == CONST_FIXED))
40425 n_var++, one_var = i;
40426 else if (x != CONST0_RTX (inner_mode))
40427 all_const_zero = false;
40428 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40429 all_same = false;
40430 }
40431
40432 /* Constants are best loaded from the constant pool. */
40433 if (n_var == 0)
40434 {
40435 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40436 return;
40437 }
40438
40439 /* If all values are identical, broadcast the value. */
40440 if (all_same
40441 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40442 XVECEXP (vals, 0, 0)))
40443 return;
40444
40445 /* Values where only one field is non-constant are best loaded from
40446 the pool and overwritten via move later. */
40447 if (n_var == 1)
40448 {
40449 if (all_const_zero
40450 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40451 XVECEXP (vals, 0, one_var),
40452 one_var))
40453 return;
40454
40455 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40456 return;
40457 }
40458
40459 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40460 }
40461
40462 void
40463 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40464 {
40465 enum machine_mode mode = GET_MODE (target);
40466 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40467 enum machine_mode half_mode;
40468 bool use_vec_merge = false;
40469 rtx tmp;
40470 static rtx (*gen_extract[6][2]) (rtx, rtx)
40471 = {
40472 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40473 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40474 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40475 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40476 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40477 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40478 };
40479 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40480 = {
40481 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40482 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40483 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40484 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40485 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40486 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40487 };
40488 int i, j, n;
40489
40490 switch (mode)
40491 {
40492 case V2SFmode:
40493 case V2SImode:
40494 if (mmx_ok)
40495 {
40496 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40497 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40498 if (elt == 0)
40499 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40500 else
40501 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40502 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40503 return;
40504 }
40505 break;
40506
40507 case V2DImode:
40508 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40509 if (use_vec_merge)
40510 break;
40511
40512 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40513 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40514 if (elt == 0)
40515 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40516 else
40517 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40518 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40519 return;
40520
40521 case V2DFmode:
40522 {
40523 rtx op0, op1;
40524
40525 /* For the two element vectors, we implement a VEC_CONCAT with
40526 the extraction of the other element. */
40527
40528 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40529 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40530
40531 if (elt == 0)
40532 op0 = val, op1 = tmp;
40533 else
40534 op0 = tmp, op1 = val;
40535
40536 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40537 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40538 }
40539 return;
40540
40541 case V4SFmode:
40542 use_vec_merge = TARGET_SSE4_1;
40543 if (use_vec_merge)
40544 break;
40545
40546 switch (elt)
40547 {
40548 case 0:
40549 use_vec_merge = true;
40550 break;
40551
40552 case 1:
40553 /* tmp = target = A B C D */
40554 tmp = copy_to_reg (target);
40555 /* target = A A B B */
40556 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40557 /* target = X A B B */
40558 ix86_expand_vector_set (false, target, val, 0);
40559 /* target = A X C D */
40560 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40561 const1_rtx, const0_rtx,
40562 GEN_INT (2+4), GEN_INT (3+4)));
40563 return;
40564
40565 case 2:
40566 /* tmp = target = A B C D */
40567 tmp = copy_to_reg (target);
40568 /* tmp = X B C D */
40569 ix86_expand_vector_set (false, tmp, val, 0);
40570 /* target = A B X D */
40571 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40572 const0_rtx, const1_rtx,
40573 GEN_INT (0+4), GEN_INT (3+4)));
40574 return;
40575
40576 case 3:
40577 /* tmp = target = A B C D */
40578 tmp = copy_to_reg (target);
40579 /* tmp = X B C D */
40580 ix86_expand_vector_set (false, tmp, val, 0);
40581 /* target = A B C X */
40582 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40583 const0_rtx, const1_rtx,
40584 GEN_INT (2+4), GEN_INT (0+4)));
40585 return;
40586
40587 default:
40588 gcc_unreachable ();
40589 }
40590 break;
40591
40592 case V4SImode:
40593 use_vec_merge = TARGET_SSE4_1;
40594 if (use_vec_merge)
40595 break;
40596
40597 /* Element 0 handled by vec_merge below. */
40598 if (elt == 0)
40599 {
40600 use_vec_merge = true;
40601 break;
40602 }
40603
40604 if (TARGET_SSE2)
40605 {
40606 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40607 store into element 0, then shuffle them back. */
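/* E.g. for elt == 2 the shuffle order is {2, 1, 0, 3}: the first
pshufd swaps elements 0 and 2, the recursive call stores VAL into
element 0 via vec_merge, and the identical second pshufd swaps the two
elements back, leaving VAL in element 2. */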
40608
40609 rtx order[4];
40610
40611 order[0] = GEN_INT (elt);
40612 order[1] = const1_rtx;
40613 order[2] = const2_rtx;
40614 order[3] = GEN_INT (3);
40615 order[elt] = const0_rtx;
40616
40617 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40618 order[1], order[2], order[3]));
40619
40620 ix86_expand_vector_set (false, target, val, 0);
40621
40622 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40623 order[1], order[2], order[3]));
40624 }
40625 else
40626 {
40627 /* For SSE1, we have to reuse the V4SF code. */
40628 rtx t = gen_reg_rtx (V4SFmode);
40629 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40630 emit_move_insn (target, gen_lowpart (mode, t));
40631 }
40632 return;
40633
40634 case V8HImode:
40635 use_vec_merge = TARGET_SSE2;
40636 break;
40637 case V4HImode:
40638 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40639 break;
40640
40641 case V16QImode:
40642 use_vec_merge = TARGET_SSE4_1;
40643 break;
40644
40645 case V8QImode:
40646 break;
40647
40648 case V32QImode:
40649 half_mode = V16QImode;
40650 j = 0;
40651 n = 16;
40652 goto half;
40653
40654 case V16HImode:
40655 half_mode = V8HImode;
40656 j = 1;
40657 n = 8;
40658 goto half;
40659
40660 case V8SImode:
40661 half_mode = V4SImode;
40662 j = 2;
40663 n = 4;
40664 goto half;
40665
40666 case V4DImode:
40667 half_mode = V2DImode;
40668 j = 3;
40669 n = 2;
40670 goto half;
40671
40672 case V8SFmode:
40673 half_mode = V4SFmode;
40674 j = 4;
40675 n = 4;
40676 goto half;
40677
40678 case V4DFmode:
40679 half_mode = V2DFmode;
40680 j = 5;
40681 n = 2;
40682 goto half;
40683
40684 half:
40685 /* Compute offset. */
40686 i = elt / n;
40687 elt %= n;
40688
40689 gcc_assert (i <= 1);
40690
40691 /* Extract the half. */
40692 tmp = gen_reg_rtx (half_mode);
40693 emit_insn (gen_extract[j][i] (tmp, target));
40694
40695 /* Put val in tmp at elt. */
40696 ix86_expand_vector_set (false, tmp, val, elt);
40697
40698 /* Put it back. */
40699 emit_insn (gen_insert[j][i] (target, target, tmp));
40700 return;
40701
40702 default:
40703 break;
40704 }
40705
40706 if (use_vec_merge)
40707 {
40708 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40709 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40710 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40711 }
40712 else
40713 {
40714 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40715
40716 emit_move_insn (mem, target);
40717
40718 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40719 emit_move_insn (tmp, val);
40720
40721 emit_move_insn (target, mem);
40722 }
40723 }
40724
40725 void
40726 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40727 {
40728 enum machine_mode mode = GET_MODE (vec);
40729 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40730 bool use_vec_extr = false;
40731 rtx tmp;
40732
40733 switch (mode)
40734 {
40735 case V2SImode:
40736 case V2SFmode:
40737 if (!mmx_ok)
40738 break;
40739 /* FALLTHRU */
40740
40741 case V2DFmode:
40742 case V2DImode:
40743 use_vec_extr = true;
40744 break;
40745
40746 case V4SFmode:
40747 use_vec_extr = TARGET_SSE4_1;
40748 if (use_vec_extr)
40749 break;
40750
40751 switch (elt)
40752 {
40753 case 0:
40754 tmp = vec;
40755 break;
40756
40757 case 1:
40758 case 3:
40759 tmp = gen_reg_rtx (mode);
40760 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40761 GEN_INT (elt), GEN_INT (elt),
40762 GEN_INT (elt+4), GEN_INT (elt+4)));
40763 break;
40764
40765 case 2:
40766 tmp = gen_reg_rtx (mode);
40767 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40768 break;
40769
40770 default:
40771 gcc_unreachable ();
40772 }
40773 vec = tmp;
40774 use_vec_extr = true;
40775 elt = 0;
40776 break;
40777
40778 case V4SImode:
40779 use_vec_extr = TARGET_SSE4_1;
40780 if (use_vec_extr)
40781 break;
40782
40783 if (TARGET_SSE2)
40784 {
40785 switch (elt)
40786 {
40787 case 0:
40788 tmp = vec;
40789 break;
40790
40791 case 1:
40792 case 3:
40793 tmp = gen_reg_rtx (mode);
40794 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40795 GEN_INT (elt), GEN_INT (elt),
40796 GEN_INT (elt), GEN_INT (elt)));
40797 break;
40798
40799 case 2:
40800 tmp = gen_reg_rtx (mode);
40801 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40802 break;
40803
40804 default:
40805 gcc_unreachable ();
40806 }
40807 vec = tmp;
40808 use_vec_extr = true;
40809 elt = 0;
40810 }
40811 else
40812 {
40813 /* For SSE1, we have to reuse the V4SF code. */
40814 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40815 gen_lowpart (V4SFmode, vec), elt);
40816 return;
40817 }
40818 break;
40819
40820 case V8HImode:
40821 use_vec_extr = TARGET_SSE2;
40822 break;
40823 case V4HImode:
40824 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40825 break;
40826
40827 case V16QImode:
40828 use_vec_extr = TARGET_SSE4_1;
40829 break;
40830
40831 case V8SFmode:
40832 if (TARGET_AVX)
40833 {
40834 tmp = gen_reg_rtx (V4SFmode);
40835 if (elt < 4)
40836 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40837 else
40838 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40839 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40840 return;
40841 }
40842 break;
40843
40844 case V4DFmode:
40845 if (TARGET_AVX)
40846 {
40847 tmp = gen_reg_rtx (V2DFmode);
40848 if (elt < 2)
40849 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40850 else
40851 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40852 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40853 return;
40854 }
40855 break;
40856
40857 case V32QImode:
40858 if (TARGET_AVX)
40859 {
40860 tmp = gen_reg_rtx (V16QImode);
40861 if (elt < 16)
40862 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40863 else
40864 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40865 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40866 return;
40867 }
40868 break;
40869
40870 case V16HImode:
40871 if (TARGET_AVX)
40872 {
40873 tmp = gen_reg_rtx (V8HImode);
40874 if (elt < 8)
40875 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40876 else
40877 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40878 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40879 return;
40880 }
40881 break;
40882
40883 case V8SImode:
40884 if (TARGET_AVX)
40885 {
40886 tmp = gen_reg_rtx (V4SImode);
40887 if (elt < 4)
40888 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40889 else
40890 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40891 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40892 return;
40893 }
40894 break;
40895
40896 case V4DImode:
40897 if (TARGET_AVX)
40898 {
40899 tmp = gen_reg_rtx (V2DImode);
40900 if (elt < 2)
40901 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40902 else
40903 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40904 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40905 return;
40906 }
40907 break;
40908
40909 case V16SFmode:
40910 tmp = gen_reg_rtx (V8SFmode);
40911 if (elt < 8)
40912 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40913 else
40914 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40915 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40916 return;
40917
40918 case V8DFmode:
40919 tmp = gen_reg_rtx (V4DFmode);
40920 if (elt < 4)
40921 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40922 else
40923 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40924 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40925 return;
40926
40927 case V16SImode:
40928 tmp = gen_reg_rtx (V8SImode);
40929 if (elt < 8)
40930 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40931 else
40932 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40933 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40934 return;
40935
40936 case V8DImode:
40937 tmp = gen_reg_rtx (V4DImode);
40938 if (elt < 4)
40939 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40940 else
40941 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40942 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40943 return;
40944
40945 case V8QImode:
40946 /* ??? Could extract the appropriate HImode element and shift. */
40947 default:
40948 break;
40949 }
40950
40951 if (use_vec_extr)
40952 {
40953 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40954 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40955
40956 /* Let the rtl optimizers know about the zero extension performed. */
40957 if (inner_mode == QImode || inner_mode == HImode)
40958 {
40959 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40960 target = gen_lowpart (SImode, target);
40961 }
40962
40963 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40964 }
40965 else
40966 {
40967 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40968
40969 emit_move_insn (mem, vec);
40970
40971 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40972 emit_move_insn (target, tmp);
40973 }
40974 }
40975
40976 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40977 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40978 The upper bits of DEST are undefined, though they shouldn't cause
40979 exceptions (some bits from src or all zeros are ok). */
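/* E.g. with a V4SImode SRC and i == 128, the upper two elements are
shifted down into elements 0 and 1; with a V8SFmode SRC and i == 256,
vperm2f128 moves the upper 128-bit lane into the lower lane. */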
40980
40981 static void
40982 emit_reduc_half (rtx dest, rtx src, int i)
40983 {
40984 rtx tem, d = dest;
40985 switch (GET_MODE (src))
40986 {
40987 case V4SFmode:
40988 if (i == 128)
40989 tem = gen_sse_movhlps (dest, src, src);
40990 else
40991 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40992 GEN_INT (1 + 4), GEN_INT (1 + 4));
40993 break;
40994 case V2DFmode:
40995 tem = gen_vec_interleave_highv2df (dest, src, src);
40996 break;
40997 case V16QImode:
40998 case V8HImode:
40999 case V4SImode:
41000 case V2DImode:
41001 d = gen_reg_rtx (V1TImode);
41002 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41003 GEN_INT (i / 2));
41004 break;
41005 case V8SFmode:
41006 if (i == 256)
41007 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41008 else
41009 tem = gen_avx_shufps256 (dest, src, src,
41010 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41011 break;
41012 case V4DFmode:
41013 if (i == 256)
41014 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41015 else
41016 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41017 break;
41018 case V32QImode:
41019 case V16HImode:
41020 case V8SImode:
41021 case V4DImode:
41022 if (i == 256)
41023 {
41024 if (GET_MODE (dest) != V4DImode)
41025 d = gen_reg_rtx (V4DImode);
41026 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41027 gen_lowpart (V4DImode, src),
41028 const1_rtx);
41029 }
41030 else
41031 {
41032 d = gen_reg_rtx (V2TImode);
41033 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41034 GEN_INT (i / 2));
41035 }
41036 break;
41037 case V16SImode:
41038 case V16SFmode:
41039 case V8DImode:
41040 case V8DFmode:
41041 if (i > 128)
41042 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41043 gen_lowpart (V16SImode, src),
41044 gen_lowpart (V16SImode, src),
41045 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41046 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41047 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41048 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41049 GEN_INT (0xC), GEN_INT (0xD),
41050 GEN_INT (0xE), GEN_INT (0xF),
41051 GEN_INT (0x10), GEN_INT (0x11),
41052 GEN_INT (0x12), GEN_INT (0x13),
41053 GEN_INT (0x14), GEN_INT (0x15),
41054 GEN_INT (0x16), GEN_INT (0x17));
41055 else
41056 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41057 gen_lowpart (V16SImode, src),
41058 GEN_INT (i == 128 ? 0x2 : 0x1),
41059 GEN_INT (0x3),
41060 GEN_INT (0x3),
41061 GEN_INT (0x3),
41062 GEN_INT (i == 128 ? 0x6 : 0x5),
41063 GEN_INT (0x7),
41064 GEN_INT (0x7),
41065 GEN_INT (0x7),
41066 GEN_INT (i == 128 ? 0xA : 0x9),
41067 GEN_INT (0xB),
41068 GEN_INT (0xB),
41069 GEN_INT (0xB),
41070 GEN_INT (i == 128 ? 0xE : 0xD),
41071 GEN_INT (0xF),
41072 GEN_INT (0xF),
41073 GEN_INT (0xF));
41074 break;
41075 default:
41076 gcc_unreachable ();
41077 }
41078 emit_insn (tem);
41079 if (d != dest)
41080 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41081 }
41082
41083 /* Expand a vector reduction. FN is the binary pattern to reduce;
41084 DEST is the destination; IN is the input vector. */
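/* The loop below repeatedly folds the upper half of the vector onto
the lower half with FN. E.g. summing a V4SFmode vector takes two
steps: add the upper 64 bits to the lower 64 bits, then add the two
remaining floats; element 0 of DEST ends up holding the full
reduction. */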
41085
41086 void
41087 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41088 {
41089 rtx half, dst, vec = in;
41090 enum machine_mode mode = GET_MODE (in);
41091 int i;
41092
41093 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
41094 if (TARGET_SSE4_1
41095 && mode == V8HImode
41096 && fn == gen_uminv8hi3)
41097 {
41098 emit_insn (gen_sse4_1_phminposuw (dest, in));
41099 return;
41100 }
41101
41102 for (i = GET_MODE_BITSIZE (mode);
41103 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41104 i >>= 1)
41105 {
41106 half = gen_reg_rtx (mode);
41107 emit_reduc_half (half, vec, i);
41108 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41109 dst = dest;
41110 else
41111 dst = gen_reg_rtx (mode);
41112 emit_insn (fn (dst, half, vec));
41113 vec = dst;
41114 }
41115 }
41116 \f
41117 /* Target hook for scalar_mode_supported_p. */
41118 static bool
41119 ix86_scalar_mode_supported_p (enum machine_mode mode)
41120 {
41121 if (DECIMAL_FLOAT_MODE_P (mode))
41122 return default_decimal_float_supported_p ();
41123 else if (mode == TFmode)
41124 return true;
41125 else
41126 return default_scalar_mode_supported_p (mode);
41127 }
41128
41129 /* Implements target hook vector_mode_supported_p. */
41130 static bool
41131 ix86_vector_mode_supported_p (enum machine_mode mode)
41132 {
41133 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41134 return true;
41135 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41136 return true;
41137 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41138 return true;
41139 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41140 return true;
41141 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41142 return true;
41143 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41144 return true;
41145 return false;
41146 }
41147
41148 /* Target hook for c_mode_for_suffix. */
41149 static enum machine_mode
41150 ix86_c_mode_for_suffix (char suffix)
41151 {
41152 if (suffix == 'q')
41153 return TFmode;
41154 if (suffix == 'w')
41155 return XFmode;
41156
41157 return VOIDmode;
41158 }
41159
41160 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41161
41162 We do this in the new i386 backend to maintain source compatibility
41163 with the old cc0-based compiler. */
41164
41165 static tree
41166 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41167 tree inputs ATTRIBUTE_UNUSED,
41168 tree clobbers)
41169 {
41170 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41171 clobbers);
41172 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41173 clobbers);
41174 return clobbers;
41175 }
41176
41177 /* Implements the target hook targetm.asm.encode_section_info. */
41178
41179 static void ATTRIBUTE_UNUSED
41180 ix86_encode_section_info (tree decl, rtx rtl, int first)
41181 {
41182 default_encode_section_info (decl, rtl, first);
41183
41184 if (TREE_CODE (decl) == VAR_DECL
41185 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41186 && ix86_in_large_data_p (decl))
41187 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41188 }
41189
41190 /* Worker function for REVERSE_CONDITION. */
41191
41192 enum rtx_code
41193 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41194 {
41195 return (mode != CCFPmode && mode != CCFPUmode
41196 ? reverse_condition (code)
41197 : reverse_condition_maybe_unordered (code));
41198 }
41199
41200 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41201 to OPERANDS[0]. */
41202
41203 const char *
41204 output_387_reg_move (rtx insn, rtx *operands)
41205 {
41206 if (REG_P (operands[0]))
41207 {
41208 if (REG_P (operands[1])
41209 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41210 {
41211 if (REGNO (operands[0]) == FIRST_STACK_REG)
41212 return output_387_ffreep (operands, 0);
41213 return "fstp\t%y0";
41214 }
41215 if (STACK_TOP_P (operands[0]))
41216 return "fld%Z1\t%y1";
41217 return "fst\t%y0";
41218 }
41219 else if (MEM_P (operands[0]))
41220 {
41221 gcc_assert (REG_P (operands[1]));
41222 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41223 return "fstp%Z0\t%y0";
41224 else
41225 {
41226 /* There is no non-popping store to memory for XFmode.
41227 So if we need one, follow the store with a load. */
41228 if (GET_MODE (operands[0]) == XFmode)
41229 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41230 else
41231 return "fst%Z0\t%y0";
41232 }
41233 }
41234 else
41235 gcc_unreachable ();
41236 }
41237
41238 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41239 FP status register is set. */
41240
41241 void
41242 ix86_emit_fp_unordered_jump (rtx label)
41243 {
41244 rtx reg = gen_reg_rtx (HImode);
41245 rtx temp;
41246
41247 emit_insn (gen_x86_fnstsw_1 (reg));
41248
41249 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41250 {
41251 emit_insn (gen_x86_sahf_1 (reg));
41252
41253 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41254 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41255 }
41256 else
41257 {
41258 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41259
41260 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41261 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41262 }
41263
41264 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41265 gen_rtx_LABEL_REF (VOIDmode, label),
41266 pc_rtx);
41267 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41268
41269 emit_jump_insn (temp);
41270 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41271 }
41272
41273 /* Output code to perform a log1p XFmode calculation. */
41274
41275 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41276 {
41277 rtx label1 = gen_label_rtx ();
41278 rtx label2 = gen_label_rtx ();
41279
41280 rtx tmp = gen_reg_rtx (XFmode);
41281 rtx tmp2 = gen_reg_rtx (XFmode);
41282 rtx test;
41283
41284 emit_insn (gen_absxf2 (tmp, op1));
41285 test = gen_rtx_GE (VOIDmode, tmp,
41286 CONST_DOUBLE_FROM_REAL_VALUE (
41287 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41288 XFmode));
41289 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
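/* 0.2928932... is 1 - sqrt(2)/2, the limit of the argument range for
which fyl2xp1 is specified to be accurate; for larger |op1| the code
below falls back to computing fyl2x of 1 + op1 instead. */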
41290
41291 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41292 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41293 emit_jump (label2);
41294
41295 emit_label (label1);
41296 emit_move_insn (tmp, CONST1_RTX (XFmode));
41297 emit_insn (gen_addxf3 (tmp, op1, tmp));
41298 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41299 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41300
41301 emit_label (label2);
41302 }
41303
41304 /* Emit code for round calculation, i.e. OP0 = round (OP1) with halfway cases rounded away from zero. */
41305 void ix86_emit_i387_round (rtx op0, rtx op1)
41306 {
41307 enum machine_mode inmode = GET_MODE (op1);
41308 enum machine_mode outmode = GET_MODE (op0);
41309 rtx e1, e2, res, tmp, tmp1, half;
41310 rtx scratch = gen_reg_rtx (HImode);
41311 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41312 rtx jump_label = gen_label_rtx ();
41313 rtx insn;
41314 rtx (*gen_abs) (rtx, rtx);
41315 rtx (*gen_neg) (rtx, rtx);
41316
41317 switch (inmode)
41318 {
41319 case SFmode:
41320 gen_abs = gen_abssf2;
41321 break;
41322 case DFmode:
41323 gen_abs = gen_absdf2;
41324 break;
41325 case XFmode:
41326 gen_abs = gen_absxf2;
41327 break;
41328 default:
41329 gcc_unreachable ();
41330 }
41331
41332 switch (outmode)
41333 {
41334 case SFmode:
41335 gen_neg = gen_negsf2;
41336 break;
41337 case DFmode:
41338 gen_neg = gen_negdf2;
41339 break;
41340 case XFmode:
41341 gen_neg = gen_negxf2;
41342 break;
41343 case HImode:
41344 gen_neg = gen_neghi2;
41345 break;
41346 case SImode:
41347 gen_neg = gen_negsi2;
41348 break;
41349 case DImode:
41350 gen_neg = gen_negdi2;
41351 break;
41352 default:
41353 gcc_unreachable ();
41354 }
41355
41356 e1 = gen_reg_rtx (inmode);
41357 e2 = gen_reg_rtx (inmode);
41358 res = gen_reg_rtx (outmode);
41359
41360 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41361
41362 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
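/* fxam records the sign of op1 in condition bit C1 of the FPU status
word; after floor (fabs (op1) + 0.5) has been computed below, that
saved sign bit is tested (the 0x02 mask on the high status byte) and,
if set, the result is negated. */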
41363
41364 /* scratch = fxam(op1) */
41365 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41366 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41367 UNSPEC_FXAM)));
41368 /* e1 = fabs(op1) */
41369 emit_insn (gen_abs (e1, op1));
41370
41371 /* e2 = e1 + 0.5 */
41372 half = force_reg (inmode, half);
41373 emit_insn (gen_rtx_SET (VOIDmode, e2,
41374 gen_rtx_PLUS (inmode, e1, half)));
41375
41376 /* res = floor(e2) */
41377 if (inmode != XFmode)
41378 {
41379 tmp1 = gen_reg_rtx (XFmode);
41380
41381 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41382 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41383 }
41384 else
41385 tmp1 = e2;
41386
41387 switch (outmode)
41388 {
41389 case SFmode:
41390 case DFmode:
41391 {
41392 rtx tmp0 = gen_reg_rtx (XFmode);
41393
41394 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41395
41396 emit_insn (gen_rtx_SET (VOIDmode, res,
41397 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41398 UNSPEC_TRUNC_NOOP)));
41399 }
41400 break;
41401 case XFmode:
41402 emit_insn (gen_frndintxf2_floor (res, tmp1));
41403 break;
41404 case HImode:
41405 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41406 break;
41407 case SImode:
41408 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41409 break;
41410 case DImode:
41411 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41412 break;
41413 default:
41414 gcc_unreachable ();
41415 }
41416
41417 /* flags = signbit(a) */
41418 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41419
41420 /* if (flags) then res = -res */
41421 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41422 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41423 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41424 pc_rtx);
41425 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41426 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41427 JUMP_LABEL (insn) = jump_label;
41428
41429 emit_insn (gen_neg (res, res));
41430
41431 emit_label (jump_label);
41432 LABEL_NUSES (jump_label) = 1;
41433
41434 emit_move_insn (op0, res);
41435 }
41436
41437 /* Output code to perform a Newton-Raphson approximation of a single precision
41438 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41439
41440 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41441 {
41442 rtx x0, x1, e0, e1;
41443
41444 x0 = gen_reg_rtx (mode);
41445 e0 = gen_reg_rtx (mode);
41446 e1 = gen_reg_rtx (mode);
41447 x1 = gen_reg_rtx (mode);
41448
41449 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
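/* This is one Newton-Raphson step for 1/b: with x0 = rcp(b), the
refined value is x1 = x0 * (2 - b*x0) = 2*x0 - b*x0*x0, which roughly
doubles the number of correct bits of the hardware estimate; the
result is then multiplied by a. */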
41450
41451 b = force_reg (mode, b);
41452
41453 /* x0 = rcp(b) estimate */
41454 if (mode == V16SFmode || mode == V8DFmode)
41455 emit_insn (gen_rtx_SET (VOIDmode, x0,
41456 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41457 UNSPEC_RCP14)));
41458 else
41459 emit_insn (gen_rtx_SET (VOIDmode, x0,
41460 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41461 UNSPEC_RCP)));
41462
41463 /* e0 = x0 * b */
41464 emit_insn (gen_rtx_SET (VOIDmode, e0,
41465 gen_rtx_MULT (mode, x0, b)));
41466
41467 /* e0 = x0 * e0 */
41468 emit_insn (gen_rtx_SET (VOIDmode, e0,
41469 gen_rtx_MULT (mode, x0, e0)));
41470
41471 /* e1 = x0 + x0 */
41472 emit_insn (gen_rtx_SET (VOIDmode, e1,
41473 gen_rtx_PLUS (mode, x0, x0)));
41474
41475 /* x1 = e1 - e0 */
41476 emit_insn (gen_rtx_SET (VOIDmode, x1,
41477 gen_rtx_MINUS (mode, e1, e0)));
41478
41479 /* res = a * x1 */
41480 emit_insn (gen_rtx_SET (VOIDmode, res,
41481 gen_rtx_MULT (mode, a, x1)));
41482 }
41483
41484 /* Output code to perform a Newton-Raphson approximation of a
41485 single precision floating point [reciprocal] square root. */
41486
41487 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41488 bool recip)
41489 {
41490 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41491 REAL_VALUE_TYPE r;
41492 int unspec;
41493
41494 x0 = gen_reg_rtx (mode);
41495 e0 = gen_reg_rtx (mode);
41496 e1 = gen_reg_rtx (mode);
41497 e2 = gen_reg_rtx (mode);
41498 e3 = gen_reg_rtx (mode);
41499
41500 real_from_integer (&r, VOIDmode, -3, SIGNED);
41501 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41502
41503 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41504 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41505 unspec = UNSPEC_RSQRT;
41506
41507 if (VECTOR_MODE_P (mode))
41508 {
41509 mthree = ix86_build_const_vector (mode, true, mthree);
41510 mhalf = ix86_build_const_vector (mode, true, mhalf);
41511 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41512 if (GET_MODE_SIZE (mode) == 64)
41513 unspec = UNSPEC_RSQRT14;
41514 }
41515
41516 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41517 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
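/* This is one Newton-Raphson step for 1/sqrt(a): with x0 = rsqrtss(a),
x1 = 0.5 * x0 * (3 - a*x0*x0) = -0.5 * x0 * (a*x0*x0 - 3); multiplying
the whole expression by a instead of x0 yields sqrt(a) directly. */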
41518
41519 a = force_reg (mode, a);
41520
41521 /* x0 = rsqrt(a) estimate */
41522 emit_insn (gen_rtx_SET (VOIDmode, x0,
41523 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41524 unspec)));
41525
41526 /* If a == 0.0, the rsqrt estimate is infinite; filter it out to prevent a NaN for sqrt (0.0). */
41527 if (!recip)
41528 {
41529 rtx zero, mask;
41530
41531 zero = gen_reg_rtx (mode);
41532 mask = gen_reg_rtx (mode);
41533
41534 zero = force_reg (mode, CONST0_RTX(mode));
41535
41536 /* Handle masked compare. */
41537 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41538 {
41539 mask = gen_reg_rtx (HImode);
41540 /* Imm value 0x4 corresponds to not-equal comparison. */
41541 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41542 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41543 }
41544 else
41545 {
41546 emit_insn (gen_rtx_SET (VOIDmode, mask,
41547 gen_rtx_NE (mode, zero, a)));
41548
41549 emit_insn (gen_rtx_SET (VOIDmode, x0,
41550 gen_rtx_AND (mode, x0, mask)));
41551 }
41552 }
41553
41554 /* e0 = x0 * a */
41555 emit_insn (gen_rtx_SET (VOIDmode, e0,
41556 gen_rtx_MULT (mode, x0, a)));
41557 /* e1 = e0 * x0 */
41558 emit_insn (gen_rtx_SET (VOIDmode, e1,
41559 gen_rtx_MULT (mode, e0, x0)));
41560
41561 /* e2 = e1 - 3. */
41562 mthree = force_reg (mode, mthree);
41563 emit_insn (gen_rtx_SET (VOIDmode, e2,
41564 gen_rtx_PLUS (mode, e1, mthree)));
41565
41566 mhalf = force_reg (mode, mhalf);
41567 if (recip)
41568 /* e3 = -.5 * x0 */
41569 emit_insn (gen_rtx_SET (VOIDmode, e3,
41570 gen_rtx_MULT (mode, x0, mhalf)));
41571 else
41572 /* e3 = -.5 * e0 */
41573 emit_insn (gen_rtx_SET (VOIDmode, e3,
41574 gen_rtx_MULT (mode, e0, mhalf)));
41575 /* ret = e2 * e3 */
41576 emit_insn (gen_rtx_SET (VOIDmode, res,
41577 gen_rtx_MULT (mode, e2, e3)));
41578 }
41579
41580 #ifdef TARGET_SOLARIS
41581 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41582
41583 static void
41584 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41585 tree decl)
41586 {
41587 /* With Binutils 2.15, the "@unwind" marker must be specified on
41588 every occurrence of the ".eh_frame" section, not just the first
41589 one. */
41590 if (TARGET_64BIT
41591 && strcmp (name, ".eh_frame") == 0)
41592 {
41593 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41594 flags & SECTION_WRITE ? "aw" : "a");
41595 return;
41596 }
41597
41598 #ifndef USE_GAS
41599 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41600 {
41601 solaris_elf_asm_comdat_section (name, flags, decl);
41602 return;
41603 }
41604 #endif
41605
41606 default_elf_asm_named_section (name, flags, decl);
41607 }
41608 #endif /* TARGET_SOLARIS */
41609
41610 /* Return the mangling of TYPE if it is an extended fundamental type. */
41611
41612 static const char *
41613 ix86_mangle_type (const_tree type)
41614 {
41615 type = TYPE_MAIN_VARIANT (type);
41616
41617 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41618 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41619 return NULL;
41620
41621 switch (TYPE_MODE (type))
41622 {
41623 case TFmode:
41624 /* __float128 is "g". */
41625 return "g";
41626 case XFmode:
41627 /* "long double" or __float80 is "e". */
41628 return "e";
41629 default:
41630 return NULL;
41631 }
41632 }
41633
41634 /* For 32-bit code we can save PIC register setup by using
41635 __stack_chk_fail_local hidden function instead of calling
41636 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41637 register, so it is better to call __stack_chk_fail directly. */
41638
41639 static tree ATTRIBUTE_UNUSED
41640 ix86_stack_protect_fail (void)
41641 {
41642 return TARGET_64BIT
41643 ? default_external_stack_protect_fail ()
41644 : default_hidden_stack_protect_fail ();
41645 }
41646
41647 /* Select a format to encode pointers in exception handling data. CODE
41648 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41649 true if the symbol may be affected by dynamic relocations.
41650
41651 ??? All x86 object file formats are capable of representing this.
41652 After all, the relocation needed is the same as for the call insn.
41653 Whether or not a particular assembler allows us to enter such, I
41654 guess we'll have to see. */
41655 int
41656 asm_preferred_eh_data_format (int code, int global)
41657 {
41658 if (flag_pic)
41659 {
41660 int type = DW_EH_PE_sdata8;
41661 if (!TARGET_64BIT
41662 || ix86_cmodel == CM_SMALL_PIC
41663 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41664 type = DW_EH_PE_sdata4;
41665 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41666 }
41667 if (ix86_cmodel == CM_SMALL
41668 || (ix86_cmodel == CM_MEDIUM && code))
41669 return DW_EH_PE_udata4;
41670 return DW_EH_PE_absptr;
41671 }
41672 \f
41673 /* Expand copysign from SIGN to the positive value ABS_VALUE
41674 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41675 the sign-bit. */
41676 static void
41677 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41678 {
41679 enum machine_mode mode = GET_MODE (sign);
41680 rtx sgn = gen_reg_rtx (mode);
41681 if (mask == NULL_RTX)
41682 {
41683 enum machine_mode vmode;
41684
41685 if (mode == SFmode)
41686 vmode = V4SFmode;
41687 else if (mode == DFmode)
41688 vmode = V2DFmode;
41689 else
41690 vmode = mode;
41691
41692 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41693 if (!VECTOR_MODE_P (mode))
41694 {
41695 /* We need to generate a scalar mode mask in this case. */
41696 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41697 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41698 mask = gen_reg_rtx (mode);
41699 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41700 }
41701 }
41702 else
41703 mask = gen_rtx_NOT (mode, mask);
41704 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41705 gen_rtx_AND (mode, mask, sign)));
41706 emit_insn (gen_rtx_SET (VOIDmode, result,
41707 gen_rtx_IOR (mode, abs_value, sgn)));
41708 }
41709
41710 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41711 mask for masking out the sign-bit is stored in *SMASK, if that is
41712 non-null. */
41713 static rtx
41714 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41715 {
41716 enum machine_mode vmode, mode = GET_MODE (op0);
41717 rtx xa, mask;
41718
41719 xa = gen_reg_rtx (mode);
41720 if (mode == SFmode)
41721 vmode = V4SFmode;
41722 else if (mode == DFmode)
41723 vmode = V2DFmode;
41724 else
41725 vmode = mode;
41726 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41727 if (!VECTOR_MODE_P (mode))
41728 {
41729 /* We need to generate a scalar mode mask in this case. */
41730 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41731 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41732 mask = gen_reg_rtx (mode);
41733 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41734 }
41735 emit_insn (gen_rtx_SET (VOIDmode, xa,
41736 gen_rtx_AND (mode, op0, mask)));
41737
41738 if (smask)
41739 *smask = mask;
41740
41741 return xa;
41742 }
41743
41744 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41745 swapping the operands if SWAP_OPERANDS is true. The expanded
41746 code is a forward jump to a newly created label in case the
41747 comparison is true. The generated label rtx is returned. */
41748 static rtx
41749 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41750 bool swap_operands)
41751 {
41752 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41753 rtx label, tmp;
41754
41755 if (swap_operands)
41756 {
41757 tmp = op0;
41758 op0 = op1;
41759 op1 = tmp;
41760 }
41761
41762 label = gen_label_rtx ();
41763 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41764 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41765 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41766 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41767 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41768 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41769 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41770 JUMP_LABEL (tmp) = label;
41771
41772 return label;
41773 }
41774
41775 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41776 using comparison code CODE. Operands are swapped for the comparison if
41777 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41778 static rtx
41779 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41780 bool swap_operands)
41781 {
41782 rtx (*insn)(rtx, rtx, rtx, rtx);
41783 enum machine_mode mode = GET_MODE (op0);
41784 rtx mask = gen_reg_rtx (mode);
41785
41786 if (swap_operands)
41787 {
41788 rtx tmp = op0;
41789 op0 = op1;
41790 op1 = tmp;
41791 }
41792
41793 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41794
41795 emit_insn (insn (mask, op0, op1,
41796 gen_rtx_fmt_ee (code, mode, op0, op1)));
41797 return mask;
41798 }
41799
41800 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41801 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41802 static rtx
41803 ix86_gen_TWO52 (enum machine_mode mode)
41804 {
41805 REAL_VALUE_TYPE TWO52r;
41806 rtx TWO52;
41807
41808 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41809 TWO52 = const_double_from_real_value (TWO52r, mode);
41810 TWO52 = force_reg (mode, TWO52);
41811
41812 return TWO52;
41813 }
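/* Editorial note (not from the original sources): the constant built above
   enables the add-and-subtract rounding trick used by the expanders below.
   At magnitude 2**52 the spacing of representable doubles is exactly 1.0,
   so for 0 <= xa < 2**52 the addition itself rounds xa to an integer in
   the current (round-to-nearest) mode:

     xa = 3.7;
     xa = xa + 4503599627370496.0;    rounds to 4503599627370500.0
     xa = xa - 4503599627370496.0;    yields 4.0 == rint (3.7)

   For SFmode the analogous constant is 2**23 == 8388608.0f.  */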
41814
41815 /* Expand SSE sequence for computing lround from OP1 storing
41816 into OP0. */
41817 void
41818 ix86_expand_lround (rtx op0, rtx op1)
41819 {
41820 /* C code for the stuff we're doing below:
41821 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41822 return (long)tmp;
41823 */
41824 enum machine_mode mode = GET_MODE (op1);
41825 const struct real_format *fmt;
41826 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41827 rtx adj;
41828
41829 /* load nextafter (0.5, 0.0) */
41830 fmt = REAL_MODE_FORMAT (mode);
41831 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41832 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41833
41834 /* adj = copysign (0.5, op1) */
41835 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41836 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41837
41838 /* adj = op1 + adj */
41839 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41840
41841 /* op0 = (imode)adj */
41842 expand_fix (op0, adj, 0);
41843 }
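/* Editorial note (not from the original sources): nextafter (0.5, 0.0) is
   loaded instead of an exact 0.5 so that arguments just below one half do
   not get rounded up.  Worked DFmode example, where pred_half is
   0.5 - 2**-54:

     x = 0.49999999999999994;             largest double below 0.5
     x + 0.5        ->  1.0 - 2**-54, rounds to 1.0, (long) gives 1  (wrong)
     x + pred_half  ->  1.0 - 2**-53, exact,         (long) gives 0  (right)  */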
41844
41845 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41846 into OPERAND0. */
41847 void
41848 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41849 {
41850 /* C code for the stuff we're doing below (for do_floor):
41851 xi = (long)op1;
41852 xi -= (double)xi > op1 ? 1 : 0;
41853 return xi;
41854 */
41855 enum machine_mode fmode = GET_MODE (op1);
41856 enum machine_mode imode = GET_MODE (op0);
41857 rtx ireg, freg, label, tmp;
41858
41859 /* reg = (long)op1 */
41860 ireg = gen_reg_rtx (imode);
41861 expand_fix (ireg, op1, 0);
41862
41863 /* freg = (double)reg */
41864 freg = gen_reg_rtx (fmode);
41865 expand_float (freg, ireg, 0);
41866
41867 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41868 label = ix86_expand_sse_compare_and_jump (UNLE,
41869 freg, op1, !do_floor);
41870 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41871 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41872 emit_move_insn (ireg, tmp);
41873
41874 emit_label (label);
41875 LABEL_NUSES (label) = 1;
41876
41877 emit_move_insn (op0, ireg);
41878 }
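/* Editorial addition (not from the original sources): the comment above
   spells out only the do_floor case.  For lceil (do_floor == false) the
   same sequence computes

     xi = (long) op1;
     xi += (double) xi < op1 ? 1 : 0;
     return xi;

   because the comparison is emitted with swapped operands and the
   compensation uses PLUS instead of MINUS.  */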
41879
41880 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41881 result in OPERAND0. */
41882 void
41883 ix86_expand_rint (rtx operand0, rtx operand1)
41884 {
41885 /* C code for the stuff we're doing below:
41886 xa = fabs (operand1);
41887 if (!isless (xa, 2**52))
41888 return operand1;
41889 xa = xa + 2**52 - 2**52;
41890 return copysign (xa, operand1);
41891 */
41892 enum machine_mode mode = GET_MODE (operand0);
41893 rtx res, xa, label, TWO52, mask;
41894
41895 res = gen_reg_rtx (mode);
41896 emit_move_insn (res, operand1);
41897
41898 /* xa = abs (operand1) */
41899 xa = ix86_expand_sse_fabs (res, &mask);
41900
41901 /* if (!isless (xa, TWO52)) goto label; */
41902 TWO52 = ix86_gen_TWO52 (mode);
41903 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41904
41905 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41906 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41907
41908 ix86_sse_copysign_to_positive (res, xa, res, mask);
41909
41910 emit_label (label);
41911 LABEL_NUSES (label) = 1;
41912
41913 emit_move_insn (operand0, res);
41914 }
41915
41916 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41917 into OPERAND0. */
41918 void
41919 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41920 {
41921 /* C code for the stuff we expand below.
41922 double xa = fabs (x), x2;
41923 if (!isless (xa, TWO52))
41924 return x;
41925 xa = xa + TWO52 - TWO52;
41926 x2 = copysign (xa, x);
41927 Compensate. Floor:
41928 if (x2 > x)
41929 x2 -= 1;
41930 Compensate. Ceil:
41931 if (x2 < x)
41932 x2 -= -1;
41933 return x2;
41934 */
41935 enum machine_mode mode = GET_MODE (operand0);
41936 rtx xa, TWO52, tmp, label, one, res, mask;
41937
41938 TWO52 = ix86_gen_TWO52 (mode);
41939
41940 /* Temporary for holding the result, initialized to the input
41941 operand to ease control flow. */
41942 res = gen_reg_rtx (mode);
41943 emit_move_insn (res, operand1);
41944
41945 /* xa = abs (operand1) */
41946 xa = ix86_expand_sse_fabs (res, &mask);
41947
41948 /* if (!isless (xa, TWO52)) goto label; */
41949 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41950
41951 /* xa = xa + TWO52 - TWO52; */
41952 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41953 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41954
41955 /* xa = copysign (xa, operand1) */
41956 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41957
41958 /* generate 1.0 or -1.0 */
41959 one = force_reg (mode,
41960 const_double_from_real_value (do_floor
41961 ? dconst1 : dconstm1, mode));
41962
41963 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41964 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41965 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41966 gen_rtx_AND (mode, one, tmp)));
41967 /* We always need to subtract here to preserve signed zero. */
41968 tmp = expand_simple_binop (mode, MINUS,
41969 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41970 emit_move_insn (res, tmp);
41971
41972 emit_label (label);
41973 LABEL_NUSES (label) = 1;
41974
41975 emit_move_insn (operand0, res);
41976 }
41977
41978 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41979 into OPERAND0. */
41980 void
41981 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41982 {
41983 /* C code for the stuff we expand below.
41984 double xa = fabs (x), x2;
41985 if (!isless (xa, TWO52))
41986 return x;
41987 x2 = (double)(long)x;
41988 Compensate. Floor:
41989 if (x2 > x)
41990 x2 -= 1;
41991 Compensate. Ceil:
41992 if (x2 < x)
41993 x2 += 1;
41994 if (HONOR_SIGNED_ZEROS (mode))
41995 return copysign (x2, x);
41996 return x2;
41997 */
41998 enum machine_mode mode = GET_MODE (operand0);
41999 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42000
42001 TWO52 = ix86_gen_TWO52 (mode);
42002
42003 /* Temporary for holding the result, initialized to the input
42004 operand to ease control flow. */
42005 res = gen_reg_rtx (mode);
42006 emit_move_insn (res, operand1);
42007
42008 /* xa = abs (operand1) */
42009 xa = ix86_expand_sse_fabs (res, &mask);
42010
42011 /* if (!isless (xa, TWO52)) goto label; */
42012 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42013
42014 /* xa = (double)(long)x */
42015 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42016 expand_fix (xi, res, 0);
42017 expand_float (xa, xi, 0);
42018
42019 /* generate 1.0 */
42020 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42021
42022 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42023 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42024 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42025 gen_rtx_AND (mode, one, tmp)));
42026 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42027 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42028 emit_move_insn (res, tmp);
42029
42030 if (HONOR_SIGNED_ZEROS (mode))
42031 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42032
42033 emit_label (label);
42034 LABEL_NUSES (label) = 1;
42035
42036 emit_move_insn (operand0, res);
42037 }
42038
42039 /* Expand SSE sequence for computing round from OPERAND1 storing
42040 into OPERAND0. A sequence that works without relying on DImode truncation
42041 via cvttsd2siq, which is only available on 64-bit targets. */
42042 void
42043 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42044 {
42045 /* C code for the stuff we expand below.
42046 double xa = fabs (x), xa2, x2;
42047 if (!isless (xa, TWO52))
42048 return x;
42049 Using the absolute value and copying back sign makes
42050 -0.0 -> -0.0 correct.
42051 xa2 = xa + TWO52 - TWO52;
42052 Compensate.
42053 dxa = xa2 - xa;
42054 if (dxa <= -0.5)
42055 xa2 += 1;
42056 else if (dxa > 0.5)
42057 xa2 -= 1;
42058 x2 = copysign (xa2, x);
42059 return x2;
42060 */
42061 enum machine_mode mode = GET_MODE (operand0);
42062 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42063
42064 TWO52 = ix86_gen_TWO52 (mode);
42065
42066 /* Temporary for holding the result, initialized to the input
42067 operand to ease control flow. */
42068 res = gen_reg_rtx (mode);
42069 emit_move_insn (res, operand1);
42070
42071 /* xa = abs (operand1) */
42072 xa = ix86_expand_sse_fabs (res, &mask);
42073
42074 /* if (!isless (xa, TWO52)) goto label; */
42075 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42076
42077 /* xa2 = xa + TWO52 - TWO52; */
42078 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42079 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42080
42081 /* dxa = xa2 - xa; */
42082 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42083
42084 /* generate 0.5, 1.0 and -0.5 */
42085 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42086 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42087 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42088 0, OPTAB_DIRECT);
42089
42090 /* Compensate. */
42091 tmp = gen_reg_rtx (mode);
42092 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42093 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42094 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42095 gen_rtx_AND (mode, one, tmp)));
42096 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42097 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42098 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42099 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42100 gen_rtx_AND (mode, one, tmp)));
42101 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42102
42103 /* res = copysign (xa2, operand1) */
42104 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42105
42106 emit_label (label);
42107 LABEL_NUSES (label) = 1;
42108
42109 emit_move_insn (operand0, res);
42110 }
42111
42112 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42113 into OPERAND0. */
42114 void
42115 ix86_expand_trunc (rtx operand0, rtx operand1)
42116 {
42117 /* C code for SSE variant we expand below.
42118 double xa = fabs (x), x2;
42119 if (!isless (xa, TWO52))
42120 return x;
42121 x2 = (double)(long)x;
42122 if (HONOR_SIGNED_ZEROS (mode))
42123 return copysign (x2, x);
42124 return x2;
42125 */
42126 enum machine_mode mode = GET_MODE (operand0);
42127 rtx xa, xi, TWO52, label, res, mask;
42128
42129 TWO52 = ix86_gen_TWO52 (mode);
42130
42131 /* Temporary for holding the result, initialized to the input
42132 operand to ease control flow. */
42133 res = gen_reg_rtx (mode);
42134 emit_move_insn (res, operand1);
42135
42136 /* xa = abs (operand1) */
42137 xa = ix86_expand_sse_fabs (res, &mask);
42138
42139 /* if (!isless (xa, TWO52)) goto label; */
42140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42141
42142 /* x = (double)(long)x */
42143 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42144 expand_fix (xi, res, 0);
42145 expand_float (res, xi, 0);
42146
42147 if (HONOR_SIGNED_ZEROS (mode))
42148 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42149
42150 emit_label (label);
42151 LABEL_NUSES (label) = 1;
42152
42153 emit_move_insn (operand0, res);
42154 }
42155
42156 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42157 into OPERAND0 without DImode truncation (works on 32-bit targets). */
42158 void
42159 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42160 {
42161 enum machine_mode mode = GET_MODE (operand0);
42162 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42163
42164 /* C code for SSE variant we expand below.
42165 double xa = fabs (x), x2;
42166 if (!isless (xa, TWO52))
42167 return x;
42168 xa2 = xa + TWO52 - TWO52;
42169 Compensate:
42170 if (xa2 > xa)
42171 xa2 -= 1.0;
42172 x2 = copysign (xa2, x);
42173 return x2;
42174 */
42175
42176 TWO52 = ix86_gen_TWO52 (mode);
42177
42178 /* Temporary for holding the result, initialized to the input
42179 operand to ease control flow. */
42180 res = gen_reg_rtx (mode);
42181 emit_move_insn (res, operand1);
42182
42183 /* xa = abs (operand1) */
42184 xa = ix86_expand_sse_fabs (res, &smask);
42185
42186 /* if (!isless (xa, TWO52)) goto label; */
42187 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42188
42189 /* res = xa + TWO52 - TWO52; */
42190 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42191 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42192 emit_move_insn (res, tmp);
42193
42194 /* generate 1.0 */
42195 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42196
42197 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42198 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42199 emit_insn (gen_rtx_SET (VOIDmode, mask,
42200 gen_rtx_AND (mode, mask, one)));
42201 tmp = expand_simple_binop (mode, MINUS,
42202 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42203 emit_move_insn (res, tmp);
42204
42205 /* res = copysign (res, operand1) */
42206 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42207
42208 emit_label (label);
42209 LABEL_NUSES (label) = 1;
42210
42211 emit_move_insn (operand0, res);
42212 }
42213
42214 /* Expand SSE sequence for computing round from OPERAND1 storing
42215 into OPERAND0. */
42216 void
42217 ix86_expand_round (rtx operand0, rtx operand1)
42218 {
42219 /* C code for the stuff we're doing below:
42220 double xa = fabs (x);
42221 if (!isless (xa, TWO52))
42222 return x;
42223 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42224 return copysign (xa, x);
42225 */
42226 enum machine_mode mode = GET_MODE (operand0);
42227 rtx res, TWO52, xa, label, xi, half, mask;
42228 const struct real_format *fmt;
42229 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42230
42231 /* Temporary for holding the result, initialized to the input
42232 operand to ease control flow. */
42233 res = gen_reg_rtx (mode);
42234 emit_move_insn (res, operand1);
42235
42236 TWO52 = ix86_gen_TWO52 (mode);
42237 xa = ix86_expand_sse_fabs (res, &mask);
42238 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42239
42240 /* load nextafter (0.5, 0.0) */
42241 fmt = REAL_MODE_FORMAT (mode);
42242 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42243 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42244
42245 /* xa = xa + 0.5 */
42246 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42247 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42248
42249 /* xa = (double)(int64_t)xa */
42250 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42251 expand_fix (xi, xa, 0);
42252 expand_float (xa, xi, 0);
42253
42254 /* res = copysign (xa, operand1) */
42255 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42256
42257 emit_label (label);
42258 LABEL_NUSES (label) = 1;
42259
42260 emit_move_insn (operand0, res);
42261 }
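/* Editorial note (not from the original sources): unlike rint above, this
   sequence implements C round () semantics under the default
   round-to-nearest mode, i.e. halfway cases go away from zero:

     round (2.5)  -> 3.0      rint (2.5)  -> 2.0  (ties to even)
     round (-2.5) -> -3.0     rint (-2.5) -> -2.0  */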
42262
42263 /* Expand SSE sequence for computing round
42264 from OP1 storing into OP0 using the SSE4.1 round insn. */
42265 void
42266 ix86_expand_round_sse4 (rtx op0, rtx op1)
42267 {
42268 enum machine_mode mode = GET_MODE (op0);
42269 rtx e1, e2, res, half;
42270 const struct real_format *fmt;
42271 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42272 rtx (*gen_copysign) (rtx, rtx, rtx);
42273 rtx (*gen_round) (rtx, rtx, rtx);
42274
42275 switch (mode)
42276 {
42277 case SFmode:
42278 gen_copysign = gen_copysignsf3;
42279 gen_round = gen_sse4_1_roundsf2;
42280 break;
42281 case DFmode:
42282 gen_copysign = gen_copysigndf3;
42283 gen_round = gen_sse4_1_rounddf2;
42284 break;
42285 default:
42286 gcc_unreachable ();
42287 }
42288
42289 /* round (a) = trunc (a + copysign (0.5, a)) */
42290
42291 /* load nextafter (0.5, 0.0) */
42292 fmt = REAL_MODE_FORMAT (mode);
42293 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42294 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42295 half = const_double_from_real_value (pred_half, mode);
42296
42297 /* e1 = copysign (0.5, op1) */
42298 e1 = gen_reg_rtx (mode);
42299 emit_insn (gen_copysign (e1, half, op1));
42300
42301 /* e2 = op1 + e1 */
42302 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42303
42304 /* res = trunc (e2) */
42305 res = gen_reg_rtx (mode);
42306 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42307
42308 emit_move_insn (op0, res);
42309 }
42310 \f
42311
42312 /* Table of valid machine attributes. */
42313 static const struct attribute_spec ix86_attribute_table[] =
42314 {
42315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42316 affects_type_identity } */
42317 /* Stdcall attribute says callee is responsible for popping arguments
42318 if they are not variable. */
42319 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42320 true },
42321 /* Fastcall attribute says callee is responsible for popping arguments
42322 if they are not variable. */
42323 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42324 true },
42325 /* Thiscall attribute says callee is responsible for popping arguments
42326 if they are not variable. */
42327 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42328 true },
42329 /* Cdecl attribute says the callee is a normal C declaration */
42330 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42331 true },
42332 /* Regparm attribute specifies how many integer arguments are to be
42333 passed in registers. */
42334 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42335 true },
42336 /* Sseregparm attribute says we are using x86_64 calling conventions
42337 for FP arguments. */
42338 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42339 true },
42340 /* The transactional memory builtins are implicitly regparm or fastcall
42341 depending on the ABI. Override the generic do-nothing attribute that
42342 these builtins were declared with. */
42343 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42344 true },
42345 /* force_align_arg_pointer says this function realigns the stack at entry. */
42346 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42347 false, true, true, ix86_handle_cconv_attribute, false },
42348 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42351 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42352 false },
42353 #endif
42354 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42355 false },
42356 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42357 false },
42358 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42359 SUBTARGET_ATTRIBUTE_TABLE,
42360 #endif
42361 /* ms_abi and sysv_abi calling convention function attributes. */
42362 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42363 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42364 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42365 false },
42366 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42367 ix86_handle_callee_pop_aggregate_return, true },
42368 /* End element. */
42369 { NULL, 0, 0, false, false, false, NULL, false }
42370 };
42371
42372 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42373 static int
42374 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42375 tree vectype,
42376 int misalign ATTRIBUTE_UNUSED)
42377 {
42378 unsigned elements;
42379
42380 switch (type_of_cost)
42381 {
42382 case scalar_stmt:
42383 return ix86_cost->scalar_stmt_cost;
42384
42385 case scalar_load:
42386 return ix86_cost->scalar_load_cost;
42387
42388 case scalar_store:
42389 return ix86_cost->scalar_store_cost;
42390
42391 case vector_stmt:
42392 return ix86_cost->vec_stmt_cost;
42393
42394 case vector_load:
42395 return ix86_cost->vec_align_load_cost;
42396
42397 case vector_store:
42398 return ix86_cost->vec_store_cost;
42399
42400 case vec_to_scalar:
42401 return ix86_cost->vec_to_scalar_cost;
42402
42403 case scalar_to_vec:
42404 return ix86_cost->scalar_to_vec_cost;
42405
42406 case unaligned_load:
42407 case unaligned_store:
42408 return ix86_cost->vec_unalign_load_cost;
42409
42410 case cond_branch_taken:
42411 return ix86_cost->cond_taken_branch_cost;
42412
42413 case cond_branch_not_taken:
42414 return ix86_cost->cond_not_taken_branch_cost;
42415
42416 case vec_perm:
42417 case vec_promote_demote:
42418 return ix86_cost->vec_stmt_cost;
42419
42420 case vec_construct:
42421 elements = TYPE_VECTOR_SUBPARTS (vectype);
42422 return elements / 2 + 1;
42423
42424 default:
42425 gcc_unreachable ();
42426 }
42427 }
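/* Editorial example (not from the original sources): for the vec_construct
   case above the cost grows with the vector width, e.g. building a V4SF
   from four scalars is charged 4 / 2 + 1 == 3 and a V8SI from eight
   scalars 8 / 2 + 1 == 5.  */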
42428
42429 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42430 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42431 insn every time. */
42432
42433 static GTY(()) rtx vselect_insn;
42434
42435 /* Initialize vselect_insn. */
42436
42437 static void
42438 init_vselect_insn (void)
42439 {
42440 unsigned i;
42441 rtx x;
42442
42443 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42444 for (i = 0; i < MAX_VECT_LEN; ++i)
42445 XVECEXP (x, 0, i) = const0_rtx;
42446 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42447 const0_rtx), x);
42448 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42449 start_sequence ();
42450 vselect_insn = emit_insn (x);
42451 end_sequence ();
42452 }
42453
42454 /* Construct (set target (vec_select op0 (parallel perm))) and
42455 return true if that's a valid instruction in the active ISA. */
42456
42457 static bool
42458 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42459 unsigned nelt, bool testing_p)
42460 {
42461 unsigned int i;
42462 rtx x, save_vconcat;
42463 int icode;
42464
42465 if (vselect_insn == NULL_RTX)
42466 init_vselect_insn ();
42467
42468 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42469 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42470 for (i = 0; i < nelt; ++i)
42471 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42472 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42473 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42474 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42475 SET_DEST (PATTERN (vselect_insn)) = target;
42476 icode = recog_memoized (vselect_insn);
42477
42478 if (icode >= 0 && !testing_p)
42479 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42480
42481 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42482 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42483 INSN_CODE (vselect_insn) = -1;
42484
42485 return icode >= 0;
42486 }
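/* Editorial example (not from the original sources): for a V4SImode TARGET
   and OP0 with PERM = { 2, 3, 0, 1 }, the cached insn is filled in as

     (set (reg:V4SI target)
          (vec_select:V4SI (reg:V4SI op0)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which recog_memoized matches against the SSE2 pshufd pattern.  */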
42487
42488 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42489
42490 static bool
42491 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42492 const unsigned char *perm, unsigned nelt,
42493 bool testing_p)
42494 {
42495 enum machine_mode v2mode;
42496 rtx x;
42497 bool ok;
42498
42499 if (vselect_insn == NULL_RTX)
42500 init_vselect_insn ();
42501
42502 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42503 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42504 PUT_MODE (x, v2mode);
42505 XEXP (x, 0) = op0;
42506 XEXP (x, 1) = op1;
42507 ok = expand_vselect (target, x, perm, nelt, testing_p);
42508 XEXP (x, 0) = const0_rtx;
42509 XEXP (x, 1) = const0_rtx;
42510 return ok;
42511 }
42512
42513 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42514 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42515
42516 static bool
42517 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42518 {
42519 enum machine_mode vmode = d->vmode;
42520 unsigned i, mask, nelt = d->nelt;
42521 rtx target, op0, op1, x;
42522 rtx rperm[32], vperm;
42523
42524 if (d->one_operand_p)
42525 return false;
42526 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42527 ;
42528 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42529 ;
42530 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42531 ;
42532 else
42533 return false;
42534
42535 /* This is a blend, not a permute. Elements must stay in their
42536 respective lanes. */
42537 for (i = 0; i < nelt; ++i)
42538 {
42539 unsigned e = d->perm[i];
42540 if (!(e == i || e == i + nelt))
42541 return false;
42542 }
42543
42544 if (d->testing_p)
42545 return true;
42546
42547 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42548 decision should be extracted elsewhere, so that we only try that
42549 sequence once all budget==3 options have been tried. */
42550 target = d->target;
42551 op0 = d->op0;
42552 op1 = d->op1;
42553 mask = 0;
42554
42555 switch (vmode)
42556 {
42557 case V4DFmode:
42558 case V8SFmode:
42559 case V2DFmode:
42560 case V4SFmode:
42561 case V8HImode:
42562 case V8SImode:
42563 for (i = 0; i < nelt; ++i)
42564 mask |= (d->perm[i] >= nelt) << i;
42565 break;
42566
42567 case V2DImode:
42568 for (i = 0; i < 2; ++i)
42569 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42570 vmode = V8HImode;
42571 goto do_subreg;
42572
42573 case V4SImode:
42574 for (i = 0; i < 4; ++i)
42575 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42576 vmode = V8HImode;
42577 goto do_subreg;
42578
42579 case V16QImode:
42580 /* See if bytes move in pairs so we can use pblendw with
42581 an immediate argument, rather than pblendvb with a vector
42582 argument. */
42583 for (i = 0; i < 16; i += 2)
42584 if (d->perm[i] + 1 != d->perm[i + 1])
42585 {
42586 use_pblendvb:
42587 for (i = 0; i < nelt; ++i)
42588 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42589
42590 finish_pblendvb:
42591 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42592 vperm = force_reg (vmode, vperm);
42593
42594 if (GET_MODE_SIZE (vmode) == 16)
42595 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42596 else
42597 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42598 if (target != d->target)
42599 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42600 return true;
42601 }
42602
42603 for (i = 0; i < 8; ++i)
42604 mask |= (d->perm[i * 2] >= 16) << i;
42605 vmode = V8HImode;
42606 /* FALLTHRU */
42607
42608 do_subreg:
42609 target = gen_reg_rtx (vmode);
42610 op0 = gen_lowpart (vmode, op0);
42611 op1 = gen_lowpart (vmode, op1);
42612 break;
42613
42614 case V32QImode:
42615 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42616 for (i = 0; i < 32; i += 2)
42617 if (d->perm[i] + 1 != d->perm[i + 1])
42618 goto use_pblendvb;
42619 /* See if bytes move in quadruplets. If yes, vpblendd
42620 with immediate can be used. */
42621 for (i = 0; i < 32; i += 4)
42622 if (d->perm[i] + 2 != d->perm[i + 2])
42623 break;
42624 if (i < 32)
42625 {
42626 /* See if bytes move the same in both lanes. If yes,
42627 vpblendw with immediate can be used. */
42628 for (i = 0; i < 16; i += 2)
42629 if (d->perm[i] + 16 != d->perm[i + 16])
42630 goto use_pblendvb;
42631
42632 /* Use vpblendw. */
42633 for (i = 0; i < 16; ++i)
42634 mask |= (d->perm[i * 2] >= 32) << i;
42635 vmode = V16HImode;
42636 goto do_subreg;
42637 }
42638
42639 /* Use vpblendd. */
42640 for (i = 0; i < 8; ++i)
42641 mask |= (d->perm[i * 4] >= 32) << i;
42642 vmode = V8SImode;
42643 goto do_subreg;
42644
42645 case V16HImode:
42646 /* See if words move in pairs. If yes, vpblendd can be used. */
42647 for (i = 0; i < 16; i += 2)
42648 if (d->perm[i] + 1 != d->perm[i + 1])
42649 break;
42650 if (i < 16)
42651 {
42652 /* See if words move the same in both lanes. If not,
42653 vpblendvb must be used. */
42654 for (i = 0; i < 8; i++)
42655 if (d->perm[i] + 8 != d->perm[i + 8])
42656 {
42657 /* Use vpblendvb. */
42658 for (i = 0; i < 32; ++i)
42659 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42660
42661 vmode = V32QImode;
42662 nelt = 32;
42663 target = gen_reg_rtx (vmode);
42664 op0 = gen_lowpart (vmode, op0);
42665 op1 = gen_lowpart (vmode, op1);
42666 goto finish_pblendvb;
42667 }
42668
42669 /* Use vpblendw. */
42670 for (i = 0; i < 16; ++i)
42671 mask |= (d->perm[i] >= 16) << i;
42672 break;
42673 }
42674
42675 /* Use vpblendd. */
42676 for (i = 0; i < 8; ++i)
42677 mask |= (d->perm[i * 2] >= 16) << i;
42678 vmode = V8SImode;
42679 goto do_subreg;
42680
42681 case V4DImode:
42682 /* Use vpblendd. */
42683 for (i = 0; i < 4; ++i)
42684 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42685 vmode = V8SImode;
42686 goto do_subreg;
42687
42688 default:
42689 gcc_unreachable ();
42690 }
42691
42692 /* This matches five different patterns with the different modes. */
42693 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42694 x = gen_rtx_SET (VOIDmode, target, x);
42695 emit_insn (x);
42696 if (target != d->target)
42697 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42698
42699 return true;
42700 }
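/* Editorial example (not from the original sources): for a V4SFmode blend
   with perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1 (indices >=
   nelt), so the loop above computes

     mask = (1 << 1) | (1 << 3) == 0xa

   which becomes the immediate operand of blendps.  */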
42701
42702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42703 in terms of the variable form of vpermilps.
42704
42705 Note that we will have already failed the immediate input vpermilps,
42706 which requires that the high and low part shuffle be identical; the
42707 variable form doesn't require that. */
42708
42709 static bool
42710 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42711 {
42712 rtx rperm[8], vperm;
42713 unsigned i;
42714
42715 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42716 return false;
42717
42718 /* We can only permute within each 128-bit lane. */
42719 for (i = 0; i < 8; ++i)
42720 {
42721 unsigned e = d->perm[i];
42722 if (i < 4 ? e >= 4 : e < 4)
42723 return false;
42724 }
42725
42726 if (d->testing_p)
42727 return true;
42728
42729 for (i = 0; i < 8; ++i)
42730 {
42731 unsigned e = d->perm[i];
42732
42733 /* Within each 128-bit lane, the elements of op0 are numbered
42734 from 0 and the elements of op1 are numbered from 4. */
42735 if (e >= 8 + 4)
42736 e -= 8;
42737 else if (e >= 4)
42738 e -= 4;
42739
42740 rperm[i] = GEN_INT (e);
42741 }
42742
42743 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42744 vperm = force_reg (V8SImode, vperm);
42745 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42746
42747 return true;
42748 }
42749
42750 /* Return true if permutation D can be performed as VMODE permutation
42751 instead. */
42752
42753 static bool
42754 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42755 {
42756 unsigned int i, j, chunk;
42757
42758 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42759 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42760 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42761 return false;
42762
42763 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42764 return true;
42765
42766 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42767 for (i = 0; i < d->nelt; i += chunk)
42768 if (d->perm[i] & (chunk - 1))
42769 return false;
42770 else
42771 for (j = 1; j < chunk; ++j)
42772 if (d->perm[i] + j != d->perm[i + j])
42773 return false;
42774
42775 return true;
42776 }
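/* Editorial example (not from the original sources): the V16QImode
   permutation { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves whole
   aligned 4-byte groups, so with VMODE == V4SImode (chunk == 4) the checks
   above pass and it can instead be performed as the V4SImode permutation
   { 1, 0, 3, 2 }.  */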
42777
42778 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42779 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42780
42781 static bool
42782 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42783 {
42784 unsigned i, nelt, eltsz, mask;
42785 unsigned char perm[32];
42786 enum machine_mode vmode = V16QImode;
42787 rtx rperm[32], vperm, target, op0, op1;
42788
42789 nelt = d->nelt;
42790
42791 if (!d->one_operand_p)
42792 {
42793 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42794 {
42795 if (TARGET_AVX2
42796 && valid_perm_using_mode_p (V2TImode, d))
42797 {
42798 if (d->testing_p)
42799 return true;
42800
42801 /* Use vperm2i128 insn. The pattern uses
42802 V4DImode instead of V2TImode. */
42803 target = d->target;
42804 if (d->vmode != V4DImode)
42805 target = gen_reg_rtx (V4DImode);
42806 op0 = gen_lowpart (V4DImode, d->op0);
42807 op1 = gen_lowpart (V4DImode, d->op1);
42808 rperm[0]
42809 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42810 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42811 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42812 if (target != d->target)
42813 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42814 return true;
42815 }
42816 return false;
42817 }
42818 }
42819 else
42820 {
42821 if (GET_MODE_SIZE (d->vmode) == 16)
42822 {
42823 if (!TARGET_SSSE3)
42824 return false;
42825 }
42826 else if (GET_MODE_SIZE (d->vmode) == 32)
42827 {
42828 if (!TARGET_AVX2)
42829 return false;
42830
42831 /* V4DImode should be already handled through
42832 expand_vselect by vpermq instruction. */
42833 gcc_assert (d->vmode != V4DImode);
42834
42835 vmode = V32QImode;
42836 if (d->vmode == V8SImode
42837 || d->vmode == V16HImode
42838 || d->vmode == V32QImode)
42839 {
42840 /* First see if vpermq can be used for
42841 V8SImode/V16HImode/V32QImode. */
42842 if (valid_perm_using_mode_p (V4DImode, d))
42843 {
42844 for (i = 0; i < 4; i++)
42845 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42846 if (d->testing_p)
42847 return true;
42848 target = gen_reg_rtx (V4DImode);
42849 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42850 perm, 4, false))
42851 {
42852 emit_move_insn (d->target,
42853 gen_lowpart (d->vmode, target));
42854 return true;
42855 }
42856 return false;
42857 }
42858
42859 /* Next see if vpermd can be used. */
42860 if (valid_perm_using_mode_p (V8SImode, d))
42861 vmode = V8SImode;
42862 }
42863 /* Or if vpermps can be used. */
42864 else if (d->vmode == V8SFmode)
42865 vmode = V8SImode;
42866
42867 if (vmode == V32QImode)
42868 {
42869 /* vpshufb only works intra-lane; it is not
42870 possible to shuffle bytes between the lanes. */
42871 for (i = 0; i < nelt; ++i)
42872 if ((d->perm[i] ^ i) & (nelt / 2))
42873 return false;
42874 }
42875 }
42876 else
42877 return false;
42878 }
42879
42880 if (d->testing_p)
42881 return true;
42882
42883 if (vmode == V8SImode)
42884 for (i = 0; i < 8; ++i)
42885 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42886 else
42887 {
42888 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42889 if (!d->one_operand_p)
42890 mask = 2 * nelt - 1;
42891 else if (vmode == V16QImode)
42892 mask = nelt - 1;
42893 else
42894 mask = nelt / 2 - 1;
42895
42896 for (i = 0; i < nelt; ++i)
42897 {
42898 unsigned j, e = d->perm[i] & mask;
42899 for (j = 0; j < eltsz; ++j)
42900 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42901 }
42902 }
42903
42904 vperm = gen_rtx_CONST_VECTOR (vmode,
42905 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42906 vperm = force_reg (vmode, vperm);
42907
42908 target = d->target;
42909 if (d->vmode != vmode)
42910 target = gen_reg_rtx (vmode);
42911 op0 = gen_lowpart (vmode, d->op0);
42912 if (d->one_operand_p)
42913 {
42914 if (vmode == V16QImode)
42915 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42916 else if (vmode == V32QImode)
42917 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42918 else if (vmode == V8SFmode)
42919 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42920 else
42921 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42922 }
42923 else
42924 {
42925 op1 = gen_lowpart (vmode, d->op1);
42926 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42927 }
42928 if (target != d->target)
42929 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42930
42931 return true;
42932 }
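/* Editorial example (not from the original sources): reversing a V8HImode
   vector, perm = { 7,6,5,4,3,2,1,0 }, is handled here via pshufb; with
   eltsz == 2 the inner loop above expands each halfword index into its two
   byte indices, giving the V16QImode control vector

     { 14,15, 12,13, 10,11, 8,9, 6,7, 4,5, 2,3, 0,1 }  */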
42933
42934 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42935 in a single instruction. */
42936
42937 static bool
42938 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42939 {
42940 unsigned i, nelt = d->nelt;
42941 unsigned char perm2[MAX_VECT_LEN];
42942
42943 /* Check plain VEC_SELECT first, because AVX has instructions that could
42944 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42945 input where SEL+CONCAT may not. */
42946 if (d->one_operand_p)
42947 {
42948 int mask = nelt - 1;
42949 bool identity_perm = true;
42950 bool broadcast_perm = true;
42951
42952 for (i = 0; i < nelt; i++)
42953 {
42954 perm2[i] = d->perm[i] & mask;
42955 if (perm2[i] != i)
42956 identity_perm = false;
42957 if (perm2[i])
42958 broadcast_perm = false;
42959 }
42960
42961 if (identity_perm)
42962 {
42963 if (!d->testing_p)
42964 emit_move_insn (d->target, d->op0);
42965 return true;
42966 }
42967 else if (broadcast_perm && TARGET_AVX2)
42968 {
42969 /* Use vpbroadcast{b,w,d}. */
42970 rtx (*gen) (rtx, rtx) = NULL;
42971 switch (d->vmode)
42972 {
42973 case V32QImode:
42974 gen = gen_avx2_pbroadcastv32qi_1;
42975 break;
42976 case V16HImode:
42977 gen = gen_avx2_pbroadcastv16hi_1;
42978 break;
42979 case V8SImode:
42980 gen = gen_avx2_pbroadcastv8si_1;
42981 break;
42982 case V16QImode:
42983 gen = gen_avx2_pbroadcastv16qi;
42984 break;
42985 case V8HImode:
42986 gen = gen_avx2_pbroadcastv8hi;
42987 break;
42988 case V8SFmode:
42989 gen = gen_avx2_vec_dupv8sf_1;
42990 break;
42991 /* For other modes prefer other shuffles this function creates. */
42992 default: break;
42993 }
42994 if (gen != NULL)
42995 {
42996 if (!d->testing_p)
42997 emit_insn (gen (d->target, d->op0));
42998 return true;
42999 }
43000 }
43001
43002 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43003 return true;
43004
43005 /* There are plenty of patterns in sse.md that are written for
43006 SEL+CONCAT and are not replicated for a single op. Perhaps
43007 that should be changed, to avoid the nastiness here. */
43008
43009 /* Recognize interleave style patterns, which means incrementing
43010 every other permutation operand. */
43011 for (i = 0; i < nelt; i += 2)
43012 {
43013 perm2[i] = d->perm[i] & mask;
43014 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43015 }
43016 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43017 d->testing_p))
43018 return true;
43019
43020 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43021 if (nelt >= 4)
43022 {
43023 for (i = 0; i < nelt; i += 4)
43024 {
43025 perm2[i + 0] = d->perm[i + 0] & mask;
43026 perm2[i + 1] = d->perm[i + 1] & mask;
43027 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43028 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43029 }
43030
43031 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43032 d->testing_p))
43033 return true;
43034 }
43035 }
43036
43037 /* Finally, try the fully general two operand permute. */
43038 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43039 d->testing_p))
43040 return true;
43041
43042 /* Recognize interleave style patterns with reversed operands. */
43043 if (!d->one_operand_p)
43044 {
43045 for (i = 0; i < nelt; ++i)
43046 {
43047 unsigned e = d->perm[i];
43048 if (e >= nelt)
43049 e -= nelt;
43050 else
43051 e += nelt;
43052 perm2[i] = e;
43053 }
43054
43055 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43056 d->testing_p))
43057 return true;
43058 }
43059
43060 /* Try the SSE4.1 blend variable merge instructions. */
43061 if (expand_vec_perm_blend (d))
43062 return true;
43063
43064 /* Try one of the AVX vpermil variable permutations. */
43065 if (expand_vec_perm_vpermil (d))
43066 return true;
43067
43068 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43069 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43070 if (expand_vec_perm_pshufb (d))
43071 return true;
43072
43073 /* Try the AVX512F vpermi2 instructions. */
43074 rtx vec[64];
43075 enum machine_mode mode = d->vmode;
43076 if (mode == V8DFmode)
43077 mode = V8DImode;
43078 else if (mode == V16SFmode)
43079 mode = V16SImode;
43080 for (i = 0; i < nelt; ++i)
43081 vec[i] = GEN_INT (d->perm[i]);
43082 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43083 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43084 return true;
43085
43086 return false;
43087 }
43088
43089 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43090 in terms of a pair of pshuflw + pshufhw instructions. */
43091
43092 static bool
43093 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43094 {
43095 unsigned char perm2[MAX_VECT_LEN];
43096 unsigned i;
43097 bool ok;
43098
43099 if (d->vmode != V8HImode || !d->one_operand_p)
43100 return false;
43101
43102 /* The two permutations only operate in 64-bit lanes. */
43103 for (i = 0; i < 4; ++i)
43104 if (d->perm[i] >= 4)
43105 return false;
43106 for (i = 4; i < 8; ++i)
43107 if (d->perm[i] < 4)
43108 return false;
43109
43110 if (d->testing_p)
43111 return true;
43112
43113 /* Emit the pshuflw. */
43114 memcpy (perm2, d->perm, 4);
43115 for (i = 4; i < 8; ++i)
43116 perm2[i] = i;
43117 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43118 gcc_assert (ok);
43119
43120 /* Emit the pshufhw. */
43121 memcpy (perm2 + 4, d->perm + 4, 4);
43122 for (i = 0; i < 4; ++i)
43123 perm2[i] = i;
43124 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43125 gcc_assert (ok);
43126
43127 return true;
43128 }
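/* Editorial example (not from the original sources): for the V8HImode
   permutation { 3,0,2,1, 5,7,6,4 } every index stays inside its 64-bit
   half, so the routine above emits

     pshuflw with { 3,0,2,1, 4,5,6,7 }    reorder the low quadword
     pshufhw with { 0,1,2,3, 5,7,6,4 }    then reorder the high quadword  */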
43129
43130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43131 the permutation using the SSSE3 palignr instruction. This succeeds
43132 when all of the elements in PERM fit within one vector and we merely
43133 need to shift them down so that a single vector permutation has a
43134 chance to succeed. */
43135
43136 static bool
43137 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43138 {
43139 unsigned i, nelt = d->nelt;
43140 unsigned min, max;
43141 bool in_order, ok;
43142 rtx shift, target;
43143 struct expand_vec_perm_d dcopy;
43144
43145 /* Even with AVX, palignr only operates on 128-bit vectors. */
43146 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43147 return false;
43148
43149 min = nelt, max = 0;
43150 for (i = 0; i < nelt; ++i)
43151 {
43152 unsigned e = d->perm[i];
43153 if (e < min)
43154 min = e;
43155 if (e > max)
43156 max = e;
43157 }
43158 if (min == 0 || max - min >= nelt)
43159 return false;
43160
43161 /* Given that we have SSSE3, we know we'll be able to implement the
43162 single operand permutation after the palignr with pshufb. */
43163 if (d->testing_p)
43164 return true;
43165
43166 dcopy = *d;
43167 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43168 target = gen_reg_rtx (TImode);
43169 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43170 gen_lowpart (TImode, d->op0), shift));
43171
43172 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43173 dcopy.one_operand_p = true;
43174
43175 in_order = true;
43176 for (i = 0; i < nelt; ++i)
43177 {
43178 unsigned e = dcopy.perm[i] - min;
43179 if (e != i)
43180 in_order = false;
43181 dcopy.perm[i] = e;
43182 }
43183
43184 /* Test for the degenerate case where the alignment by itself
43185 produces the desired permutation. */
43186 if (in_order)
43187 {
43188 emit_move_insn (d->target, dcopy.op0);
43189 return true;
43190 }
43191
43192 ok = expand_vec_perm_1 (&dcopy);
43193 gcc_assert (ok);
43194
43195 return ok;
43196 }
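/* Editorial example (not from the original sources): for V16QImode with
   every d->perm index in the window [3, 18], min == 3, so the code above
   emits a palignr that byte-shifts the op1:op0 pair right by 3 bytes;
   element j of the shifted vector then equals element j + 3 of the pair,
   and the remaining single-operand shuffle uses indices d->perm[i] - 3.  */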
43197
43198 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43199
43200 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43201 a two vector permutation into a single vector permutation by using
43202 an interleave operation to merge the vectors. */
43203
43204 static bool
43205 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43206 {
43207 struct expand_vec_perm_d dremap, dfinal;
43208 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43209 unsigned HOST_WIDE_INT contents;
43210 unsigned char remap[2 * MAX_VECT_LEN];
43211 rtx seq;
43212 bool ok, same_halves = false;
43213
43214 if (GET_MODE_SIZE (d->vmode) == 16)
43215 {
43216 if (d->one_operand_p)
43217 return false;
43218 }
43219 else if (GET_MODE_SIZE (d->vmode) == 32)
43220 {
43221 if (!TARGET_AVX)
43222 return false;
43223 /* For 32-byte modes allow even d->one_operand_p.
43224 The lack of cross-lane shuffling in some instructions
43225 might prevent a single insn shuffle. */
43226 dfinal = *d;
43227 dfinal.testing_p = true;
43228 /* If expand_vec_perm_interleave3 can expand this into
43229 a 3 insn sequence, give up and let it be expanded that
43230 way instead. While that is one insn longer, it doesn't
43231 need a memory operand, and in the common case where the
43232 interleave-low and interleave-high permutations with the
43233 same operands are adjacent, the pair needs only 4 insns
43234 for both after CSE. */
43235 if (expand_vec_perm_interleave3 (&dfinal))
43236 return false;
43237 }
43238 else
43239 return false;
43240
43241 /* Examine from whence the elements come. */
43242 contents = 0;
43243 for (i = 0; i < nelt; ++i)
43244 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43245
43246 memset (remap, 0xff, sizeof (remap));
43247 dremap = *d;
43248
43249 if (GET_MODE_SIZE (d->vmode) == 16)
43250 {
43251 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43252
43253 /* Split the two input vectors into 4 halves. */
43254 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43255 h2 = h1 << nelt2;
43256 h3 = h2 << nelt2;
43257 h4 = h3 << nelt2;
43258
43259 /* If the elements all come from the low halves, use interleave low;
43260 similarly, use interleave high for the high halves. If the elements are
43261 from mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43262 if ((contents & (h1 | h3)) == contents)
43263 {
43264 /* punpckl* */
43265 for (i = 0; i < nelt2; ++i)
43266 {
43267 remap[i] = i * 2;
43268 remap[i + nelt] = i * 2 + 1;
43269 dremap.perm[i * 2] = i;
43270 dremap.perm[i * 2 + 1] = i + nelt;
43271 }
43272 if (!TARGET_SSE2 && d->vmode == V4SImode)
43273 dremap.vmode = V4SFmode;
43274 }
43275 else if ((contents & (h2 | h4)) == contents)
43276 {
43277 /* punpckh* */
43278 for (i = 0; i < nelt2; ++i)
43279 {
43280 remap[i + nelt2] = i * 2;
43281 remap[i + nelt + nelt2] = i * 2 + 1;
43282 dremap.perm[i * 2] = i + nelt2;
43283 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43284 }
43285 if (!TARGET_SSE2 && d->vmode == V4SImode)
43286 dremap.vmode = V4SFmode;
43287 }
43288 else if ((contents & (h1 | h4)) == contents)
43289 {
43290 /* shufps */
43291 for (i = 0; i < nelt2; ++i)
43292 {
43293 remap[i] = i;
43294 remap[i + nelt + nelt2] = i + nelt2;
43295 dremap.perm[i] = i;
43296 dremap.perm[i + nelt2] = i + nelt + nelt2;
43297 }
43298 if (nelt != 4)
43299 {
43300 /* shufpd */
43301 dremap.vmode = V2DImode;
43302 dremap.nelt = 2;
43303 dremap.perm[0] = 0;
43304 dremap.perm[1] = 3;
43305 }
43306 }
43307 else if ((contents & (h2 | h3)) == contents)
43308 {
43309 /* shufps */
43310 for (i = 0; i < nelt2; ++i)
43311 {
43312 remap[i + nelt2] = i;
43313 remap[i + nelt] = i + nelt2;
43314 dremap.perm[i] = i + nelt2;
43315 dremap.perm[i + nelt2] = i + nelt;
43316 }
43317 if (nelt != 4)
43318 {
43319 /* shufpd */
43320 dremap.vmode = V2DImode;
43321 dremap.nelt = 2;
43322 dremap.perm[0] = 1;
43323 dremap.perm[1] = 2;
43324 }
43325 }
43326 else
43327 return false;
43328 }
43329 else
43330 {
43331 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43332 unsigned HOST_WIDE_INT q[8];
43333 unsigned int nonzero_halves[4];
43334
43335 /* Split the two input vectors into 8 quarters. */
43336 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43337 for (i = 1; i < 8; ++i)
43338 q[i] = q[0] << (nelt4 * i);
43339 for (i = 0; i < 4; ++i)
43340 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43341 {
43342 nonzero_halves[nzcnt] = i;
43343 ++nzcnt;
43344 }
43345
43346 if (nzcnt == 1)
43347 {
43348 gcc_assert (d->one_operand_p);
43349 nonzero_halves[1] = nonzero_halves[0];
43350 same_halves = true;
43351 }
43352 else if (d->one_operand_p)
43353 {
43354 gcc_assert (nonzero_halves[0] == 0);
43355 gcc_assert (nonzero_halves[1] == 1);
43356 }
43357
43358 if (nzcnt <= 2)
43359 {
43360 if (d->perm[0] / nelt2 == nonzero_halves[1])
43361 {
43362 /* Attempt to increase the likelihood that dfinal
43363 shuffle will be intra-lane. */
43364 char tmph = nonzero_halves[0];
43365 nonzero_halves[0] = nonzero_halves[1];
43366 nonzero_halves[1] = tmph;
43367 }
43368
43369 /* vperm2f128 or vperm2i128. */
43370 for (i = 0; i < nelt2; ++i)
43371 {
43372 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43373 remap[i + nonzero_halves[0] * nelt2] = i;
43374 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43375 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43376 }
43377
43378 if (d->vmode != V8SFmode
43379 && d->vmode != V4DFmode
43380 && d->vmode != V8SImode)
43381 {
43382 dremap.vmode = V8SImode;
43383 dremap.nelt = 8;
43384 for (i = 0; i < 4; ++i)
43385 {
43386 dremap.perm[i] = i + nonzero_halves[0] * 4;
43387 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43388 }
43389 }
43390 }
43391 else if (d->one_operand_p)
43392 return false;
43393 else if (TARGET_AVX2
43394 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43395 {
43396 /* vpunpckl* */
43397 for (i = 0; i < nelt4; ++i)
43398 {
43399 remap[i] = i * 2;
43400 remap[i + nelt] = i * 2 + 1;
43401 remap[i + nelt2] = i * 2 + nelt2;
43402 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43403 dremap.perm[i * 2] = i;
43404 dremap.perm[i * 2 + 1] = i + nelt;
43405 dremap.perm[i * 2 + nelt2] = i + nelt2;
43406 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43407 }
43408 }
43409 else if (TARGET_AVX2
43410 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43411 {
43412 /* vpunpckh* */
43413 for (i = 0; i < nelt4; ++i)
43414 {
43415 remap[i + nelt4] = i * 2;
43416 remap[i + nelt + nelt4] = i * 2 + 1;
43417 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43418 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43419 dremap.perm[i * 2] = i + nelt4;
43420 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43421 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43422 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43423 }
43424 }
43425 else
43426 return false;
43427 }
43428
43429 /* Use the remapping array set up above to move the elements from their
43430 swizzled locations into their final destinations. */
43431 dfinal = *d;
43432 for (i = 0; i < nelt; ++i)
43433 {
43434 unsigned e = remap[d->perm[i]];
43435 gcc_assert (e < nelt);
43436 /* If same_halves is true, both halves of the remapped vector are the
43437 same. Avoid cross-lane accesses if possible. */
43438 if (same_halves && i >= nelt2)
43439 {
43440 gcc_assert (e < nelt2);
43441 dfinal.perm[i] = e + nelt2;
43442 }
43443 else
43444 dfinal.perm[i] = e;
43445 }
43446 if (!d->testing_p)
43447 {
43448 dremap.target = gen_reg_rtx (dremap.vmode);
43449 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43450 }
43451 dfinal.op1 = dfinal.op0;
43452 dfinal.one_operand_p = true;
43453
43454 /* Test if the final remap can be done with a single insn. For V4SFmode or
43455 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43456 start_sequence ();
43457 ok = expand_vec_perm_1 (&dfinal);
43458 seq = get_insns ();
43459 end_sequence ();
43460
43461 if (!ok)
43462 return false;
43463
43464 if (d->testing_p)
43465 return true;
43466
43467 if (dremap.vmode != dfinal.vmode)
43468 {
43469 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43470 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43471 }
43472
43473 ok = expand_vec_perm_1 (&dremap);
43474 gcc_assert (ok);
43475
43476 emit_insn (seq);
43477 return true;
43478 }
43479
43480 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43481 a single vector cross-lane permutation into vpermq followed
43482 by any of the single insn permutations. */
43483
43484 static bool
43485 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43486 {
43487 struct expand_vec_perm_d dremap, dfinal;
43488 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43489 unsigned contents[2];
43490 bool ok;
43491
43492 if (!(TARGET_AVX2
43493 && (d->vmode == V32QImode || d->vmode == V16HImode)
43494 && d->one_operand_p))
43495 return false;
43496
43497 contents[0] = 0;
43498 contents[1] = 0;
43499 for (i = 0; i < nelt2; ++i)
43500 {
43501 contents[0] |= 1u << (d->perm[i] / nelt4);
43502 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43503 }
43504
43505 for (i = 0; i < 2; ++i)
43506 {
43507 unsigned int cnt = 0;
43508 for (j = 0; j < 4; ++j)
43509 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43510 return false;
43511 }
43512
43513 if (d->testing_p)
43514 return true;
43515
43516 dremap = *d;
43517 dremap.vmode = V4DImode;
43518 dremap.nelt = 4;
43519 dremap.target = gen_reg_rtx (V4DImode);
43520 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43521 dremap.op1 = dremap.op0;
43522 dremap.one_operand_p = true;
43523 for (i = 0; i < 2; ++i)
43524 {
43525 unsigned int cnt = 0;
43526 for (j = 0; j < 4; ++j)
43527 if ((contents[i] & (1u << j)) != 0)
43528 dremap.perm[2 * i + cnt++] = j;
43529 for (; cnt < 2; ++cnt)
43530 dremap.perm[2 * i + cnt] = 0;
43531 }
43532
43533 dfinal = *d;
43534 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43535 dfinal.op1 = dfinal.op0;
43536 dfinal.one_operand_p = true;
43537 for (i = 0, j = 0; i < nelt; ++i)
43538 {
43539 if (i == nelt2)
43540 j = 2;
43541 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43542 if ((d->perm[i] / nelt4) == dremap.perm[j])
43543 ;
43544 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43545 dfinal.perm[i] |= nelt4;
43546 else
43547 gcc_unreachable ();
43548 }
43549
43550 ok = expand_vec_perm_1 (&dremap);
43551 gcc_assert (ok);
43552
43553 ok = expand_vec_perm_1 (&dfinal);
43554 gcc_assert (ok);
43555
43556 return true;
43557 }
43558
43559 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43560 a vector permutation using two instructions: vperm2f128 (or
43561 vperm2i128) followed by any single in-lane permutation. */
43562
43563 static bool
43564 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43565 {
43566 struct expand_vec_perm_d dfirst, dsecond;
43567 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43568 bool ok;
43569
43570 if (!TARGET_AVX
43571 || GET_MODE_SIZE (d->vmode) != 32
43572 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43573 return false;
43574
43575 dsecond = *d;
43576 dsecond.one_operand_p = false;
43577 dsecond.testing_p = true;
43578
43579 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43580 immediate. For perm < 16 the second permutation uses
43581 d->op0 as first operand, for perm >= 16 it uses d->op1
43582 as first operand. The second operand is the result of
43583 vperm2[fi]128. */
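/* For example, perm == 6 (binary 0110) asks for source lane 2 (the low
   lane of d->op1) in the low half and source lane 1 (the high lane of
   d->op0) in the high half; ((6 << 2) | 6) & 0x33 == 0x12, which encodes
   exactly those two lane selections in imm8 bits 1:0 and 5:4.  */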
43584 for (perm = 0; perm < 32; perm++)
43585 {
43586 /* Ignore permutations which do not move anything cross-lane. */
43587 if (perm < 16)
43588 {
43589 /* The second shuffle for e.g. V4DFmode has
43590 0123 and ABCD operands.
43591 Ignore AB23, as 23 is already in the second lane
43592 of the first operand. */
43593 if ((perm & 0xc) == (1 << 2)) continue;
43594 /* And 01CD, as 01 is in the first lane of the first
43595 operand. */
43596 if ((perm & 3) == 0) continue;
43597 /* And 4567, as then the vperm2[fi]128 doesn't change
43598 anything on the original 4567 second operand. */
43599 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43600 }
43601 else
43602 {
43603 /* The second shuffle for e.g. V4DFmode has
43604 4567 and ABCD operands.
43605 Ignore AB67, as 67 is already in the second lane
43606 of the first operand. */
43607 if ((perm & 0xc) == (3 << 2)) continue;
43608 /* And 45CD, as 45 is in the first lane of the first
43609 operand. */
43610 if ((perm & 3) == 2) continue;
43611 /* And 0123, as then the vperm2[fi]128 doesn't change
43612 anything on the original 0123 first operand. */
43613 if ((perm & 0xf) == (1 << 2)) continue;
43614 }
43615
43616 for (i = 0; i < nelt; i++)
43617 {
43618 j = d->perm[i] / nelt2;
43619 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43620 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43621 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43622 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43623 else
43624 break;
43625 }
43626
43627 if (i == nelt)
43628 {
43629 start_sequence ();
43630 ok = expand_vec_perm_1 (&dsecond);
43631 end_sequence ();
43632 }
43633 else
43634 ok = false;
43635
43636 if (ok)
43637 {
43638 if (d->testing_p)
43639 return true;
43640
43641 /* Found a usable second shuffle. dfirst will be
43642 vperm2f128 on d->op0 and d->op1. */
43643 dsecond.testing_p = false;
43644 dfirst = *d;
43645 dfirst.target = gen_reg_rtx (d->vmode);
43646 for (i = 0; i < nelt; i++)
43647 dfirst.perm[i] = (i & (nelt2 - 1))
43648 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43649
43650 ok = expand_vec_perm_1 (&dfirst);
43651 gcc_assert (ok);
43652
43653 /* And dsecond is some single insn shuffle, taking
43654 d->op0 and result of vperm2f128 (if perm < 16) or
43655 d->op1 and result of vperm2f128 (otherwise). */
43656 dsecond.op1 = dfirst.target;
43657 if (perm >= 16)
43658 dsecond.op0 = dfirst.op1;
43659
43660 ok = expand_vec_perm_1 (&dsecond);
43661 gcc_assert (ok);
43662
43663 return true;
43664 }
43665
43666 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43667 if (d->one_operand_p)
43668 return false;
43669 }
43670
43671 return false;
43672 }
43673
43674 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43675 a two-vector permutation using 2 intra-lane interleave insns
43676 and a cross-lane shuffle for 32-byte vectors. */
43677
43678 static bool
43679 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43680 {
43681 unsigned i, nelt;
43682 rtx (*gen) (rtx, rtx, rtx);
43683
43684 if (d->one_operand_p)
43685 return false;
43686 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43687 ;
43688 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43689 ;
43690 else
43691 return false;
43692
43693 nelt = d->nelt;
43694 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43695 return false;
43696 for (i = 0; i < nelt; i += 2)
43697 if (d->perm[i] != d->perm[0] + i / 2
43698 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43699 return false;
43700
43701 if (d->testing_p)
43702 return true;
43703
43704 switch (d->vmode)
43705 {
43706 case V32QImode:
43707 if (d->perm[0])
43708 gen = gen_vec_interleave_highv32qi;
43709 else
43710 gen = gen_vec_interleave_lowv32qi;
43711 break;
43712 case V16HImode:
43713 if (d->perm[0])
43714 gen = gen_vec_interleave_highv16hi;
43715 else
43716 gen = gen_vec_interleave_lowv16hi;
43717 break;
43718 case V8SImode:
43719 if (d->perm[0])
43720 gen = gen_vec_interleave_highv8si;
43721 else
43722 gen = gen_vec_interleave_lowv8si;
43723 break;
43724 case V4DImode:
43725 if (d->perm[0])
43726 gen = gen_vec_interleave_highv4di;
43727 else
43728 gen = gen_vec_interleave_lowv4di;
43729 break;
43730 case V8SFmode:
43731 if (d->perm[0])
43732 gen = gen_vec_interleave_highv8sf;
43733 else
43734 gen = gen_vec_interleave_lowv8sf;
43735 break;
43736 case V4DFmode:
43737 if (d->perm[0])
43738 gen = gen_vec_interleave_highv4df;
43739 else
43740 gen = gen_vec_interleave_lowv4df;
43741 break;
43742 default:
43743 gcc_unreachable ();
43744 }
43745
43746 emit_insn (gen (d->target, d->op0, d->op1));
43747 return true;
43748 }
43749
43750 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
43751 a single-vector permutation using a single intra-lane vector
43752 permutation, a vperm2f128 swapping the lanes, and a vblend* insn blending
43753 the non-swapped and swapped vectors together. */
43754
43755 static bool
43756 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43757 {
43758 struct expand_vec_perm_d dfirst, dsecond;
43759 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43760 rtx seq;
43761 bool ok;
43762 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43763
43764 if (!TARGET_AVX
43765 || TARGET_AVX2
43766 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43767 || !d->one_operand_p)
43768 return false;
43769
43770 dfirst = *d;
43771 for (i = 0; i < nelt; i++)
43772 dfirst.perm[i] = 0xff;
43773 for (i = 0, msk = 0; i < nelt; i++)
43774 {
43775 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43776 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43777 return false;
43778 dfirst.perm[j] = d->perm[i];
43779 if (j != i)
43780 msk |= (1 << i);
43781 }
43782 for (i = 0; i < nelt; i++)
43783 if (dfirst.perm[i] == 0xff)
43784 dfirst.perm[i] = i;
43785
43786 if (!d->testing_p)
43787 dfirst.target = gen_reg_rtx (dfirst.vmode);
43788
43789 start_sequence ();
43790 ok = expand_vec_perm_1 (&dfirst);
43791 seq = get_insns ();
43792 end_sequence ();
43793
43794 if (!ok)
43795 return false;
43796
43797 if (d->testing_p)
43798 return true;
43799
43800 emit_insn (seq);
43801
43802 dsecond = *d;
43803 dsecond.op0 = dfirst.target;
43804 dsecond.op1 = dfirst.target;
43805 dsecond.one_operand_p = true;
43806 dsecond.target = gen_reg_rtx (dsecond.vmode);
43807 for (i = 0; i < nelt; i++)
43808 dsecond.perm[i] = i ^ nelt2;
43809
43810 ok = expand_vec_perm_1 (&dsecond);
43811 gcc_assert (ok);
43812
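/* Bit I of MSK is set iff element I must be taken from the lane-swapped
   copy (DSECOND); that is exactly what the vblendps/vblendpd immediate
   selects below.  */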
43813 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43814 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43815 return true;
43816 }
43817
43818 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
43819 permutation using two vperm2f128 insns, followed by a vshufpd insn blending
43820 the two vectors together. */
43821
43822 static bool
43823 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43824 {
43825 struct expand_vec_perm_d dfirst, dsecond, dthird;
43826 bool ok;
43827
43828 if (!TARGET_AVX || (d->vmode != V4DFmode))
43829 return false;
43830
43831 if (d->testing_p)
43832 return true;
43833
43834 dfirst = *d;
43835 dsecond = *d;
43836 dthird = *d;
43837
43838 dfirst.perm[0] = (d->perm[0] & ~1);
43839 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43840 dfirst.perm[2] = (d->perm[2] & ~1);
43841 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43842 dsecond.perm[0] = (d->perm[1] & ~1);
43843 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43844 dsecond.perm[2] = (d->perm[3] & ~1);
43845 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43846 dthird.perm[0] = (d->perm[0] % 2);
43847 dthird.perm[1] = (d->perm[1] % 2) + 4;
43848 dthird.perm[2] = (d->perm[2] % 2) + 2;
43849 dthird.perm[3] = (d->perm[3] % 2) + 6;
43850
43851 dfirst.target = gen_reg_rtx (dfirst.vmode);
43852 dsecond.target = gen_reg_rtx (dsecond.vmode);
43853 dthird.op0 = dfirst.target;
43854 dthird.op1 = dsecond.target;
43855 dthird.one_operand_p = false;
43856
43857 canonicalize_perm (&dfirst);
43858 canonicalize_perm (&dsecond);
43859
43860 ok = expand_vec_perm_1 (&dfirst)
43861 && expand_vec_perm_1 (&dsecond)
43862 && expand_vec_perm_1 (&dthird);
43863
43864 gcc_assert (ok);
43865
43866 return true;
43867 }
43868
43869 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43870 permutation with two pshufb insns and an ior. We should have already
43871 failed all two-instruction sequences. */
43872
43873 static bool
43874 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43875 {
43876 rtx rperm[2][16], vperm, l, h, op, m128;
43877 unsigned int i, nelt, eltsz;
43878
43879 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43880 return false;
43881 gcc_assert (!d->one_operand_p);
43882
43883 if (d->testing_p)
43884 return true;
43885
43886 nelt = d->nelt;
43887 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43888
43889 /* Generate two permutation masks. If the required element is within
43890 the given vector it is shuffled into the proper lane. If the required
43891 element is in the other vector, force a zero into the lane by setting
43892 bit 7 in the permutation mask. */
43893 m128 = GEN_INT (-128);
43894 for (i = 0; i < nelt; ++i)
43895 {
43896 unsigned j, e = d->perm[i];
43897 unsigned which = (e >= nelt);
43898 if (e >= nelt)
43899 e -= nelt;
43900
43901 for (j = 0; j < eltsz; ++j)
43902 {
43903 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43904 rperm[1-which][i*eltsz + j] = m128;
43905 }
43906 }
43907
43908 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43909 vperm = force_reg (V16QImode, vperm);
43910
43911 l = gen_reg_rtx (V16QImode);
43912 op = gen_lowpart (V16QImode, d->op0);
43913 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43914
43915 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43916 vperm = force_reg (V16QImode, vperm);
43917
43918 h = gen_reg_rtx (V16QImode);
43919 op = gen_lowpart (V16QImode, d->op1);
43920 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43921
43922 op = d->target;
43923 if (d->vmode != V16QImode)
43924 op = gen_reg_rtx (V16QImode);
43925 emit_insn (gen_iorv16qi3 (op, l, h));
43926 if (op != d->target)
43927 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43928
43929 return true;
43930 }
43931
43932 /* Implement an arbitrary permutation of a single V32QImode or V16QImode
43933 operand with two vpshufb insns, vpermq and vpor. We should have already
43934 failed all two- or three-instruction sequences. */
43935
43936 static bool
43937 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43938 {
43939 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43940 unsigned int i, nelt, eltsz;
43941
43942 if (!TARGET_AVX2
43943 || !d->one_operand_p
43944 || (d->vmode != V32QImode && d->vmode != V16HImode))
43945 return false;
43946
43947 if (d->testing_p)
43948 return true;
43949
43950 nelt = d->nelt;
43951 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43952
43953 /* Generate two permutation masks. If the required element is within
43954 the same lane, it is shuffled in. If the required element is from the
43955 other lane, force a zero by setting bit 7 in the permutation mask.
43956 In the other mask, an element is non-negative if it is requested
43957 from the other lane; it is also moved to the other lane, so that
43958 the result of vpshufb can have the two V2TImode halves
43959 swapped. */
43960 m128 = GEN_INT (-128);
43961 for (i = 0; i < nelt; ++i)
43962 {
43963 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43964 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43965
43966 for (j = 0; j < eltsz; ++j)
43967 {
43968 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43969 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43970 }
43971 }
43972
43973 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43974 vperm = force_reg (V32QImode, vperm);
43975
43976 h = gen_reg_rtx (V32QImode);
43977 op = gen_lowpart (V32QImode, d->op0);
43978 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43979
43980 /* Swap the 128-bit lanes of h into hp. */
43981 hp = gen_reg_rtx (V4DImode);
43982 op = gen_lowpart (V4DImode, h);
43983 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43984 const1_rtx));
43985
43986 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43987 vperm = force_reg (V32QImode, vperm);
43988
43989 l = gen_reg_rtx (V32QImode);
43990 op = gen_lowpart (V32QImode, d->op0);
43991 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43992
43993 op = d->target;
43994 if (d->vmode != V32QImode)
43995 op = gen_reg_rtx (V32QImode);
43996 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43997 if (op != d->target)
43998 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43999
44000 return true;
44001 }
44002
44003 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44004 and extract-odd permutations of two V32QImode or V16QImode operands
44005 with two vpshufb insns, vpor and vpermq. We should have already
44006 failed all two- or three-instruction sequences. */
44007
44008 static bool
44009 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44010 {
44011 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44012 unsigned int i, nelt, eltsz;
44013
44014 if (!TARGET_AVX2
44015 || d->one_operand_p
44016 || (d->vmode != V32QImode && d->vmode != V16HImode))
44017 return false;
44018
44019 for (i = 0; i < d->nelt; ++i)
44020 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44021 return false;
44022
44023 if (d->testing_p)
44024 return true;
44025
44026 nelt = d->nelt;
44027 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44028
44029 /* Generate two permutation masks. In the first permutation mask
44030 the first quarter will contain indexes for the first half
44031 of op0, the second quarter will contain bit 7 set, the third quarter
44032 will contain indexes for the second half of op0 and the
44033 last quarter bit 7 set. In the second permutation mask
44034 the first quarter will contain bit 7 set, the second quarter
44035 indexes for the first half of op1, the third quarter bit 7 set
44036 and the last quarter indexes for the second half of op1.
44037 I.e. the first mask e.g. for V32QImode extract even will be:
44038 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44039 (all values masked with 0xf except for -128) and the second mask
44040 for extract even will be
44041 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44042 m128 = GEN_INT (-128);
44043 for (i = 0; i < nelt; ++i)
44044 {
44045 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44046 unsigned which = d->perm[i] >= nelt;
44047 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44048
44049 for (j = 0; j < eltsz; ++j)
44050 {
44051 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44052 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44053 }
44054 }
44055
44056 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44057 vperm = force_reg (V32QImode, vperm);
44058
44059 l = gen_reg_rtx (V32QImode);
44060 op = gen_lowpart (V32QImode, d->op0);
44061 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44062
44063 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44064 vperm = force_reg (V32QImode, vperm);
44065
44066 h = gen_reg_rtx (V32QImode);
44067 op = gen_lowpart (V32QImode, d->op1);
44068 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44069
44070 ior = gen_reg_rtx (V32QImode);
44071 emit_insn (gen_iorv32qi3 (ior, l, h));
44072
44073 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44074 op = gen_reg_rtx (V4DImode);
44075 ior = gen_lowpart (V4DImode, ior);
44076 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44077 const1_rtx, GEN_INT (3)));
44078 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44079
44080 return true;
44081 }
44082
44083 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44084 and extract-odd permutations. */
44085
44086 static bool
44087 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44088 {
44089 rtx t1, t2, t3, t4, t5;
44090
44091 switch (d->vmode)
44092 {
44093 case V4DFmode:
44094 if (d->testing_p)
44095 break;
44096 t1 = gen_reg_rtx (V4DFmode);
44097 t2 = gen_reg_rtx (V4DFmode);
44098
44099 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44100 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44101 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44102
44103 /* Now an unpck[lh]pd will produce the result required. */
44104 if (odd)
44105 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44106 else
44107 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44108 emit_insn (t3);
44109 break;
44110
44111 case V8SFmode:
44112 {
44113 int mask = odd ? 0xdd : 0x88;
44114
44115 if (d->testing_p)
44116 break;
44117 t1 = gen_reg_rtx (V8SFmode);
44118 t2 = gen_reg_rtx (V8SFmode);
44119 t3 = gen_reg_rtx (V8SFmode);
44120
44121 /* Shuffle within the 128-bit lanes to produce:
44122 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44123 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44124 GEN_INT (mask)));
44125
44126 /* Shuffle the lanes around to produce:
44127 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44128 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44129 GEN_INT (0x3)));
44130
44131 /* Shuffle within the 128-bit lanes to produce:
44132 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44133 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44134
44135 /* Shuffle within the 128-bit lanes to produce:
44136 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44137 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44138
44139 /* Shuffle the lanes around to produce:
44140 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44141 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44142 GEN_INT (0x20)));
44143 }
44144 break;
44145
44146 case V2DFmode:
44147 case V4SFmode:
44148 case V2DImode:
44149 case V4SImode:
44150 /* These are always directly implementable by expand_vec_perm_1. */
44151 gcc_unreachable ();
44152
44153 case V8HImode:
44154 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44155 return expand_vec_perm_pshufb2 (d);
44156 else
44157 {
44158 if (d->testing_p)
44159 break;
44160 /* We need 2*log2(N)-1 operations to achieve odd/even
44161 with interleave. */
44162 t1 = gen_reg_rtx (V8HImode);
44163 t2 = gen_reg_rtx (V8HImode);
44164 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44165 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44166 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44167 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44168 if (odd)
44169 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44170 else
44171 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44172 emit_insn (t3);
44173 }
44174 break;
44175
44176 case V16QImode:
44177 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44178 return expand_vec_perm_pshufb2 (d);
44179 else
44180 {
44181 if (d->testing_p)
44182 break;
44183 t1 = gen_reg_rtx (V16QImode);
44184 t2 = gen_reg_rtx (V16QImode);
44185 t3 = gen_reg_rtx (V16QImode);
44186 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44187 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44188 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44189 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44190 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44191 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44192 if (odd)
44193 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44194 else
44195 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44196 emit_insn (t3);
44197 }
44198 break;
44199
44200 case V16HImode:
44201 case V32QImode:
44202 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44203
44204 case V4DImode:
44205 if (!TARGET_AVX2)
44206 {
44207 struct expand_vec_perm_d d_copy = *d;
44208 d_copy.vmode = V4DFmode;
44209 if (d->testing_p)
44210 d_copy.target = gen_lowpart (V4DFmode, d->target);
44211 else
44212 d_copy.target = gen_reg_rtx (V4DFmode);
44213 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44214 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44215 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44216 {
44217 if (!d->testing_p)
44218 emit_move_insn (d->target,
44219 gen_lowpart (V4DImode, d_copy.target));
44220 return true;
44221 }
44222 return false;
44223 }
44224
44225 if (d->testing_p)
44226 break;
44227
44228 t1 = gen_reg_rtx (V4DImode);
44229 t2 = gen_reg_rtx (V4DImode);
44230
44231 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44232 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44233 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44234
44235 /* Now a vpunpck[lh]qdq will produce the result required. */
44236 if (odd)
44237 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44238 else
44239 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44240 emit_insn (t3);
44241 break;
44242
44243 case V8SImode:
44244 if (!TARGET_AVX2)
44245 {
44246 struct expand_vec_perm_d d_copy = *d;
44247 d_copy.vmode = V8SFmode;
44248 if (d->testing_p)
44249 d_copy.target = gen_lowpart (V8SFmode, d->target);
44250 else
44251 d_copy.target = gen_reg_rtx (V8SFmode);
44252 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44253 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44254 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44255 {
44256 if (!d->testing_p)
44257 emit_move_insn (d->target,
44258 gen_lowpart (V8SImode, d_copy.target));
44259 return true;
44260 }
44261 return false;
44262 }
44263
44264 if (d->testing_p)
44265 break;
44266
44267 t1 = gen_reg_rtx (V8SImode);
44268 t2 = gen_reg_rtx (V8SImode);
44269 t3 = gen_reg_rtx (V4DImode);
44270 t4 = gen_reg_rtx (V4DImode);
44271 t5 = gen_reg_rtx (V4DImode);
44272
44273 /* Shuffle the lanes around into
44274 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44275 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44276 gen_lowpart (V4DImode, d->op1),
44277 GEN_INT (0x20)));
44278 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44279 gen_lowpart (V4DImode, d->op1),
44280 GEN_INT (0x31)));
44281
44282 /* Swap the 2nd and 3rd position in each lane into
44283 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44284 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44285 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44286 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44287 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44288
44289 /* Now a vpunpck[lh]qdq will produce
44290 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f }, respectively. */
44291 if (odd)
44292 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44293 gen_lowpart (V4DImode, t2));
44294 else
44295 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44296 gen_lowpart (V4DImode, t2));
44297 emit_insn (t3);
44298 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44299 break;
44300
44301 default:
44302 gcc_unreachable ();
44303 }
44304
44305 return true;
44306 }
44307
44308 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44309 extract-even and extract-odd permutations. */
44310
44311 static bool
44312 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44313 {
44314 unsigned i, odd, nelt = d->nelt;
44315
44316 odd = d->perm[0];
44317 if (odd != 0 && odd != 1)
44318 return false;
44319
44320 for (i = 1; i < nelt; ++i)
44321 if (d->perm[i] != 2 * i + odd)
44322 return false;
44323
44324 return expand_vec_perm_even_odd_1 (d, odd);
44325 }
44326
44327 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44328 permutations. We assume that expand_vec_perm_1 has already failed. */
44329
44330 static bool
44331 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44332 {
44333 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44334 enum machine_mode vmode = d->vmode;
44335 unsigned char perm2[4];
44336 rtx op0 = d->op0, dest;
44337 bool ok;
44338
44339 switch (vmode)
44340 {
44341 case V4DFmode:
44342 case V8SFmode:
44343 /* These are special-cased in sse.md so that we can optionally
44344 use the vbroadcast instruction. They expand to two insns
44345 if the input happens to be in a register. */
44346 gcc_unreachable ();
44347
44348 case V2DFmode:
44349 case V2DImode:
44350 case V4SFmode:
44351 case V4SImode:
44352 /* These are always implementable using standard shuffle patterns. */
44353 gcc_unreachable ();
44354
44355 case V8HImode:
44356 case V16QImode:
44357 /* These can be implemented via interleave. We save one insn by
44358 stopping once we have promoted to V4SImode and then using pshufd. */
44359 if (d->testing_p)
44360 return true;
44361 do
44362 {
44363 rtx dest;
44364 rtx (*gen) (rtx, rtx, rtx)
44365 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44366 : gen_vec_interleave_lowv8hi;
44367
44368 if (elt >= nelt2)
44369 {
44370 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44371 : gen_vec_interleave_highv8hi;
44372 elt -= nelt2;
44373 }
44374 nelt2 /= 2;
44375
44376 dest = gen_reg_rtx (vmode);
44377 emit_insn (gen (dest, op0, op0));
44378 vmode = get_mode_wider_vector (vmode);
44379 op0 = gen_lowpart (vmode, dest);
44380 }
44381 while (vmode != V4SImode);
44382
44383 memset (perm2, elt, 4);
44384 dest = gen_reg_rtx (V4SImode);
44385 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44386 gcc_assert (ok);
44387 if (!d->testing_p)
44388 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44389 return true;
44390
44391 case V32QImode:
44392 case V16HImode:
44393 case V8SImode:
44394 case V4DImode:
44395 /* For AVX2, broadcasts of the first element should already have been
44396 handled by expand_vec_perm_1 using vpbroadcast* or vpermq. */
44397 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44398 return false;
44399
44400 default:
44401 gcc_unreachable ();
44402 }
44403 }
44404
44405 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44406 broadcast permutations. */
44407
44408 static bool
44409 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44410 {
44411 unsigned i, elt, nelt = d->nelt;
44412
44413 if (!d->one_operand_p)
44414 return false;
44415
44416 elt = d->perm[0];
44417 for (i = 1; i < nelt; ++i)
44418 if (d->perm[i] != elt)
44419 return false;
44420
44421 return expand_vec_perm_broadcast_1 (d);
44422 }
44423
44424 /* Implement an arbitrary permutation of two V32QImode or V16QImode operands
44425 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44426 all the shorter instruction sequences. */
44427
44428 static bool
44429 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44430 {
44431 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44432 unsigned int i, nelt, eltsz;
44433 bool used[4];
44434
44435 if (!TARGET_AVX2
44436 || d->one_operand_p
44437 || (d->vmode != V32QImode && d->vmode != V16HImode))
44438 return false;
44439
44440 if (d->testing_p)
44441 return true;
44442
44443 nelt = d->nelt;
44444 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44445
44446 /* Generate 4 permutation masks. If the required element is within
44447 the same lane, it is shuffled in. If the required element is from the
44448 other lane, force a zero by setting bit 7 in the permutation mask.
44449 In the other mask, an element is non-negative if it is requested
44450 from the other lane; it is also moved to the other lane, so that
44451 the result of vpshufb can have the two V2TImode halves
44452 swapped. */
44453 m128 = GEN_INT (-128);
44454 for (i = 0; i < 32; ++i)
44455 {
44456 rperm[0][i] = m128;
44457 rperm[1][i] = m128;
44458 rperm[2][i] = m128;
44459 rperm[3][i] = m128;
44460 }
44461 used[0] = false;
44462 used[1] = false;
44463 used[2] = false;
44464 used[3] = false;
44465 for (i = 0; i < nelt; ++i)
44466 {
44467 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44468 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44469 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44470
44471 for (j = 0; j < eltsz; ++j)
44472 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44473 used[which] = true;
44474 }
44475
44476 for (i = 0; i < 2; ++i)
44477 {
44478 if (!used[2 * i + 1])
44479 {
44480 h[i] = NULL_RTX;
44481 continue;
44482 }
44483 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44484 gen_rtvec_v (32, rperm[2 * i + 1]));
44485 vperm = force_reg (V32QImode, vperm);
44486 h[i] = gen_reg_rtx (V32QImode);
44487 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44488 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44489 }
44490
44491 /* Swap the 128-bit lanes of h[X]. */
44492 for (i = 0; i < 2; ++i)
44493 {
44494 if (h[i] == NULL_RTX)
44495 continue;
44496 op = gen_reg_rtx (V4DImode);
44497 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44498 const2_rtx, GEN_INT (3), const0_rtx,
44499 const1_rtx));
44500 h[i] = gen_lowpart (V32QImode, op);
44501 }
44502
44503 for (i = 0; i < 2; ++i)
44504 {
44505 if (!used[2 * i])
44506 {
44507 l[i] = NULL_RTX;
44508 continue;
44509 }
44510 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44511 vperm = force_reg (V32QImode, vperm);
44512 l[i] = gen_reg_rtx (V32QImode);
44513 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44514 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44515 }
44516
44517 for (i = 0; i < 2; ++i)
44518 {
44519 if (h[i] && l[i])
44520 {
44521 op = gen_reg_rtx (V32QImode);
44522 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44523 l[i] = op;
44524 }
44525 else if (h[i])
44526 l[i] = h[i];
44527 }
44528
44529 gcc_assert (l[0] && l[1]);
44530 op = d->target;
44531 if (d->vmode != V32QImode)
44532 op = gen_reg_rtx (V32QImode);
44533 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44534 if (op != d->target)
44535 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44536 return true;
44537 }
44538
44539 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44540 With all of the interface bits taken care of, perform the expansion
44541 in D and return true on success. */
44542
44543 static bool
44544 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44545 {
44546 /* Try a single instruction expansion. */
44547 if (expand_vec_perm_1 (d))
44548 return true;
44549
44550 /* Try sequences of two instructions. */
44551
44552 if (expand_vec_perm_pshuflw_pshufhw (d))
44553 return true;
44554
44555 if (expand_vec_perm_palignr (d))
44556 return true;
44557
44558 if (expand_vec_perm_interleave2 (d))
44559 return true;
44560
44561 if (expand_vec_perm_broadcast (d))
44562 return true;
44563
44564 if (expand_vec_perm_vpermq_perm_1 (d))
44565 return true;
44566
44567 if (expand_vec_perm_vperm2f128 (d))
44568 return true;
44569
44570 /* Try sequences of three instructions. */
44571
44572 if (expand_vec_perm_2vperm2f128_vshuf (d))
44573 return true;
44574
44575 if (expand_vec_perm_pshufb2 (d))
44576 return true;
44577
44578 if (expand_vec_perm_interleave3 (d))
44579 return true;
44580
44581 if (expand_vec_perm_vperm2f128_vblend (d))
44582 return true;
44583
44584 /* Try sequences of four instructions. */
44585
44586 if (expand_vec_perm_vpshufb2_vpermq (d))
44587 return true;
44588
44589 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44590 return true;
44591
44592 /* ??? Look for narrow permutations whose element orderings would
44593 allow the promotion to a wider mode. */
44594
44595 /* ??? Look for sequences of interleave or a wider permute that place
44596 the data into the correct lanes for a half-vector shuffle like
44597 pshuf[lh]w or vpermilps. */
44598
44599 /* ??? Look for sequences of interleave that produce the desired results.
44600 The combinatorics of punpck[lh] get pretty ugly... */
44601
44602 if (expand_vec_perm_even_odd (d))
44603 return true;
44604
44605 /* Even longer sequences. */
44606 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44607 return true;
44608
44609 return false;
44610 }
44611
44612 /* If a permutation only uses one operand, make it clear. Returns true
44613 if the permutation references both operands. */
44614
44615 static bool
44616 canonicalize_perm (struct expand_vec_perm_d *d)
44617 {
44618 int i, which, nelt = d->nelt;
44619
44620 for (i = which = 0; i < nelt; ++i)
44621 which |= (d->perm[i] < nelt ? 1 : 2);
44622
44623 d->one_operand_p = true;
44624 switch (which)
44625 {
44626 default:
44627 gcc_unreachable ();
44628
44629 case 3:
44630 if (!rtx_equal_p (d->op0, d->op1))
44631 {
44632 d->one_operand_p = false;
44633 break;
44634 }
44635 /* The elements of PERM do not suggest that only the first operand
44636 is used, but both operands are identical. Allow easier matching
44637 of the permutation by folding the permutation into the single
44638 input vector. */
44639 /* FALLTHRU */
44640
44641 case 2:
44642 for (i = 0; i < nelt; ++i)
44643 d->perm[i] &= nelt - 1;
44644 d->op0 = d->op1;
44645 break;
44646
44647 case 1:
44648 d->op1 = d->op0;
44649 break;
44650 }
44651
44652 return (which == 3);
44653 }
44654
44655 bool
44656 ix86_expand_vec_perm_const (rtx operands[4])
44657 {
44658 struct expand_vec_perm_d d;
44659 unsigned char perm[MAX_VECT_LEN];
44660 int i, nelt;
44661 bool two_args;
44662 rtx sel;
44663
44664 d.target = operands[0];
44665 d.op0 = operands[1];
44666 d.op1 = operands[2];
44667 sel = operands[3];
44668
44669 d.vmode = GET_MODE (d.target);
44670 gcc_assert (VECTOR_MODE_P (d.vmode));
44671 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44672 d.testing_p = false;
44673
44674 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44675 gcc_assert (XVECLEN (sel, 0) == nelt);
44676 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44677
44678 for (i = 0; i < nelt; ++i)
44679 {
44680 rtx e = XVECEXP (sel, 0, i);
44681 int ei = INTVAL (e) & (2 * nelt - 1);
44682 d.perm[i] = ei;
44683 perm[i] = ei;
44684 }
44685
44686 two_args = canonicalize_perm (&d);
44687
44688 if (ix86_expand_vec_perm_const_1 (&d))
44689 return true;
44690
44691 /* If the selector says both arguments are needed, but the operands are the
44692 same, the above tried to expand with one_operand_p and flattened selector.
44693 If that didn't work, retry without one_operand_p; we succeeded with that
44694 during testing. */
44695 if (two_args && d.one_operand_p)
44696 {
44697 d.one_operand_p = false;
44698 memcpy (d.perm, perm, sizeof (perm));
44699 return ix86_expand_vec_perm_const_1 (&d);
44700 }
44701
44702 return false;
44703 }
44704
44705 /* Implement targetm.vectorize.vec_perm_const_ok. */
44706
44707 static bool
44708 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44709 const unsigned char *sel)
44710 {
44711 struct expand_vec_perm_d d;
44712 unsigned int i, nelt, which;
44713 bool ret;
44714
44715 d.vmode = vmode;
44716 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44717 d.testing_p = true;
44718
44719 /* Given sufficient ISA support we can just return true here
44720 for selected vector modes. */
44721 if (d.vmode == V16SImode || d.vmode == V16SFmode
44722 || d.vmode == V8DFmode || d.vmode == V8DImode)
44723 /* All implementable with a single vpermi2 insn. */
44724 return true;
44725 if (GET_MODE_SIZE (d.vmode) == 16)
44726 {
44727 /* All implementable with a single vpperm insn. */
44728 if (TARGET_XOP)
44729 return true;
44730 /* All implementable with 2 pshufb + 1 ior. */
44731 if (TARGET_SSSE3)
44732 return true;
44733 /* All implementable with shufpd or unpck[lh]pd. */
44734 if (d.nelt == 2)
44735 return true;
44736 }
44737
44738 /* Extract the values from the vector CST into the permutation
44739 array in D. */
44740 memcpy (d.perm, sel, nelt);
44741 for (i = which = 0; i < nelt; ++i)
44742 {
44743 unsigned char e = d.perm[i];
44744 gcc_assert (e < 2 * nelt);
44745 which |= (e < nelt ? 1 : 2);
44746 }
44747
44748 /* If all elements come from the second vector, fold them to the first. */
44749 if (which == 2)
44750 for (i = 0; i < nelt; ++i)
44751 d.perm[i] -= nelt;
44752
44753 /* Check whether the mask can be applied to the vector type. */
44754 d.one_operand_p = (which != 3);
44755
44756 /* Implementable with shufps or pshufd. */
44757 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44758 return true;
44759
44760 /* Otherwise we have to go through the motions and see if we can
44761 figure out how to generate the requested permutation. */
44762 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44763 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44764 if (!d.one_operand_p)
44765 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44766
44767 start_sequence ();
44768 ret = ix86_expand_vec_perm_const_1 (&d);
44769 end_sequence ();
44770
44771 return ret;
44772 }
44773
44774 void
44775 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44776 {
44777 struct expand_vec_perm_d d;
44778 unsigned i, nelt;
44779
44780 d.target = targ;
44781 d.op0 = op0;
44782 d.op1 = op1;
44783 d.vmode = GET_MODE (targ);
44784 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44785 d.one_operand_p = false;
44786 d.testing_p = false;
44787
44788 for (i = 0; i < nelt; ++i)
44789 d.perm[i] = i * 2 + odd;
44790
44791 /* We'll either be able to implement the permutation directly... */
44792 if (expand_vec_perm_1 (&d))
44793 return;
44794
44795 /* ... or we use the special-case patterns. */
44796 expand_vec_perm_even_odd_1 (&d, odd);
44797 }
44798
44799 static void
44800 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44801 {
44802 struct expand_vec_perm_d d;
44803 unsigned i, nelt, base;
44804 bool ok;
44805
44806 d.target = targ;
44807 d.op0 = op0;
44808 d.op1 = op1;
44809 d.vmode = GET_MODE (targ);
44810 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44811 d.one_operand_p = false;
44812 d.testing_p = false;
44813
44814 base = high_p ? nelt / 2 : 0;
44815 for (i = 0; i < nelt / 2; ++i)
44816 {
44817 d.perm[i * 2] = i + base;
44818 d.perm[i * 2 + 1] = i + base + nelt;
44819 }
44820
44821 /* Note that for AVX this isn't one instruction. */
44822 ok = ix86_expand_vec_perm_const_1 (&d);
44823 gcc_assert (ok);
44824 }
44825
44826
44827 /* Expand a vector operation CODE for a V*QImode in terms of the
44828 same operation on V*HImode. */
44829
44830 void
44831 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44832 {
44833 enum machine_mode qimode = GET_MODE (dest);
44834 enum machine_mode himode;
44835 rtx (*gen_il) (rtx, rtx, rtx);
44836 rtx (*gen_ih) (rtx, rtx, rtx);
44837 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44838 struct expand_vec_perm_d d;
44839 bool ok, full_interleave;
44840 bool uns_p = false;
44841 int i;
44842
44843 switch (qimode)
44844 {
44845 case V16QImode:
44846 himode = V8HImode;
44847 gen_il = gen_vec_interleave_lowv16qi;
44848 gen_ih = gen_vec_interleave_highv16qi;
44849 break;
44850 case V32QImode:
44851 himode = V16HImode;
44852 gen_il = gen_avx2_interleave_lowv32qi;
44853 gen_ih = gen_avx2_interleave_highv32qi;
44854 break;
44855 default:
44856 gcc_unreachable ();
44857 }
44858
44859 op2_l = op2_h = op2;
44860 switch (code)
44861 {
44862 case MULT:
44863 /* Unpack data such that we've got a source byte in each low byte of
44864 each word. We don't care what goes into the high byte of each word.
44865 Rather than trying to get zero in there, most convenient is to let
44866 it be a copy of the low byte. */
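/* (The low byte of each 16-bit product depends only on the low bytes of
   the two inputs, so the copies in the high bytes are harmless.)  */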
44867 op2_l = gen_reg_rtx (qimode);
44868 op2_h = gen_reg_rtx (qimode);
44869 emit_insn (gen_il (op2_l, op2, op2));
44870 emit_insn (gen_ih (op2_h, op2, op2));
44871 /* FALLTHRU */
44872
44873 op1_l = gen_reg_rtx (qimode);
44874 op1_h = gen_reg_rtx (qimode);
44875 emit_insn (gen_il (op1_l, op1, op1));
44876 emit_insn (gen_ih (op1_h, op1, op1));
44877 full_interleave = qimode == V16QImode;
44878 break;
44879
44880 case ASHIFT:
44881 case LSHIFTRT:
44882 uns_p = true;
44883 /* FALLTHRU */
44884 case ASHIFTRT:
44885 op1_l = gen_reg_rtx (himode);
44886 op1_h = gen_reg_rtx (himode);
44887 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44888 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44889 full_interleave = true;
44890 break;
44891 default:
44892 gcc_unreachable ();
44893 }
44894
44895 /* Perform the operation. */
44896 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44897 1, OPTAB_DIRECT);
44898 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44899 1, OPTAB_DIRECT);
44900 gcc_assert (res_l && res_h);
44901
44902 /* Merge the data back into the right place. */
44903 d.target = dest;
44904 d.op0 = gen_lowpart (qimode, res_l);
44905 d.op1 = gen_lowpart (qimode, res_h);
44906 d.vmode = qimode;
44907 d.nelt = GET_MODE_NUNITS (qimode);
44908 d.one_operand_p = false;
44909 d.testing_p = false;
44910
44911 if (full_interleave)
44912 {
44913 /* For SSE2, we used a full interleave, so the desired
44914 results are in the even elements. */
44915 for (i = 0; i < 32; ++i)
44916 d.perm[i] = i * 2;
44917 }
44918 else
44919 {
44920 /* For AVX, the interleave used above was not cross-lane. So the extraction
44921 picks the even elements, but with the second and third quarters swapped.
44922 Happily, that is even one insn shorter than plain even extraction. */
44923 for (i = 0; i < 32; ++i)
44924 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44925 }
44926
44927 ok = ix86_expand_vec_perm_const_1 (&d);
44928 gcc_assert (ok);
44929
44930 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44931 gen_rtx_fmt_ee (code, qimode, op1, op2));
44932 }
44933
44934 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44935 if op is CONST_VECTOR with all odd elements equal to their
44936 preceding element. */
44937
44938 static bool
44939 const_vector_equal_evenodd_p (rtx op)
44940 {
44941 enum machine_mode mode = GET_MODE (op);
44942 int i, nunits = GET_MODE_NUNITS (mode);
44943 if (GET_CODE (op) != CONST_VECTOR
44944 || nunits != CONST_VECTOR_NUNITS (op))
44945 return false;
44946 for (i = 0; i < nunits; i += 2)
44947 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44948 return false;
44949 return true;
44950 }
44951
44952 void
44953 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44954 bool uns_p, bool odd_p)
44955 {
44956 enum machine_mode mode = GET_MODE (op1);
44957 enum machine_mode wmode = GET_MODE (dest);
44958 rtx x;
44959 rtx orig_op1 = op1, orig_op2 = op2;
44960
44961 if (!nonimmediate_operand (op1, mode))
44962 op1 = force_reg (mode, op1);
44963 if (!nonimmediate_operand (op2, mode))
44964 op2 = force_reg (mode, op2);
44965
44966 /* We only play even/odd games with vectors of SImode. */
44967 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44968
44969 /* If we're looking for the odd results, shift those members down to
44970 the even slots. For some CPUs this is faster than a PSHUFD. */
44971 if (odd_p)
44972 {
44973 /* For XOP use vpmacsdqh, but only for smult, as it is only
44974 signed. */
44975 if (TARGET_XOP && mode == V4SImode && !uns_p)
44976 {
44977 x = force_reg (wmode, CONST0_RTX (wmode));
44978 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44979 return;
44980 }
44981
44982 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44983 if (!const_vector_equal_evenodd_p (orig_op1))
44984 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44985 x, NULL, 1, OPTAB_DIRECT);
44986 if (!const_vector_equal_evenodd_p (orig_op2))
44987 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44988 x, NULL, 1, OPTAB_DIRECT);
44989 op1 = gen_lowpart (mode, op1);
44990 op2 = gen_lowpart (mode, op2);
44991 }
44992
44993 if (mode == V16SImode)
44994 {
44995 if (uns_p)
44996 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44997 else
44998 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44999 }
45000 else if (mode == V8SImode)
45001 {
45002 if (uns_p)
45003 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45004 else
45005 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45006 }
45007 else if (uns_p)
45008 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45009 else if (TARGET_SSE4_1)
45010 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45011 else
45012 {
45013 rtx s1, s2, t0, t1, t2;
45014
45015 /* The easiest way to implement this without PMULDQ is to go through
45016 the motions as if we are performing a full 64-bit multiply, except
45017 that we need to do less shuffling of the elements. */
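/* Concretely, with A and B the 32-bit elements taken as unsigned and
   S(x) the all-ones mask produced below for x < 0, the signed widening
   product is A*B + ((S(A)*B + S(B)*A) << 32) modulo 2^64, because
   S(x) == 2^32 - 1 exactly when x is negative; the add and shift at the
   end compute that correction term.  */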
45018
45019 /* Compute the sign-extension, aka highparts, of the two operands. */
45020 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45021 op1, pc_rtx, pc_rtx);
45022 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45023 op2, pc_rtx, pc_rtx);
45024
45025 /* Multiply LO(A) * HI(B), and vice-versa. */
45026 t1 = gen_reg_rtx (wmode);
45027 t2 = gen_reg_rtx (wmode);
45028 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45029 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45030
45031 /* Multiply LO(A) * LO(B). */
45032 t0 = gen_reg_rtx (wmode);
45033 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45034
45035 /* Combine and shift the highparts into place. */
45036 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45037 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45038 1, OPTAB_DIRECT);
45039
45040 /* Combine high and low parts. */
45041 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45042 return;
45043 }
45044 emit_insn (x);
45045 }
45046
45047 void
45048 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45049 bool uns_p, bool high_p)
45050 {
45051 enum machine_mode wmode = GET_MODE (dest);
45052 enum machine_mode mode = GET_MODE (op1);
45053 rtx t1, t2, t3, t4, mask;
45054
45055 switch (mode)
45056 {
45057 case V4SImode:
45058 t1 = gen_reg_rtx (mode);
45059 t2 = gen_reg_rtx (mode);
45060 if (TARGET_XOP && !uns_p)
45061 {
45062 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45063 shuffle the elements once so that all elements are in the right
45064 place for immediate use: { A C B D }. */
45065 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45066 const1_rtx, GEN_INT (3)));
45067 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45068 const1_rtx, GEN_INT (3)));
45069 }
45070 else
45071 {
45072 /* Put the elements into place for the multiply. */
45073 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45074 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45075 high_p = false;
45076 }
45077 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45078 break;
45079
45080 case V8SImode:
45081 /* Shuffle the elements between the lanes. After this we
45082 have { A B E F | C D G H } for each operand. */
45083 t1 = gen_reg_rtx (V4DImode);
45084 t2 = gen_reg_rtx (V4DImode);
45085 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45086 const0_rtx, const2_rtx,
45087 const1_rtx, GEN_INT (3)));
45088 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45089 const0_rtx, const2_rtx,
45090 const1_rtx, GEN_INT (3)));
45091
45092 /* Shuffle the elements within the lanes. After this we
45093 have { A A B B | C C D D } or { E E F F | G G H H }. */
45094 t3 = gen_reg_rtx (V8SImode);
45095 t4 = gen_reg_rtx (V8SImode);
45096 mask = GEN_INT (high_p
45097 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45098 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45099 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45100 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45101
45102 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45103 break;
45104
45105 case V8HImode:
45106 case V16HImode:
45107 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45108 uns_p, OPTAB_DIRECT);
45109 t2 = expand_binop (mode,
45110 uns_p ? umul_highpart_optab : smul_highpart_optab,
45111 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45112 gcc_assert (t1 && t2);
45113
45114 t3 = gen_reg_rtx (mode);
45115 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45116 emit_move_insn (dest, gen_lowpart (wmode, t3));
45117 break;
45118
45119 case V16QImode:
45120 case V32QImode:
45121 t1 = gen_reg_rtx (wmode);
45122 t2 = gen_reg_rtx (wmode);
45123 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45124 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45125
45126 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45127 break;
45128
45129 default:
45130 gcc_unreachable ();
45131 }
45132 }
45133
45134 void
45135 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45136 {
45137 rtx res_1, res_2, res_3, res_4;
45138
45139 res_1 = gen_reg_rtx (V4SImode);
45140 res_2 = gen_reg_rtx (V4SImode);
45141 res_3 = gen_reg_rtx (V2DImode);
45142 res_4 = gen_reg_rtx (V2DImode);
45143 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45144 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45145
45146 /* Move the results in element 2 down to element 1; we don't care
45147 what goes in elements 2 and 3. Then we can merge the parts
45148 back together with an interleave.
45149
45150 Note that two other sequences were tried:
45151 (1) Use interleaves at the start instead of psrldq, which allows
45152 us to use a single shufps to merge things back at the end.
45153 (2) Use shufps here to combine the two vectors, then pshufd to
45154 put the elements in the correct order.
45155 In both cases the cost of the reformatting stall was too high
45156 and the overall sequence slower. */
45157
45158 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45159 const0_rtx, const2_rtx,
45160 const0_rtx, const0_rtx));
45161 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45162 const0_rtx, const2_rtx,
45163 const0_rtx, const0_rtx));
45164 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45165
45166 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45167 }
45168
45169 void
45170 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45171 {
45172 enum machine_mode mode = GET_MODE (op0);
45173 rtx t1, t2, t3, t4, t5, t6;
45174
45175 if (TARGET_XOP && mode == V2DImode)
45176 {
45177 /* op1: A,B,C,D, op2: E,F,G,H */
45178 op1 = gen_lowpart (V4SImode, op1);
45179 op2 = gen_lowpart (V4SImode, op2);
45180
45181 t1 = gen_reg_rtx (V4SImode);
45182 t2 = gen_reg_rtx (V4SImode);
45183 t3 = gen_reg_rtx (V2DImode);
45184 t4 = gen_reg_rtx (V2DImode);
45185
45186 /* t1: B,A,D,C */
45187 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45188 GEN_INT (1),
45189 GEN_INT (0),
45190 GEN_INT (3),
45191 GEN_INT (2)));
45192
45193 /* t2: (B*E),(A*F),(D*G),(C*H) */
45194 emit_insn (gen_mulv4si3 (t2, t1, op2));
45195
45196 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45197 emit_insn (gen_xop_phadddq (t3, t2));
45198
45199 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45200 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45201
45202 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45203 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45204 }
45205 else
45206 {
45207 enum machine_mode nmode;
45208 rtx (*umul) (rtx, rtx, rtx);
45209
45210 if (mode == V2DImode)
45211 {
45212 umul = gen_vec_widen_umult_even_v4si;
45213 nmode = V4SImode;
45214 }
45215 else if (mode == V4DImode)
45216 {
45217 umul = gen_vec_widen_umult_even_v8si;
45218 nmode = V8SImode;
45219 }
45220 else if (mode == V8DImode)
45221 {
45222 umul = gen_vec_widen_umult_even_v16si;
45223 nmode = V16SImode;
45224 }
45225 else
45226 gcc_unreachable ();
45227
45228
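/* Use the schoolbook decomposition: modulo 2^64,
   a * b == lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32),
   where lo/hi are the 32-bit halves; the hi(a)*hi(b) term only
   affects bits 64 and above and can be dropped.  */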
45229 /* Multiply low parts. */
45230 t1 = gen_reg_rtx (mode);
45231 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45232
45233 /* Shift input vectors right 32 bits so we can multiply high parts. */
45234 t6 = GEN_INT (32);
45235 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45236 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45237
45238 /* Multiply high parts by low parts. */
45239 t4 = gen_reg_rtx (mode);
45240 t5 = gen_reg_rtx (mode);
45241 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45242 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45243
45244 /* Combine and shift the highparts back. */
45245 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45246 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45247
45248 /* Combine high and low parts. */
45249 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45250 }
45251
45252 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45253 gen_rtx_MULT (mode, op1, op2));
45254 }
45255
45256 /* Calculate integer abs() using only SSE2 instructions. */
45257
45258 void
45259 ix86_expand_sse2_abs (rtx target, rtx input)
45260 {
45261 enum machine_mode mode = GET_MODE (target);
45262 rtx tmp0, tmp1, x;
45263
45264 switch (mode)
45265 {
45266 /* For 32-bit signed integer X, the best way to calculate the absolute
45267 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
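/* E.g. for X = -7: X >> 31 == -1 (all ones), (-1 ^ -7) == 6 and
   6 - (-1) == 7; for X >= 0 the shift yields 0 and X is returned
   unchanged.  */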
45268 case V4SImode:
45269 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45270 GEN_INT (GET_MODE_BITSIZE
45271 (GET_MODE_INNER (mode)) - 1),
45272 NULL, 0, OPTAB_DIRECT);
45273 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45274 NULL, 0, OPTAB_DIRECT);
45275 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45276 target, 0, OPTAB_DIRECT);
45277 break;
45278
45279 /* For 16-bit signed integer X, the best way to calculate the absolute
45280 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45281 case V8HImode:
45282 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45283
45284 x = expand_simple_binop (mode, SMAX, tmp0, input,
45285 target, 0, OPTAB_DIRECT);
45286 break;
45287
45288 /* For 8-bit signed integer X, the best way to calculate the absolute
45289 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45290 as SSE2 provides the PMINUB insn. */
45291 case V16QImode:
45292 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45293
45294 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45295 target, 0, OPTAB_DIRECT);
45296 break;
45297
45298 default:
45299 gcc_unreachable ();
45300 }
45301
45302 if (x != target)
45303 emit_move_insn (target, x);
45304 }
45305
45306 /* Expand an insert into a vector register through a pinsr insn.
45307 Return true if successful. */
45308
45309 bool
45310 ix86_expand_pinsr (rtx *operands)
45311 {
45312 rtx dst = operands[0];
45313 rtx src = operands[3];
45314
45315 unsigned int size = INTVAL (operands[1]);
45316 unsigned int pos = INTVAL (operands[2]);
45317
45318 if (GET_CODE (dst) == SUBREG)
45319 {
45320 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45321 dst = SUBREG_REG (dst);
45322 }
45323
45324 if (GET_CODE (src) == SUBREG)
45325 src = SUBREG_REG (src);
45326
45327 switch (GET_MODE (dst))
45328 {
45329 case V16QImode:
45330 case V8HImode:
45331 case V4SImode:
45332 case V2DImode:
45333 {
45334 enum machine_mode srcmode, dstmode;
45335 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45336
45337 srcmode = mode_for_size (size, MODE_INT, 0);
45338
45339 switch (srcmode)
45340 {
45341 case QImode:
45342 if (!TARGET_SSE4_1)
45343 return false;
45344 dstmode = V16QImode;
45345 pinsr = gen_sse4_1_pinsrb;
45346 break;
45347
45348 case HImode:
45349 if (!TARGET_SSE2)
45350 return false;
45351 dstmode = V8HImode;
45352 pinsr = gen_sse2_pinsrw;
45353 break;
45354
45355 case SImode:
45356 if (!TARGET_SSE4_1)
45357 return false;
45358 dstmode = V4SImode;
45359 pinsr = gen_sse4_1_pinsrd;
45360 break;
45361
45362 case DImode:
45363 gcc_assert (TARGET_64BIT);
45364 if (!TARGET_SSE4_1)
45365 return false;
45366 dstmode = V2DImode;
45367 pinsr = gen_sse4_1_pinsrq;
45368 break;
45369
45370 default:
45371 return false;
45372 }
45373
45374 rtx d = dst;
45375 if (GET_MODE (dst) != dstmode)
45376 d = gen_reg_rtx (dstmode);
45377 src = gen_lowpart (srcmode, src);
45378
45379 pos /= size;
45380
45381 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45382 GEN_INT (1 << pos)));
45383 if (d != dst)
45384 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45385 return true;
45386 }
45387
45388 default:
45389 return false;
45390 }
45391 }
45392 \f
45393 /* This function returns the calling-ABI-specific va_list type node,
45394 i.e. the va_list type specific to FNDECL. */
45395
45396 static tree
45397 ix86_fn_abi_va_list (tree fndecl)
45398 {
45399 if (!TARGET_64BIT)
45400 return va_list_type_node;
45401 gcc_assert (fndecl != NULL_TREE);
45402
45403 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45404 return ms_va_list_type_node;
45405 else
45406 return sysv_va_list_type_node;
45407 }
45408
45409 /* Returns the canonical va_list type specified by TYPE. If there
45410 is no valid TYPE provided, it returns NULL_TREE. */
45411
45412 static tree
45413 ix86_canonical_va_list_type (tree type)
45414 {
45415 tree wtype, htype;
45416
45417 /* Resolve references and pointers to va_list type. */
45418 if (TREE_CODE (type) == MEM_REF)
45419 type = TREE_TYPE (type);
45420 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45421 type = TREE_TYPE (type);
45422 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45423 type = TREE_TYPE (type);
45424
45425 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45426 {
45427 wtype = va_list_type_node;
45428 gcc_assert (wtype != NULL_TREE);
45429 htype = type;
45430 if (TREE_CODE (wtype) == ARRAY_TYPE)
45431 {
45432 /* If va_list is an array type, the argument may have decayed
45433 to a pointer type, e.g. by being passed to another function.
45434 In that case, unwrap both types so that we can compare the
45435 underlying records. */
45436 if (TREE_CODE (htype) == ARRAY_TYPE
45437 || POINTER_TYPE_P (htype))
45438 {
45439 wtype = TREE_TYPE (wtype);
45440 htype = TREE_TYPE (htype);
45441 }
45442 }
45443 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45444 return va_list_type_node;
45445 wtype = sysv_va_list_type_node;
45446 gcc_assert (wtype != NULL_TREE);
45447 htype = type;
45448 if (TREE_CODE (wtype) == ARRAY_TYPE)
45449 {
45450 /* If va_list is an array type, the argument may have decayed
45451 to a pointer type, e.g. by being passed to another function.
45452 In that case, unwrap both types so that we can compare the
45453 underlying records. */
45454 if (TREE_CODE (htype) == ARRAY_TYPE
45455 || POINTER_TYPE_P (htype))
45456 {
45457 wtype = TREE_TYPE (wtype);
45458 htype = TREE_TYPE (htype);
45459 }
45460 }
45461 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45462 return sysv_va_list_type_node;
45463 wtype = ms_va_list_type_node;
45464 gcc_assert (wtype != NULL_TREE);
45465 htype = type;
45466 if (TREE_CODE (wtype) == ARRAY_TYPE)
45467 {
45468 /* If va_list is an array type, the argument may have decayed
45469 to a pointer type, e.g. by being passed to another function.
45470 In that case, unwrap both types so that we can compare the
45471 underlying records. */
45472 if (TREE_CODE (htype) == ARRAY_TYPE
45473 || POINTER_TYPE_P (htype))
45474 {
45475 wtype = TREE_TYPE (wtype);
45476 htype = TREE_TYPE (htype);
45477 }
45478 }
45479 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45480 return ms_va_list_type_node;
45481 return NULL_TREE;
45482 }
45483 return std_canonical_va_list_type (type);
45484 }
45485
45486 /* Iterate through the target-specific builtin types for va_list.
45487 IDX denotes the iterator, *PTREE is set to the result type of
45488 the va_list builtin, and *PNAME to its internal name.
45489 Returns zero if there is no element for this index, otherwise
45490 IDX should be increased upon the next call.
45491 Note, do not iterate a base builtin's name like __builtin_va_list.
45492 Used from c_common_nodes_and_builtins. */
45493
45494 static int
45495 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45496 {
45497 if (TARGET_64BIT)
45498 {
45499 switch (idx)
45500 {
45501 default:
45502 break;
45503
45504 case 0:
45505 *ptree = ms_va_list_type_node;
45506 *pname = "__builtin_ms_va_list";
45507 return 1;
45508
45509 case 1:
45510 *ptree = sysv_va_list_type_node;
45511 *pname = "__builtin_sysv_va_list";
45512 return 1;
45513 }
45514 }
45515
45516 return 0;
45517 }
45518
45519 #undef TARGET_SCHED_DISPATCH
45520 #define TARGET_SCHED_DISPATCH has_dispatch
45521 #undef TARGET_SCHED_DISPATCH_DO
45522 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45523 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45524 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45525 #undef TARGET_SCHED_REORDER
45526 #define TARGET_SCHED_REORDER ix86_sched_reorder
45527 #undef TARGET_SCHED_ADJUST_PRIORITY
45528 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45529 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45530 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45531 ix86_dependencies_evaluation_hook
45532
45533 /* The size of the dispatch window is the total number of bytes of
45534 object code allowed in a window. */
45535 #define DISPATCH_WINDOW_SIZE 16
45536
45537 /* Number of dispatch windows considered for scheduling. */
45538 #define MAX_DISPATCH_WINDOWS 3
45539
45540 /* Maximum number of instructions in a window. */
45541 #define MAX_INSN 4
45542
45543 /* Maximum number of immediate operands in a window. */
45544 #define MAX_IMM 4
45545
45546 /* Maximum number of immediate bits allowed in a window. */
45547 #define MAX_IMM_SIZE 128
45548
45549 /* Maximum number of 32 bit immediates allowed in a window. */
45550 #define MAX_IMM_32 4
45551
45552 /* Maximum number of 64 bit immediates allowed in a window. */
45553 #define MAX_IMM_64 2
45554
45555 /* Maximum total of loads or prefetches allowed in a window. */
45556 #define MAX_LOAD 2
45557
45558 /* Maximum total of stores allowed in a window. */
45559 #define MAX_STORE 1
45560
45561 #undef BIG
45562 #define BIG 100
45563
45564
45565 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45566 enum dispatch_group {
45567 disp_no_group = 0,
45568 disp_load,
45569 disp_store,
45570 disp_load_store,
45571 disp_prefetch,
45572 disp_imm,
45573 disp_imm_32,
45574 disp_imm_64,
45575 disp_branch,
45576 disp_cmp,
45577 disp_jcc,
45578 disp_last
45579 };
45580
45581 /* Number of allowable groups in a dispatch window. It is an array
45582 indexed by dispatch_group enum. 100 is used as a big number,
45583 because the number of these kinds of operations does not have any
45584 effect in the dispatch window, but we need them for other reasons in
45585 the table. */
45586 static unsigned int num_allowable_groups[disp_last] = {
45587 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45588 };
45589
45590 char group_name[disp_last + 1][16] = {
45591 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45592 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45593 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45594 };
45595
45596 /* Instruction path. */
45597 enum insn_path {
45598 no_path = 0,
45599 path_single, /* Single micro op. */
45600 path_double, /* Double micro op. */
45601 path_multi, /* Instructions with more than 2 micro ops. */
45602 last_path
45603 };
45604
45605 /* sched_insn_info defines a window to the instructions scheduled in
45606 the basic block. It contains a pointer to the insn_info table and
45607 the instruction scheduled.
45608
45609 Windows are allocated for each basic block and are linked
45610 together. */
45611 typedef struct sched_insn_info_s {
45612 rtx insn;
45613 enum dispatch_group group;
45614 enum insn_path path;
45615 int byte_len;
45616 int imm_bytes;
45617 } sched_insn_info;
45618
45619 /* Linked list of dispatch windows. This is a two way list of
45620 dispatch windows of a basic block. It contains information about
45621 the number of uops in the window and the total number of
45622 instructions and of bytes in the object code for this dispatch
45623 window. */
45624 typedef struct dispatch_windows_s {
45625 int num_insn; /* Number of insn in the window. */
45626 int num_uops; /* Number of uops in the window. */
45627 int window_size; /* Number of bytes in the window. */
45628 int window_num; /* Window number, either 0 or 1. */
45629 int num_imm; /* Number of immediates in an insn. */
45630 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45631 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45632 int imm_size; /* Total immediates in the window. */
45633 int num_loads; /* Total memory loads in the window. */
45634 int num_stores; /* Total memory stores in the window. */
45635 int violation; /* Violation exists in window. */
45636 sched_insn_info *window; /* Pointer to the window. */
45637 struct dispatch_windows_s *next;
45638 struct dispatch_windows_s *prev;
45639 } dispatch_windows;
45640
45641 /* Immediate values used in an insn. */
45642 typedef struct imm_info_s
45643 {
45644 int imm;
45645 int imm32;
45646 int imm64;
45647 } imm_info;
45648
45649 static dispatch_windows *dispatch_window_list;
45650 static dispatch_windows *dispatch_window_list1;
45651
45652 /* Get dispatch group of insn. */
45653
45654 static enum dispatch_group
45655 get_mem_group (rtx insn)
45656 {
45657 enum attr_memory memory;
45658
45659 if (INSN_CODE (insn) < 0)
45660 return disp_no_group;
45661 memory = get_attr_memory (insn);
45662 if (memory == MEMORY_STORE)
45663 return disp_store;
45664
45665 if (memory == MEMORY_LOAD)
45666 return disp_load;
45667
45668 if (memory == MEMORY_BOTH)
45669 return disp_load_store;
45670
45671 return disp_no_group;
45672 }
45673
45674 /* Return true if insn is a compare instruction. */
45675
45676 static bool
45677 is_cmp (rtx insn)
45678 {
45679 enum attr_type type;
45680
45681 type = get_attr_type (insn);
45682 return (type == TYPE_TEST
45683 || type == TYPE_ICMP
45684 || type == TYPE_FCMP
45685 || GET_CODE (PATTERN (insn)) == COMPARE);
45686 }
45687
45688 /* Return true if a dispatch violation was encountered. */
45689
45690 static bool
45691 dispatch_violation (void)
45692 {
45693 if (dispatch_window_list->next)
45694 return dispatch_window_list->next->violation;
45695 return dispatch_window_list->violation;
45696 }
45697
45698 /* Return true if insn is a branch instruction. */
45699
45700 static bool
45701 is_branch (rtx insn)
45702 {
45703 return (CALL_P (insn) || JUMP_P (insn));
45704 }
45705
45706 /* Return true if insn is a prefetch instruction. */
45707
45708 static bool
45709 is_prefetch (rtx insn)
45710 {
45711 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45712 }
45713
45714 /* This function initializes a dispatch window and the list container holding a
45715 pointer to the window. */
45716
45717 static void
45718 init_window (int window_num)
45719 {
45720 int i;
45721 dispatch_windows *new_list;
45722
45723 if (window_num == 0)
45724 new_list = dispatch_window_list;
45725 else
45726 new_list = dispatch_window_list1;
45727
45728 new_list->num_insn = 0;
45729 new_list->num_uops = 0;
45730 new_list->window_size = 0;
45731 new_list->next = NULL;
45732 new_list->prev = NULL;
45733 new_list->window_num = window_num;
45734 new_list->num_imm = 0;
45735 new_list->num_imm_32 = 0;
45736 new_list->num_imm_64 = 0;
45737 new_list->imm_size = 0;
45738 new_list->num_loads = 0;
45739 new_list->num_stores = 0;
45740 new_list->violation = false;
45741
45742 for (i = 0; i < MAX_INSN; i++)
45743 {
45744 new_list->window[i].insn = NULL;
45745 new_list->window[i].group = disp_no_group;
45746 new_list->window[i].path = no_path;
45747 new_list->window[i].byte_len = 0;
45748 new_list->window[i].imm_bytes = 0;
45749 }
45750 return;
45751 }
45752
45753 /* This function allocates and initializes a dispatch window and the
45754 list container holding a pointer to the window. */
45755
45756 static dispatch_windows *
45757 allocate_window (void)
45758 {
45759 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45760 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45761
45762 return new_list;
45763 }
45764
45765 /* This routine initializes the dispatch scheduling information. It
45766 initiates building dispatch scheduler tables and constructs the
45767 first dispatch window. */
45768
45769 static void
45770 init_dispatch_sched (void)
45771 {
45772 /* Allocate a dispatch list and a window. */
45773 dispatch_window_list = allocate_window ();
45774 dispatch_window_list1 = allocate_window ();
45775 init_window (0);
45776 init_window (1);
45777 }
45778
45779 /* This function returns true if a branch is detected. End of a basic block
45780 does not have to be a branch, but here we assume only branches end a
45781 window. */
45782
45783 static bool
45784 is_end_basic_block (enum dispatch_group group)
45785 {
45786 return group == disp_branch;
45787 }
45788
45789 /* This function is called when the end of a window processing is reached. */
45790
45791 static void
45792 process_end_window (void)
45793 {
45794 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45795 if (dispatch_window_list->next)
45796 {
45797 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45798 gcc_assert (dispatch_window_list->window_size
45799 + dispatch_window_list1->window_size <= 48);
45800 init_window (1);
45801 }
45802 init_window (0);
45803 }
45804
45805 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45806 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45807 for 48 bytes of instructions. Note that these windows are not dispatch
45808 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45809
45810 static dispatch_windows *
45811 allocate_next_window (int window_num)
45812 {
45813 if (window_num == 0)
45814 {
45815 if (dispatch_window_list->next)
45816 init_window (1);
45817 init_window (0);
45818 return dispatch_window_list;
45819 }
45820
45821 dispatch_window_list->next = dispatch_window_list1;
45822 dispatch_window_list1->prev = dispatch_window_list;
45823
45824 return dispatch_window_list1;
45825 }
45826
45827 /* Update the immediate operand counters in IMM_VALUES for the rtx *IN_RTX. */
45828
45829 static int
45830 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45831 {
45832 if (*in_rtx == 0)
45833 return 0;
45834
45835 switch (GET_CODE (*in_rtx))
45836 {
45837 case CONST:
45838 case SYMBOL_REF:
45839 case CONST_INT:
45840 (imm_values->imm)++;
45841 if (x86_64_immediate_operand (*in_rtx, SImode))
45842 (imm_values->imm32)++;
45843 else
45844 (imm_values->imm64)++;
45845 break;
45846
45847 case CONST_DOUBLE:
45848 (imm_values->imm)++;
45849 (imm_values->imm64)++;
45850 break;
45851
45852 case CODE_LABEL:
45853 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45854 {
45855 (imm_values->imm)++;
45856 (imm_values->imm32)++;
45857 }
45858 break;
45859
45860 default:
45861 break;
45862 }
45863
45864 return 0;
45865 }
45866
45867 /* Compute number of immediate operands of an instruction. */
45868
45869 static void
45870 find_constant (rtx in_rtx, imm_info *imm_values)
45871 {
45872 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45873 (rtx_function) find_constant_1, (void *) imm_values);
45874 }
45875
45876 /* Return total size of immediate operands of an instruction along with number
45877 of corresponding immediate-operands. It initializes its parameters to zero
45878 before calling FIND_CONSTANT.
45879 INSN is the input instruction. IMM is the total of immediates.
45880 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45881 bit immediates. */
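/* For example, an insn carrying one 32-bit and one 64-bit immediate sets
   *IMM to 2, *IMM32 to 1 and *IMM64 to 1, and returns 4 + 8 == 12 bytes.  */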
45882
45883 static int
45884 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45885 {
45886 imm_info imm_values = {0, 0, 0};
45887
45888 find_constant (insn, &imm_values);
45889 *imm = imm_values.imm;
45890 *imm32 = imm_values.imm32;
45891 *imm64 = imm_values.imm64;
45892 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45893 }
45894
45895 /* This function indicates whether an instruction has any immediate
45896 operand. */
45897
45898 static bool
45899 has_immediate (rtx insn)
45900 {
45901 int num_imm_operand;
45902 int num_imm32_operand;
45903 int num_imm64_operand;
45904
45905 if (insn)
45906 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45907 &num_imm64_operand);
45908 return false;
45909 }
45910
45911 /* Return single or double path for instructions. */
45912
45913 static enum insn_path
45914 get_insn_path (rtx insn)
45915 {
45916 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45917
45918 if ((int)path == 0)
45919 return path_single;
45920
45921 if ((int)path == 1)
45922 return path_double;
45923
45924 return path_multi;
45925 }
45926
45927 /* Return insn dispatch group. */
45928
45929 static enum dispatch_group
45930 get_insn_group (rtx insn)
45931 {
45932 enum dispatch_group group = get_mem_group (insn);
45933 if (group)
45934 return group;
45935
45936 if (is_branch (insn))
45937 return disp_branch;
45938
45939 if (is_cmp (insn))
45940 return disp_cmp;
45941
45942 if (has_immediate (insn))
45943 return disp_imm;
45944
45945 if (is_prefetch (insn))
45946 return disp_prefetch;
45947
45948 return disp_no_group;
45949 }
45950
45951 /* Count number of GROUP restricted instructions in a dispatch
45952 window WINDOW_LIST. */
45953
45954 static int
45955 count_num_restricted (rtx insn, dispatch_windows *window_list)
45956 {
45957 enum dispatch_group group = get_insn_group (insn);
45958 int imm_size;
45959 int num_imm_operand;
45960 int num_imm32_operand;
45961 int num_imm64_operand;
45962
45963 if (group == disp_no_group)
45964 return 0;
45965
45966 if (group == disp_imm)
45967 {
45968 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45969 &num_imm64_operand);
45970 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45971 || num_imm_operand + window_list->num_imm > MAX_IMM
45972 || (num_imm32_operand > 0
45973 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45974 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45975 || (num_imm64_operand > 0
45976 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45977 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45978 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45979 && num_imm64_operand > 0
45980 && ((window_list->num_imm_64 > 0
45981 && window_list->num_insn >= 2)
45982 || window_list->num_insn >= 3)))
45983 return BIG;
45984
45985 return 1;
45986 }
45987
45988 if ((group == disp_load_store
45989 && (window_list->num_loads >= MAX_LOAD
45990 || window_list->num_stores >= MAX_STORE))
45991 || ((group == disp_load
45992 || group == disp_prefetch)
45993 && window_list->num_loads >= MAX_LOAD)
45994 || (group == disp_store
45995 && window_list->num_stores >= MAX_STORE))
45996 return BIG;
45997
45998 return 1;
45999 }
46000
46001 /* This function returns true if insn satisfies dispatch rules on the
46002 last window scheduled. */
46003
46004 static bool
46005 fits_dispatch_window (rtx insn)
46006 {
46007 dispatch_windows *window_list = dispatch_window_list;
46008 dispatch_windows *window_list_next = dispatch_window_list->next;
46009 unsigned int num_restrict;
46010 enum dispatch_group group = get_insn_group (insn);
46011 enum insn_path path = get_insn_path (insn);
46012 int sum;
46013
46014 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46015 instructions should be given the lowest priority in the
46016 scheduling process in Haifa scheduler to make sure they will be
46017 scheduled in the same dispatch window as the reference to them. */
46018 if (group == disp_jcc || group == disp_cmp)
46019 return false;
46020
46021 /* Check nonrestricted. */
46022 if (group == disp_no_group || group == disp_branch)
46023 return true;
46024
46025 /* Get last dispatch window. */
46026 if (window_list_next)
46027 window_list = window_list_next;
46028
46029 if (window_list->window_num == 1)
46030 {
46031 sum = window_list->prev->window_size + window_list->window_size;
46032
46033 if (sum == 32
46034 || (min_insn_size (insn) + sum) >= 48)
46035 /* Window 1 is full. Go for next window. */
46036 return true;
46037 }
46038
46039 num_restrict = count_num_restricted (insn, window_list);
46040
46041 if (num_restrict > num_allowable_groups[group])
46042 return false;
46043
46044 /* See if it fits in the first window. */
46045 if (window_list->window_num == 0)
46046 {
46047 /* The first window should have only single and double path
46048 uops. */
46049 if (path == path_double
46050 && (window_list->num_uops + 2) > MAX_INSN)
46051 return false;
46052 else if (path != path_single)
46053 return false;
46054 }
46055 return true;
46056 }
46057
46058 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46059 dispatch window WINDOW_LIST. */
46060
46061 static void
46062 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46063 {
46064 int byte_len = min_insn_size (insn);
46065 int num_insn = window_list->num_insn;
46066 int imm_size;
46067 sched_insn_info *window = window_list->window;
46068 enum dispatch_group group = get_insn_group (insn);
46069 enum insn_path path = get_insn_path (insn);
46070 int num_imm_operand;
46071 int num_imm32_operand;
46072 int num_imm64_operand;
46073
46074 if (!window_list->violation && group != disp_cmp
46075 && !fits_dispatch_window (insn))
46076 window_list->violation = true;
46077
46078 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46079 &num_imm64_operand);
46080
46081 /* Initialize window with new instruction. */
46082 window[num_insn].insn = insn;
46083 window[num_insn].byte_len = byte_len;
46084 window[num_insn].group = group;
46085 window[num_insn].path = path;
46086 window[num_insn].imm_bytes = imm_size;
46087
46088 window_list->window_size += byte_len;
46089 window_list->num_insn = num_insn + 1;
46090 window_list->num_uops = window_list->num_uops + num_uops;
46091 window_list->imm_size += imm_size;
46092 window_list->num_imm += num_imm_operand;
46093 window_list->num_imm_32 += num_imm32_operand;
46094 window_list->num_imm_64 += num_imm64_operand;
46095
46096 if (group == disp_store)
46097 window_list->num_stores += 1;
46098 else if (group == disp_load
46099 || group == disp_prefetch)
46100 window_list->num_loads += 1;
46101 else if (group == disp_load_store)
46102 {
46103 window_list->num_stores += 1;
46104 window_list->num_loads += 1;
46105 }
46106 }
46107
46108 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46109 If the total bytes of instructions or the number of instructions in
46110 the window exceeds the allowable limit, it allocates a new window.
46111
46112 static void
46113 add_to_dispatch_window (rtx insn)
46114 {
46115 int byte_len;
46116 dispatch_windows *window_list;
46117 dispatch_windows *next_list;
46118 dispatch_windows *window0_list;
46119 enum insn_path path;
46120 enum dispatch_group insn_group;
46121 bool insn_fits;
46122 int num_insn;
46123 int num_uops;
46124 int window_num;
46125 int insn_num_uops;
46126 int sum;
46127
46128 if (INSN_CODE (insn) < 0)
46129 return;
46130
46131 byte_len = min_insn_size (insn);
46132 window_list = dispatch_window_list;
46133 next_list = window_list->next;
46134 path = get_insn_path (insn);
46135 insn_group = get_insn_group (insn);
46136
46137 /* Get the last dispatch window. */
46138 if (next_list)
46139 window_list = dispatch_window_list->next;
46140
46141 if (path == path_single)
46142 insn_num_uops = 1;
46143 else if (path == path_double)
46144 insn_num_uops = 2;
46145 else
46146 insn_num_uops = (int) path;
46147
46148 /* If current window is full, get a new window.
46149 Window number zero is full if MAX_INSN uops are scheduled in it.
46150 Window number one is full if window zero's bytes plus window
46151 one's bytes equal 32, if adding the bytes of the new instruction
46152 makes the total 48 or more, or if it already has MAX_INSN
46153 instructions in it. */
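/* For example, if window zero holds 20 bytes and window one holds 12 bytes,
   the 32-byte limit is reached and window one is treated as full.  */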
46154 num_insn = window_list->num_insn;
46155 num_uops = window_list->num_uops;
46156 window_num = window_list->window_num;
46157 insn_fits = fits_dispatch_window (insn);
46158
46159 if (num_insn >= MAX_INSN
46160 || num_uops + insn_num_uops > MAX_INSN
46161 || !(insn_fits))
46162 {
46163 window_num = ~window_num & 1;
46164 window_list = allocate_next_window (window_num);
46165 }
46166
46167 if (window_num == 0)
46168 {
46169 add_insn_window (insn, window_list, insn_num_uops);
46170 if (window_list->num_insn >= MAX_INSN
46171 && insn_group == disp_branch)
46172 {
46173 process_end_window ();
46174 return;
46175 }
46176 }
46177 else if (window_num == 1)
46178 {
46179 window0_list = window_list->prev;
46180 sum = window0_list->window_size + window_list->window_size;
46181 if (sum == 32
46182 || (byte_len + sum) >= 48)
46183 {
46184 process_end_window ();
46185 window_list = dispatch_window_list;
46186 }
46187
46188 add_insn_window (insn, window_list, insn_num_uops);
46189 }
46190 else
46191 gcc_unreachable ();
46192
46193 if (is_end_basic_block (insn_group))
46194 {
46195 /* End of basic block is reached; do end-basic-block processing. */
46196 process_end_window ();
46197 return;
46198 }
46199 }
46200
46201 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46202
46203 DEBUG_FUNCTION static void
46204 debug_dispatch_window_file (FILE *file, int window_num)
46205 {
46206 dispatch_windows *list;
46207 int i;
46208
46209 if (window_num == 0)
46210 list = dispatch_window_list;
46211 else
46212 list = dispatch_window_list1;
46213
46214 fprintf (file, "Window #%d:\n", list->window_num);
46215 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46216 list->num_insn, list->num_uops, list->window_size);
46217 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46218 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46219
46220 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46221 list->num_stores);
46222 fprintf (file, " insn info:\n");
46223
46224 for (i = 0; i < MAX_INSN; i++)
46225 {
46226 if (!list->window[i].insn)
46227 break;
46228 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46229 i, group_name[list->window[i].group],
46230 i, (void *)list->window[i].insn,
46231 i, list->window[i].path,
46232 i, list->window[i].byte_len,
46233 i, list->window[i].imm_bytes);
46234 }
46235 }
46236
46237 /* Print to stdout a dispatch window. */
46238
46239 DEBUG_FUNCTION void
46240 debug_dispatch_window (int window_num)
46241 {
46242 debug_dispatch_window_file (stdout, window_num);
46243 }
46244
46245 /* Print INSN dispatch information to FILE. */
46246
46247 DEBUG_FUNCTION static void
46248 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46249 {
46250 int byte_len;
46251 enum insn_path path;
46252 enum dispatch_group group;
46253 int imm_size;
46254 int num_imm_operand;
46255 int num_imm32_operand;
46256 int num_imm64_operand;
46257
46258 if (INSN_CODE (insn) < 0)
46259 return;
46260
46261 byte_len = min_insn_size (insn);
46262 path = get_insn_path (insn);
46263 group = get_insn_group (insn);
46264 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46265 &num_imm64_operand);
46266
46267 fprintf (file, " insn info:\n");
46268 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46269 group_name[group], path, byte_len);
46270 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46271 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46272 }
46273
46274 /* Print to STDERR the status of the ready list with respect to
46275 dispatch windows. */
46276
46277 DEBUG_FUNCTION void
46278 debug_ready_dispatch (void)
46279 {
46280 int i;
46281 int no_ready = number_in_ready ();
46282
46283 fprintf (stdout, "Number of ready: %d\n", no_ready);
46284
46285 for (i = 0; i < no_ready; i++)
46286 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46287 }
46288
46289 /* This routine is the driver of the dispatch scheduler. */
46290
46291 static void
46292 do_dispatch (rtx insn, int mode)
46293 {
46294 if (mode == DISPATCH_INIT)
46295 init_dispatch_sched ();
46296 else if (mode == ADD_TO_DISPATCH_WINDOW)
46297 add_to_dispatch_window (insn);
46298 }
46299
46300 /* Return TRUE if Dispatch Scheduling is supported. */
46301
46302 static bool
46303 has_dispatch (rtx insn, int action)
46304 {
46305 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46306 && flag_dispatch_scheduler)
46307 switch (action)
46308 {
46309 default:
46310 return false;
46311
46312 case IS_DISPATCH_ON:
46313 return true;
46314 break;
46315
46316 case IS_CMP:
46317 return is_cmp (insn);
46318
46319 case DISPATCH_VIOLATION:
46320 return dispatch_violation ();
46321
46322 case FITS_DISPATCH_WINDOW:
46323 return fits_dispatch_window (insn);
46324 }
46325
46326 return false;
46327 }
46328
46329 /* Implementation of reassociation_width target hook used by
46330 reassoc phase to identify parallelism level in reassociated
46331 tree. The statement's tree_code is passed in OPC. The arguments' type
46332 is passed in MODE.
46333
46334 Currently parallel reassociation is enabled for Atom
46335 processors only and we set reassociation width to be 2
46336 because Atom may issue up to 2 instructions per cycle.
46337
46338 Return value should be fixed if parallel reassociation is
46339 enabled for other processors. */
46340
46341 static int
46342 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46343 enum machine_mode mode)
46344 {
46345 int res = 1;
46346
46347 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46348 res = 2;
46349 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46350 res = 2;
46351
46352 return res;
46353 }
46354
46355 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46356 place emms and femms instructions. */
46357
46358 static enum machine_mode
46359 ix86_preferred_simd_mode (enum machine_mode mode)
46360 {
46361 if (!TARGET_SSE)
46362 return word_mode;
46363
46364 switch (mode)
46365 {
46366 case QImode:
46367 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46368 case HImode:
46369 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46370 case SImode:
46371 return TARGET_AVX512F ? V16SImode :
46372 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46373 case DImode:
46374 return TARGET_AVX512F ? V8DImode :
46375 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46376
46377 case SFmode:
46378 if (TARGET_AVX512F)
46379 return V16SFmode;
46380 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46381 return V8SFmode;
46382 else
46383 return V4SFmode;
46384
46385 case DFmode:
46386 if (!TARGET_VECTORIZE_DOUBLE)
46387 return word_mode;
46388 else if (TARGET_AVX512F)
46389 return V8DFmode;
46390 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46391 return V4DFmode;
46392 else if (TARGET_SSE2)
46393 return V2DFmode;
46394 /* FALLTHRU */
46395
46396 default:
46397 return word_mode;
46398 }
46399 }
46400
46401 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46402 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46403 256bit and 128bit vectors. */
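/* The returned value is a bit mask of the candidate vector sizes in bytes;
   e.g. with AVX512F enabled it is 64 | 32 | 16 == 0x70.  */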
46404
46405 static unsigned int
46406 ix86_autovectorize_vector_sizes (void)
46407 {
46408 return TARGET_AVX512F ? 64 | 32 | 16 :
46409 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46410 }
46411
46412 \f
46413
46414 /* Return class of registers which could be used for pseudo of MODE
46415 and of class RCLASS for spilling instead of memory. Return NO_REGS
46416 if it is not possible or non-profitable. */
46417 static reg_class_t
46418 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46419 {
46420 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46421 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46422 && INTEGER_CLASS_P (rclass))
46423 return ALL_SSE_REGS;
46424 return NO_REGS;
46425 }
46426
46427 /* Implement targetm.vectorize.init_cost. */
46428
46429 static void *
46430 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46431 {
46432 unsigned *cost = XNEWVEC (unsigned, 3);
46433 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46434 return cost;
46435 }
46436
46437 /* Implement targetm.vectorize.add_stmt_cost. */
46438
46439 static unsigned
46440 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46441 struct _stmt_vec_info *stmt_info, int misalign,
46442 enum vect_cost_model_location where)
46443 {
46444 unsigned *cost = (unsigned *) data;
46445 unsigned retval = 0;
46446
46447 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46448 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46449
46450 /* Statements in an inner loop relative to the loop being
46451 vectorized are weighted more heavily. The value here is
46452 arbitrary and could potentially be improved with analysis. */
46453 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46454 count *= 50; /* FIXME. */
46455
46456 retval = (unsigned) (count * stmt_cost);
46457
46458 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46459 for Silvermont, as it has an out-of-order integer pipeline and can execute
46460 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
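/* For example, with count == 4 and stmt_cost == 1, retval is first 4 and
   the adjustment below yields (4 * 17) / 10 == 6.  */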
46461 if (TARGET_SILVERMONT || TARGET_INTEL)
46462 if (stmt_info && stmt_info->stmt)
46463 {
46464 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46465 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46466 retval = (retval * 17) / 10;
46467 }
46468
46469 cost[where] += retval;
46470
46471 return retval;
46472 }
46473
46474 /* Implement targetm.vectorize.finish_cost. */
46475
46476 static void
46477 ix86_finish_cost (void *data, unsigned *prologue_cost,
46478 unsigned *body_cost, unsigned *epilogue_cost)
46479 {
46480 unsigned *cost = (unsigned *) data;
46481 *prologue_cost = cost[vect_prologue];
46482 *body_cost = cost[vect_body];
46483 *epilogue_cost = cost[vect_epilogue];
46484 }
46485
46486 /* Implement targetm.vectorize.destroy_cost_data. */
46487
46488 static void
46489 ix86_destroy_cost_data (void *data)
46490 {
46491 free (data);
46492 }
46493
46494 /* Validate target specific memory model bits in VAL. */
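/* These bits typically originate from user code that ORs the HLE hints into
   the memory model argument of an __atomic builtin, e.g. (illustrative)
   __atomic_fetch_add (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE).  */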
46495
46496 static unsigned HOST_WIDE_INT
46497 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46498 {
46499 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46500 bool strong;
46501
46502 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46503 |MEMMODEL_MASK)
46504 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46505 {
46506 warning (OPT_Winvalid_memory_model,
46507 "Unknown architecture specific memory model");
46508 return MEMMODEL_SEQ_CST;
46509 }
46510 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46511 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46512 {
46513 warning (OPT_Winvalid_memory_model,
46514 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46515 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46516 }
46517 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46518 {
46519 warning (OPT_Winvalid_memory_model,
46520 "HLE_RELEASE not used with RELEASE or stronger memory model");
46521 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46522 }
46523 return val;
46524 }
46525
46526 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46527 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46528 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46529 or number of vecsize_mangle variants that should be emitted. */
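/* For example, a clone with vecsize_mangle 'd' has vecsize_int == 256, so
   for a 32-bit integer base type the default simdlen computed below is
   256 / 32 == 8.  */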
46530
46531 static int
46532 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46533 struct cgraph_simd_clone *clonei,
46534 tree base_type, int num)
46535 {
46536 int ret = 1;
46537
46538 if (clonei->simdlen
46539 && (clonei->simdlen < 2
46540 || clonei->simdlen > 16
46541 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46542 {
46543 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46544 "unsupported simdlen %d", clonei->simdlen);
46545 return 0;
46546 }
46547
46548 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46549 if (TREE_CODE (ret_type) != VOID_TYPE)
46550 switch (TYPE_MODE (ret_type))
46551 {
46552 case QImode:
46553 case HImode:
46554 case SImode:
46555 case DImode:
46556 case SFmode:
46557 case DFmode:
46558 /* case SCmode: */
46559 /* case DCmode: */
46560 break;
46561 default:
46562 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46563 "unsupported return type %qT for simd\n", ret_type);
46564 return 0;
46565 }
46566
46567 tree t;
46568 int i;
46569
46570 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46571 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46572 switch (TYPE_MODE (TREE_TYPE (t)))
46573 {
46574 case QImode:
46575 case HImode:
46576 case SImode:
46577 case DImode:
46578 case SFmode:
46579 case DFmode:
46580 /* case SCmode: */
46581 /* case DCmode: */
46582 break;
46583 default:
46584 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46585 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46586 return 0;
46587 }
46588
46589 if (clonei->cilk_elemental)
46590 {
46591 /* Parse the processor clause here. If not present, default to 'b'. */
46592 clonei->vecsize_mangle = 'b';
46593 }
46594 else if (!TREE_PUBLIC (node->decl))
46595 {
46596 /* If the function isn't exported, we can pick up just one ISA
46597 for the clones. */
46598 if (TARGET_AVX2)
46599 clonei->vecsize_mangle = 'd';
46600 else if (TARGET_AVX)
46601 clonei->vecsize_mangle = 'c';
46602 else
46603 clonei->vecsize_mangle = 'b';
46604 ret = 1;
46605 }
46606 else
46607 {
46608 clonei->vecsize_mangle = "bcd"[num];
46609 ret = 3;
46610 }
46611 switch (clonei->vecsize_mangle)
46612 {
46613 case 'b':
46614 clonei->vecsize_int = 128;
46615 clonei->vecsize_float = 128;
46616 break;
46617 case 'c':
46618 clonei->vecsize_int = 128;
46619 clonei->vecsize_float = 256;
46620 break;
46621 case 'd':
46622 clonei->vecsize_int = 256;
46623 clonei->vecsize_float = 256;
46624 break;
46625 }
46626 if (clonei->simdlen == 0)
46627 {
46628 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46629 clonei->simdlen = clonei->vecsize_int;
46630 else
46631 clonei->simdlen = clonei->vecsize_float;
46632 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46633 if (clonei->simdlen > 16)
46634 clonei->simdlen = 16;
46635 }
46636 return ret;
46637 }
46638
46639 /* Add target attribute to SIMD clone NODE if needed. */
46640
46641 static void
46642 ix86_simd_clone_adjust (struct cgraph_node *node)
46643 {
46644 const char *str = NULL;
46645 gcc_assert (node->decl == cfun->decl);
46646 switch (node->simdclone->vecsize_mangle)
46647 {
46648 case 'b':
46649 if (!TARGET_SSE2)
46650 str = "sse2";
46651 break;
46652 case 'c':
46653 if (!TARGET_AVX)
46654 str = "avx";
46655 break;
46656 case 'd':
46657 if (!TARGET_AVX2)
46658 str = "avx2";
46659 break;
46660 default:
46661 gcc_unreachable ();
46662 }
46663 if (str == NULL)
46664 return;
46665 push_cfun (NULL);
46666 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46667 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46668 gcc_assert (ok);
46669 pop_cfun ();
46670 ix86_previous_fndecl = NULL_TREE;
46671 ix86_set_current_function (node->decl);
46672 }
46673
46674 /* If SIMD clone NODE can't be used in a vectorized loop
46675 in current function, return -1, otherwise return a badness of using it
46676 (0 if it is most desirable from vecsize_mangle point of view, 1
46677 slightly less desirable, etc.). */
46678
46679 static int
46680 ix86_simd_clone_usable (struct cgraph_node *node)
46681 {
46682 switch (node->simdclone->vecsize_mangle)
46683 {
46684 case 'b':
46685 if (!TARGET_SSE2)
46686 return -1;
46687 if (!TARGET_AVX)
46688 return 0;
46689 return TARGET_AVX2 ? 2 : 1;
46690 case 'c':
46691 if (!TARGET_AVX)
46692 return -1;
46693 return TARGET_AVX2 ? 1 : 0;
46694 break;
46695 case 'd':
46696 if (!TARGET_AVX2)
46697 return -1;
46698 return 0;
46699 default:
46700 gcc_unreachable ();
46701 }
46702 }
46703
46704 /* This function counts the number of memory references.
46705 The resulting count determines the unrolling factor for
46706 the bdver3 and bdver4 architectures. */
46707
46708 static int
46709 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46710 {
46711 if (*x != NULL_RTX && MEM_P (*x))
46712 {
46713 enum machine_mode mode;
46714 unsigned int n_words;
46715
46716 mode = GET_MODE (*x);
46717 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46718
46719 if (n_words > 4)
46720 (*mem_count) += 2;
46721 else
46722 (*mem_count) += 1;
46723 }
46724 return 0;
46725 }
46726
46727 /* This function adjusts the unroll factor based on
46728 the hardware capabilities. For example, bdver3 has
46729 a loop buffer which makes unrolling of smaller
46730 loops less important. This function decides the
46731 unroll factor using the number of memory references
46732 (the value 32 is used) as a heuristic. */
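/* For example, a loop body with 5 counted memory references gets an unroll
   factor of 32 / 5 == 6 when the adjustment below applies.  */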
46733
46734 static unsigned
46735 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46736 {
46737 basic_block *bbs;
46738 rtx insn;
46739 unsigned i;
46740 unsigned mem_count = 0;
46741
46742 if (!TARGET_ADJUST_UNROLL)
46743 return nunroll;
46744
46745 /* Count the number of memory references within the loop body. */
46746 bbs = get_loop_body (loop);
46747 for (i = 0; i < loop->num_nodes; i++)
46748 {
46749 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46750 if (NONDEBUG_INSN_P (insn))
46751 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46752 }
46753 free (bbs);
46754
46755 if (mem_count && mem_count <= 32)
46756 return 32 / mem_count;
46757
46758 return nunroll;
46759 }
46760
46761
46762 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46763
46764 static bool
46765 ix86_float_exceptions_rounding_supported_p (void)
46766 {
46767 /* For x87 floating point with standard excess precision handling,
46768 there is no adddf3 pattern (since x87 floating point only has
46769 XFmode operations) so the default hook implementation gets this
46770 wrong. */
46771 return TARGET_80387 || TARGET_SSE_MATH;
46772 }
46773
46774 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46775
46776 static void
46777 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46778 {
46779 if (!TARGET_80387 && !TARGET_SSE_MATH)
46780 return;
46781 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46782 if (TARGET_80387)
46783 {
46784 tree fenv_index_type = build_index_type (size_int (6));
46785 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46786 tree fenv_var = create_tmp_var (fenv_type, NULL);
46787 mark_addressable (fenv_var);
46788 tree fenv_ptr = build_pointer_type (fenv_type);
46789 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46790 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46791 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46792 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46793 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46794 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46795 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46796 tree hold_fnclex = build_call_expr (fnclex, 0);
46797 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46798 hold_fnclex);
46799 *clear = build_call_expr (fnclex, 0);
46800 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46801 mark_addressable (sw_var);
46802 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46803 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46804 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46805 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46806 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46807 exceptions_var, exceptions_x87);
46808 *update = build2 (COMPOUND_EXPR, integer_type_node,
46809 fnstsw_call, update_mod);
46810 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46811 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46812 }
46813 if (TARGET_SSE_MATH)
46814 {
46815 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46816 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46817 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46818 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46819 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46820 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46821 mxcsr_orig_var, stmxcsr_hold_call);
46822 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46823 mxcsr_orig_var,
46824 build_int_cst (unsigned_type_node, 0x1f80));
46825 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46826 build_int_cst (unsigned_type_node, 0xffffffc0));
46827 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46828 mxcsr_mod_var, hold_mod_val);
46829 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46830 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46831 hold_assign_orig, hold_assign_mod);
46832 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46833 ldmxcsr_hold_call);
46834 if (*hold)
46835 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46836 else
46837 *hold = hold_all;
46838 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46839 if (*clear)
46840 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46841 ldmxcsr_clear_call);
46842 else
46843 *clear = ldmxcsr_clear_call;
46844 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46845 tree exceptions_sse = fold_convert (integer_type_node,
46846 stxmcsr_update_call);
46847 if (*update)
46848 {
46849 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46850 exceptions_var, exceptions_sse);
46851 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46852 exceptions_var, exceptions_mod);
46853 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46854 exceptions_assign);
46855 }
46856 else
46857 *update = build2 (MODIFY_EXPR, integer_type_node,
46858 exceptions_var, exceptions_sse);
46859 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46860 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46861 ldmxcsr_update_call);
46862 }
46863 tree atomic_feraiseexcept
46864 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46865 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46866 1, exceptions_var);
46867 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46868 atomic_feraiseexcept_call);
46869 }
46870
46871 /* Initialize the GCC target structure. */
46872 #undef TARGET_RETURN_IN_MEMORY
46873 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46874
46875 #undef TARGET_LEGITIMIZE_ADDRESS
46876 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46877
46878 #undef TARGET_ATTRIBUTE_TABLE
46879 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46880 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46881 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46882 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46883 # undef TARGET_MERGE_DECL_ATTRIBUTES
46884 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46885 #endif
46886
46887 #undef TARGET_COMP_TYPE_ATTRIBUTES
46888 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46889
46890 #undef TARGET_INIT_BUILTINS
46891 #define TARGET_INIT_BUILTINS ix86_init_builtins
46892 #undef TARGET_BUILTIN_DECL
46893 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46894 #undef TARGET_EXPAND_BUILTIN
46895 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46896
46897 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46898 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46899 ix86_builtin_vectorized_function
46900
46901 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46902 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46903
46904 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46905 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46906
46907 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46908 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46909
46910 #undef TARGET_BUILTIN_RECIPROCAL
46911 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46912
46913 #undef TARGET_ASM_FUNCTION_EPILOGUE
46914 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46915
46916 #undef TARGET_ENCODE_SECTION_INFO
46917 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46918 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46919 #else
46920 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46921 #endif
46922
46923 #undef TARGET_ASM_OPEN_PAREN
46924 #define TARGET_ASM_OPEN_PAREN ""
46925 #undef TARGET_ASM_CLOSE_PAREN
46926 #define TARGET_ASM_CLOSE_PAREN ""
46927
46928 #undef TARGET_ASM_BYTE_OP
46929 #define TARGET_ASM_BYTE_OP ASM_BYTE
46930
46931 #undef TARGET_ASM_ALIGNED_HI_OP
46932 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46933 #undef TARGET_ASM_ALIGNED_SI_OP
46934 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46935 #ifdef ASM_QUAD
46936 #undef TARGET_ASM_ALIGNED_DI_OP
46937 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46938 #endif
46939
46940 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46941 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46942
46943 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46944 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46945
46946 #undef TARGET_ASM_UNALIGNED_HI_OP
46947 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46948 #undef TARGET_ASM_UNALIGNED_SI_OP
46949 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46950 #undef TARGET_ASM_UNALIGNED_DI_OP
46951 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46952
46953 #undef TARGET_PRINT_OPERAND
46954 #define TARGET_PRINT_OPERAND ix86_print_operand
46955 #undef TARGET_PRINT_OPERAND_ADDRESS
46956 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46957 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46958 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46959 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46960 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46961
46962 #undef TARGET_SCHED_INIT_GLOBAL
46963 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46964 #undef TARGET_SCHED_ADJUST_COST
46965 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46966 #undef TARGET_SCHED_ISSUE_RATE
46967 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46968 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46969 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46970 ia32_multipass_dfa_lookahead
46971 #undef TARGET_SCHED_MACRO_FUSION_P
46972 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46973 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46974 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46975
46976 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46977 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46978
46979 #undef TARGET_MEMMODEL_CHECK
46980 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46981
46982 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46983 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46984
46985 #ifdef HAVE_AS_TLS
46986 #undef TARGET_HAVE_TLS
46987 #define TARGET_HAVE_TLS true
46988 #endif
46989 #undef TARGET_CANNOT_FORCE_CONST_MEM
46990 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46991 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46992 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46993
46994 #undef TARGET_DELEGITIMIZE_ADDRESS
46995 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46996
46997 #undef TARGET_MS_BITFIELD_LAYOUT_P
46998 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46999
47000 #if TARGET_MACHO
47001 #undef TARGET_BINDS_LOCAL_P
47002 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47003 #endif
47004 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47005 #undef TARGET_BINDS_LOCAL_P
47006 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47007 #endif
47008
47009 #undef TARGET_ASM_OUTPUT_MI_THUNK
47010 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47011 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47012 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47013
47014 #undef TARGET_ASM_FILE_START
47015 #define TARGET_ASM_FILE_START x86_file_start
47016
47017 #undef TARGET_OPTION_OVERRIDE
47018 #define TARGET_OPTION_OVERRIDE ix86_option_override
47019
47020 #undef TARGET_REGISTER_MOVE_COST
47021 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47022 #undef TARGET_MEMORY_MOVE_COST
47023 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47024 #undef TARGET_RTX_COSTS
47025 #define TARGET_RTX_COSTS ix86_rtx_costs
47026 #undef TARGET_ADDRESS_COST
47027 #define TARGET_ADDRESS_COST ix86_address_cost
47028
47029 #undef TARGET_FIXED_CONDITION_CODE_REGS
47030 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47031 #undef TARGET_CC_MODES_COMPATIBLE
47032 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47033
47034 #undef TARGET_MACHINE_DEPENDENT_REORG
47035 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47036
47037 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47038 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47039
47040 #undef TARGET_BUILD_BUILTIN_VA_LIST
47041 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47042
47043 #undef TARGET_FOLD_BUILTIN
47044 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47045
47046 #undef TARGET_COMPARE_VERSION_PRIORITY
47047 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47048
47049 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47050 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47051 ix86_generate_version_dispatcher_body
47052
47053 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47054 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47055 ix86_get_function_versions_dispatcher
47056
47057 #undef TARGET_ENUM_VA_LIST_P
47058 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47059
47060 #undef TARGET_FN_ABI_VA_LIST
47061 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47062
47063 #undef TARGET_CANONICAL_VA_LIST_TYPE
47064 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47065
47066 #undef TARGET_EXPAND_BUILTIN_VA_START
47067 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47068
47069 #undef TARGET_MD_ASM_CLOBBERS
47070 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47071
47072 #undef TARGET_PROMOTE_PROTOTYPES
47073 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47074 #undef TARGET_SETUP_INCOMING_VARARGS
47075 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47076 #undef TARGET_MUST_PASS_IN_STACK
47077 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47078 #undef TARGET_FUNCTION_ARG_ADVANCE
47079 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47080 #undef TARGET_FUNCTION_ARG
47081 #define TARGET_FUNCTION_ARG ix86_function_arg
47082 #undef TARGET_FUNCTION_ARG_BOUNDARY
47083 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47084 #undef TARGET_PASS_BY_REFERENCE
47085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47086 #undef TARGET_INTERNAL_ARG_POINTER
47087 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47088 #undef TARGET_UPDATE_STACK_BOUNDARY
47089 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47090 #undef TARGET_GET_DRAP_RTX
47091 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47092 #undef TARGET_STRICT_ARGUMENT_NAMING
47093 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47094 #undef TARGET_STATIC_CHAIN
47095 #define TARGET_STATIC_CHAIN ix86_static_chain
47096 #undef TARGET_TRAMPOLINE_INIT
47097 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47098 #undef TARGET_RETURN_POPS_ARGS
47099 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47100
47101 #undef TARGET_LEGITIMATE_COMBINED_INSN
47102 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47103
47104 #undef TARGET_ASAN_SHADOW_OFFSET
47105 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47106
47107 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47108 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47109
47110 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47111 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47112
47113 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47114 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47115
47116 #undef TARGET_C_MODE_FOR_SUFFIX
47117 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47118
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

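/* Return-value conventions: which registers carry function results,
   when values are promoted to wider modes, and when a member type
   forces its enclosing aggregate into BLKmode.  */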
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

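/* Register-class queries used by reload and the register allocator:
   secondary reloads, how many hard registers a value of a given mode
   needs in a class, and which classes are preferred or likely to be
   spilled.  */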
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

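/* Auto-vectorizer hooks: the cost model, which constant permutations
   can be expanded, the preferred SIMD mode for a scalar mode, and the
   vector sizes to try when auto-vectorizing.  */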
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

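/* Support for the "target" attribute/pragma and function
   multiversioning: switching the active ISA options per function,
   saving, restoring and printing function-specific option state, and
   deciding whether functions with different target options may be
   inlined into each other.  */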
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

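/* Register-allocator hooks: the port uses LRA rather than the old
   reload pass and supplies its own register priorities.  */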
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

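/* SIMD clone ("declare simd") hooks: how vector variants of a
   function are sized, adjusted and selected for use by the
   vectorizer.  */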
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

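/* Mode-switching hooks, used by the optimize_mode_switching pass
   (on x86 this drives vzeroupper insertion for AVX and x87
   control-word changes for rounding).  */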
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

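/* Build the target hook vector.  TARGET_INITIALIZER (from
   target-def.h) expands to an aggregate initializer in which every
   field is set from the corresponding TARGET_* macro, so the
   definitions above override the defaults for this port.  */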
struct gcc_target targetm = TARGET_INITIALIZER;
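/* A minimal sketch of how the middle end reaches these definitions:
   target-independent code never calls the ix86_* functions directly,
   it goes through the hook vector, e.g.

     if (targetm.legitimate_constant_p (mode, x))
       ...

   which, with the macros above, ends up in ix86_legitimate_constant_p.  */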
\f
#include "gt-i386.h"