1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "dwarf2out.h"
59 #include "sched-int.h"
60 #include "sbitmap.h"
61 #include "fibheap.h"
62 #include "opts.h"
63 #include "diagnostic.h"
64
65 enum upper_128bits_state
66 {
67 unknown = 0,
68 unused,
69 used
70 };
71
72 typedef struct block_info_def
73 {
74 /* State of the upper 128bits of AVX registers at exit. */
75 enum upper_128bits_state state;
76 /* TRUE if state of the upper 128bits of AVX registers is unchanged
77 in this block. */
78 bool unchanged;
79 /* TRUE if block has been processed. */
80 bool processed;
81 /* TRUE if block has been scanned. */
82 bool scanned;
83 /* Previous state of the upper 128bits of AVX registers at entry. */
84 enum upper_128bits_state prev;
85 } *block_info;
86
87 #define BLOCK_INFO(B) ((block_info) (B)->aux)
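/* Note: the per-block aux data pointed to by BLOCK_INFO is allocated with
   alloc_aux_for_blocks and released with free_aux_for_blocks in
   move_or_delete_vzeroupper below.  */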
88
89 enum call_avx256_state
90 {
91 /* Callee returns 256bit AVX register. */
92 callee_return_avx256 = -1,
93 /* Callee returns and passes 256bit AVX register. */
94 callee_return_pass_avx256,
95 /* Callee passes 256bit AVX register. */
96 callee_pass_avx256,
97 /* Callee neither returns nor passes 256bit AVX register, or no
98 256bit AVX register in function return. */
99 call_no_avx256,
100 /* vzeroupper intrinsic. */
101 vzeroupper_intrinsic
102 };
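/* The selected call_avx256_state value is encoded as the first operand of
   the vzeroupper UNSPEC_VOLATILE pattern and is read back with
   INTVAL (XVECEXP (pat, 0, 0)) in move_or_delete_vzeroupper_2 below.  */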
103
104 /* Check if a 256bit AVX register is referenced in stores. */
105
106 static void
107 check_avx256_stores (rtx dest, const_rtx set, void *data)
108 {
109 if ((REG_P (dest)
110 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
111 || (GET_CODE (set) == SET
112 && REG_P (SET_SRC (set))
113 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
114 {
115 enum upper_128bits_state *state
116 = (enum upper_128bits_state *) data;
117 *state = used;
118 }
119 }
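/* check_avx256_stores is invoked through note_stores from
   move_or_delete_vzeroupper_2; DATA points at the running
   upper_128bits_state, which is promoted to USED as soon as a 256bit AVX
   register is written or copied.  */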
120
121 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
122 in basic block BB. Delete it if upper 128bit AVX registers are
123 unused. If it isn't deleted, move it to just before a jump or call insn.
124
125 STATE is state of the upper 128bits of AVX registers at entry. */
126
127 static void
128 move_or_delete_vzeroupper_2 (basic_block bb,
129 enum upper_128bits_state state)
130 {
131 rtx insn, bb_end;
132 rtx vzeroupper_insn = NULL_RTX;
133 rtx pat;
134 int avx256;
135 bool unchanged;
136
137 if (BLOCK_INFO (bb)->unchanged)
138 {
139 if (dump_file)
140 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
141 bb->index, state);
142
143 BLOCK_INFO (bb)->state = state;
144 return;
145 }
146
147 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
148 {
149 if (dump_file)
150 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
151 bb->index, BLOCK_INFO (bb)->state);
152 return;
153 }
154
155 BLOCK_INFO (bb)->prev = state;
156
157 if (dump_file)
158 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
159 bb->index, state);
160
161 unchanged = true;
162
163 /* BB_END changes when it is deleted. */
164 bb_end = BB_END (bb);
165 insn = BB_HEAD (bb);
166 while (insn != bb_end)
167 {
168 insn = NEXT_INSN (insn);
169
170 if (!NONDEBUG_INSN_P (insn))
171 continue;
172
173 /* Move vzeroupper before jump/call. */
174 if (JUMP_P (insn) || CALL_P (insn))
175 {
176 if (!vzeroupper_insn)
177 continue;
178
179 if (PREV_INSN (insn) != vzeroupper_insn)
180 {
181 if (dump_file)
182 {
183 fprintf (dump_file, "Move vzeroupper after:\n");
184 print_rtl_single (dump_file, PREV_INSN (insn));
185 fprintf (dump_file, "before:\n");
186 print_rtl_single (dump_file, insn);
187 }
188 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
189 PREV_INSN (insn));
190 }
191 vzeroupper_insn = NULL_RTX;
192 continue;
193 }
194
195 pat = PATTERN (insn);
196
197 /* Check insn for vzeroupper intrinsic. */
198 if (GET_CODE (pat) == UNSPEC_VOLATILE
199 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
200 {
201 if (dump_file)
202 {
203 /* Found vzeroupper intrinsic. */
204 fprintf (dump_file, "Found vzeroupper:\n");
205 print_rtl_single (dump_file, insn);
206 }
207 }
208 else
209 {
210 /* Check insn for vzeroall intrinsic. */
211 if (GET_CODE (pat) == PARALLEL
212 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
213 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
214 {
215 state = unused;
216 unchanged = false;
217
218 /* Delete pending vzeroupper insertion. */
219 if (vzeroupper_insn)
220 {
221 delete_insn (vzeroupper_insn);
222 vzeroupper_insn = NULL_RTX;
223 }
224 }
225 else if (state != used)
226 {
227 note_stores (pat, check_avx256_stores, &state);
228 if (state == used)
229 unchanged = false;
230 }
231 continue;
232 }
233
234 /* Process vzeroupper intrinsic. */
235 avx256 = INTVAL (XVECEXP (pat, 0, 0));
236
237 if (state == unused)
238 {
239 /* Since the upper 128bits are cleared, callee must not pass
240 256bit AVX register. We only need to check if callee
241 returns 256bit AVX register. */
242 if (avx256 == callee_return_avx256)
243 {
244 state = used;
245 unchanged = false;
246 }
247
248 /* Remove unnecessary vzeroupper since upper 128bits are
249 cleared. */
250 if (dump_file)
251 {
252 fprintf (dump_file, "Delete redundant vzeroupper:\n");
253 print_rtl_single (dump_file, insn);
254 }
255 delete_insn (insn);
256 }
257 else
258 {
259 /* Set state to UNUSED if callee doesn't return 256bit AVX
260 register. */
261 if (avx256 != callee_return_pass_avx256)
262 state = unused;
263
264 if (avx256 == callee_return_pass_avx256
265 || avx256 == callee_pass_avx256)
266 {
267 /* Must remove vzeroupper since callee passes in 256bit
268 AVX register. */
269 if (dump_file)
270 {
271 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
272 print_rtl_single (dump_file, insn);
273 }
274 delete_insn (insn);
275 }
276 else
277 {
278 vzeroupper_insn = insn;
279 unchanged = false;
280 }
281 }
282 }
283
284 BLOCK_INFO (bb)->state = state;
285 BLOCK_INFO (bb)->unchanged = unchanged;
286 BLOCK_INFO (bb)->scanned = true;
287
288 if (dump_file)
289 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
290 bb->index, unchanged ? "unchanged" : "changed",
291 state);
292 }
293
294 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
295 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
296 as UNUSED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
297 state is changed. */
298
299 static bool
300 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
301 {
302 edge e;
303 edge_iterator ei;
304 enum upper_128bits_state state, old_state, new_state;
305 bool seen_unknown;
306
307 if (dump_file)
308 fprintf (dump_file, " Process [bb %i]: status: %d\n",
309 block->index, BLOCK_INFO (block)->processed);
310
311 if (BLOCK_INFO (block)->processed)
312 return false;
313
314 state = unused;
315
316 /* Check all predecessor edges of this block. */
317 seen_unknown = false;
318 FOR_EACH_EDGE (e, ei, block->preds)
319 {
320 if (e->src == block)
321 continue;
322 switch (BLOCK_INFO (e->src)->state)
323 {
324 case unknown:
325 if (!unknown_is_unused)
326 seen_unknown = true;
327 case unused:
328 break;
329 case used:
330 state = used;
331 goto done;
332 }
333 }
334
335 if (seen_unknown)
336 state = unknown;
337
338 done:
339 old_state = BLOCK_INFO (block)->state;
340 move_or_delete_vzeroupper_2 (block, state);
341 new_state = BLOCK_INFO (block)->state;
342
343 if (state != unknown || new_state == used)
344 BLOCK_INFO (block)->processed = true;
345
346 /* Need to rescan if the upper 128bits of AVX registers are changed
347 to USED at exit. */
348 if (new_state != old_state)
349 {
350 if (new_state == used)
351 cfun->machine->rescan_vzeroupper_p = 1;
352 return true;
353 }
354 else
355 return false;
356 }
357
358 /* Go through the instruction stream looking for vzeroupper. Delete
359 it if upper 128bit AVX registers are unused. If it isn't deleted,
360 move it to just before a jump or call insn. */
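/* Roughly, the pass below is a forward data-flow problem over the CFG: the
   successors of the entry block are seeded with the caller state
   (caller_pass_avx256_p), the remaining blocks are visited in reverse
   completion order via a worklist/pending pair of fibonacci heaps, and the
   iteration is repeated while any block sets rescan_vzeroupper_p.  A final
   pass over all blocks treats any remaining UNKNOWN state as UNUSED.  */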
361
362 static void
363 move_or_delete_vzeroupper (void)
364 {
365 edge e;
366 edge_iterator ei;
367 basic_block bb;
368 fibheap_t worklist, pending, fibheap_swap;
369 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
370 int *bb_order;
371 int *rc_order;
372 int i;
373
374 /* Set up block info for each basic block. */
375 alloc_aux_for_blocks (sizeof (struct block_info_def));
376
377 /* Process outgoing edges of entry point. */
378 if (dump_file)
379 fprintf (dump_file, "Process outgoing edges of entry point\n");
380
381 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
382 {
383 move_or_delete_vzeroupper_2 (e->dest,
384 cfun->machine->caller_pass_avx256_p
385 ? used : unused);
386 BLOCK_INFO (e->dest)->processed = true;
387 }
388
389 /* Compute reverse completion order of depth first search of the CFG
390 so that the data-flow runs faster. */
391 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
392 bb_order = XNEWVEC (int, last_basic_block);
393 pre_and_rev_post_order_compute (NULL, rc_order, false);
394 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
395 bb_order[rc_order[i]] = i;
396 free (rc_order);
397
398 worklist = fibheap_new ();
399 pending = fibheap_new ();
400 visited = sbitmap_alloc (last_basic_block);
401 in_worklist = sbitmap_alloc (last_basic_block);
402 in_pending = sbitmap_alloc (last_basic_block);
403 sbitmap_zero (in_worklist);
404
405 /* Don't check outgoing edges of entry point. */
406 sbitmap_ones (in_pending);
407 FOR_EACH_BB (bb)
408 if (BLOCK_INFO (bb)->processed)
409 RESET_BIT (in_pending, bb->index);
410 else
411 {
412 move_or_delete_vzeroupper_1 (bb, false);
413 fibheap_insert (pending, bb_order[bb->index], bb);
414 }
415
416 if (dump_file)
417 fprintf (dump_file, "Check remaining basic blocks\n");
418
419 while (!fibheap_empty (pending))
420 {
421 fibheap_swap = pending;
422 pending = worklist;
423 worklist = fibheap_swap;
424 sbitmap_swap = in_pending;
425 in_pending = in_worklist;
426 in_worklist = sbitmap_swap;
427
428 sbitmap_zero (visited);
429
430 cfun->machine->rescan_vzeroupper_p = 0;
431
432 while (!fibheap_empty (worklist))
433 {
434 bb = (basic_block) fibheap_extract_min (worklist);
435 RESET_BIT (in_worklist, bb->index);
436 gcc_assert (!TEST_BIT (visited, bb->index));
437 if (!TEST_BIT (visited, bb->index))
438 {
439 edge_iterator ei;
440
441 SET_BIT (visited, bb->index);
442
443 if (move_or_delete_vzeroupper_1 (bb, false))
444 FOR_EACH_EDGE (e, ei, bb->succs)
445 {
446 if (e->dest == EXIT_BLOCK_PTR
447 || BLOCK_INFO (e->dest)->processed)
448 continue;
449
450 if (TEST_BIT (visited, e->dest->index))
451 {
452 if (!TEST_BIT (in_pending, e->dest->index))
453 {
454 /* Send E->DEST to next round. */
455 SET_BIT (in_pending, e->dest->index);
456 fibheap_insert (pending,
457 bb_order[e->dest->index],
458 e->dest);
459 }
460 }
461 else if (!TEST_BIT (in_worklist, e->dest->index))
462 {
463 /* Add E->DEST to current round. */
464 SET_BIT (in_worklist, e->dest->index);
465 fibheap_insert (worklist, bb_order[e->dest->index],
466 e->dest);
467 }
468 }
469 }
470 }
471
472 if (!cfun->machine->rescan_vzeroupper_p)
473 break;
474 }
475
476 free (bb_order);
477 fibheap_delete (worklist);
478 fibheap_delete (pending);
479 sbitmap_free (visited);
480 sbitmap_free (in_worklist);
481 sbitmap_free (in_pending);
482
483 if (dump_file)
484 fprintf (dump_file, "Process remaining basic blocks\n");
485
486 FOR_EACH_BB (bb)
487 move_or_delete_vzeroupper_1 (bb, true);
488
489 free_aux_for_blocks ();
490 }
491
492 static rtx legitimize_dllimport_symbol (rtx, bool);
493
494 #ifndef CHECK_STACK_LIMIT
495 #define CHECK_STACK_LIMIT (-1)
496 #endif
497
498 /* Return index of given mode in mult and division cost tables. */
499 #define MODE_INDEX(mode) \
500 ((mode) == QImode ? 0 \
501 : (mode) == HImode ? 1 \
502 : (mode) == SImode ? 2 \
503 : (mode) == DImode ? 3 \
504 : 4)
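/* For illustration (assuming the field names of struct processor_costs):
   MODE_INDEX selects the per-mode entry of the cost arrays below, e.g.
   ix86_cost->mult_init[MODE_INDEX (mode)] or
   ix86_cost->divide[MODE_INDEX (mode)]; index 4 covers any other mode.  */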
505
506 /* Processor costs (relative to an add) */
507 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
508 #define COSTS_N_BYTES(N) ((N) * 2)
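/* For illustration: with COSTS_N_INSNS (N) == (N) * 4 as assumed above,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the 2-byte add used for
   size tuning carries the same weight as a single-insn add in the
   speed-oriented tables below.  */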
509
510 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
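/* Each stringop_algs initializer below has the form
   {alg_for_unknown_size, {{max_1, alg_1}, ..., {-1, alg_n}}}: the leading
   algorithm is used when the block size is not known at compile time, and
   each {max, alg} pair selects ALG for sizes up to MAX bytes, with -1
   covering all larger sizes.  DUMMY_STRINGOP_ALGS simply falls back to a
   libcall and stands in for variants that are not separately tuned.  */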
511
512 const
513 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
514 COSTS_N_BYTES (2), /* cost of an add instruction */
515 COSTS_N_BYTES (3), /* cost of a lea instruction */
516 COSTS_N_BYTES (2), /* variable shift costs */
517 COSTS_N_BYTES (3), /* constant shift costs */
518 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
519 COSTS_N_BYTES (3), /* HI */
520 COSTS_N_BYTES (3), /* SI */
521 COSTS_N_BYTES (3), /* DI */
522 COSTS_N_BYTES (5)}, /* other */
523 0, /* cost of multiply per each bit set */
524 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
525 COSTS_N_BYTES (3), /* HI */
526 COSTS_N_BYTES (3), /* SI */
527 COSTS_N_BYTES (3), /* DI */
528 COSTS_N_BYTES (5)}, /* other */
529 COSTS_N_BYTES (3), /* cost of movsx */
530 COSTS_N_BYTES (3), /* cost of movzx */
531 0, /* "large" insn */
532 2, /* MOVE_RATIO */
533 2, /* cost for loading QImode using movzbl */
534 {2, 2, 2}, /* cost of loading integer registers
535 in QImode, HImode and SImode.
536 Relative to reg-reg move (2). */
537 {2, 2, 2}, /* cost of storing integer registers */
538 2, /* cost of reg,reg fld/fst */
539 {2, 2, 2}, /* cost of loading fp registers
540 in SFmode, DFmode and XFmode */
541 {2, 2, 2}, /* cost of storing fp registers
542 in SFmode, DFmode and XFmode */
543 3, /* cost of moving MMX register */
544 {3, 3}, /* cost of loading MMX registers
545 in SImode and DImode */
546 {3, 3}, /* cost of storing MMX registers
547 in SImode and DImode */
548 3, /* cost of moving SSE register */
549 {3, 3, 3}, /* cost of loading SSE registers
550 in SImode, DImode and TImode */
551 {3, 3, 3}, /* cost of storing SSE registers
552 in SImode, DImode and TImode */
553 3, /* MMX or SSE register to integer */
554 0, /* size of l1 cache */
555 0, /* size of l2 cache */
556 0, /* size of prefetch block */
557 0, /* number of parallel prefetches */
558 2, /* Branch cost */
559 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
561 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
562 COSTS_N_BYTES (2), /* cost of FABS instruction. */
563 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
564 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
565 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
566 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
567 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
568 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 1, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 1, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
580 };
581
582 /* Processor costs (relative to an add) */
583 static const
584 struct processor_costs i386_cost = { /* 386 specific costs */
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (1), /* cost of a lea instruction */
587 COSTS_N_INSNS (3), /* variable shift costs */
588 COSTS_N_INSNS (2), /* constant shift costs */
589 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (6), /* HI */
591 COSTS_N_INSNS (6), /* SI */
592 COSTS_N_INSNS (6), /* DI */
593 COSTS_N_INSNS (6)}, /* other */
594 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (23), /* HI */
597 COSTS_N_INSNS (23), /* SI */
598 COSTS_N_INSNS (23), /* DI */
599 COSTS_N_INSNS (23)}, /* other */
600 COSTS_N_INSNS (3), /* cost of movsx */
601 COSTS_N_INSNS (2), /* cost of movzx */
602 15, /* "large" insn */
603 3, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {2, 4, 2}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {2, 4, 2}, /* cost of storing integer registers */
609 2, /* cost of reg,reg fld/fst */
610 {8, 8, 8}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {8, 8, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 8}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 8}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 8, 16}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 8, 16}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 3, /* MMX or SSE register to integer */
625 0, /* size of l1 cache */
626 0, /* size of l2 cache */
627 0, /* size of prefetch block */
628 0, /* number of parallel prefetches */
629 1, /* Branch cost */
630 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (22), /* cost of FABS instruction. */
634 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
636 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
637 DUMMY_STRINGOP_ALGS},
638 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
639 DUMMY_STRINGOP_ALGS},
640 1, /* scalar_stmt_cost. */
641 1, /* scalar load_cost. */
642 1, /* scalar_store_cost. */
643 1, /* vec_stmt_cost. */
644 1, /* vec_to_scalar_cost. */
645 1, /* scalar_to_vec_cost. */
646 1, /* vec_align_load_cost. */
647 2, /* vec_unalign_load_cost. */
648 1, /* vec_store_cost. */
649 3, /* cond_taken_branch_cost. */
650 1, /* cond_not_taken_branch_cost. */
651 };
652
653 static const
654 struct processor_costs i486_cost = { /* 486 specific costs */
655 COSTS_N_INSNS (1), /* cost of an add instruction */
656 COSTS_N_INSNS (1), /* cost of a lea instruction */
657 COSTS_N_INSNS (3), /* variable shift costs */
658 COSTS_N_INSNS (2), /* constant shift costs */
659 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
660 COSTS_N_INSNS (12), /* HI */
661 COSTS_N_INSNS (12), /* SI */
662 COSTS_N_INSNS (12), /* DI */
663 COSTS_N_INSNS (12)}, /* other */
664 1, /* cost of multiply per each bit set */
665 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
666 COSTS_N_INSNS (40), /* HI */
667 COSTS_N_INSNS (40), /* SI */
668 COSTS_N_INSNS (40), /* DI */
669 COSTS_N_INSNS (40)}, /* other */
670 COSTS_N_INSNS (3), /* cost of movsx */
671 COSTS_N_INSNS (2), /* cost of movzx */
672 15, /* "large" insn */
673 3, /* MOVE_RATIO */
674 4, /* cost for loading QImode using movzbl */
675 {2, 4, 2}, /* cost of loading integer registers
676 in QImode, HImode and SImode.
677 Relative to reg-reg move (2). */
678 {2, 4, 2}, /* cost of storing integer registers */
679 2, /* cost of reg,reg fld/fst */
680 {8, 8, 8}, /* cost of loading fp registers
681 in SFmode, DFmode and XFmode */
682 {8, 8, 8}, /* cost of storing fp registers
683 in SFmode, DFmode and XFmode */
684 2, /* cost of moving MMX register */
685 {4, 8}, /* cost of loading MMX registers
686 in SImode and DImode */
687 {4, 8}, /* cost of storing MMX registers
688 in SImode and DImode */
689 2, /* cost of moving SSE register */
690 {4, 8, 16}, /* cost of loading SSE registers
691 in SImode, DImode and TImode */
692 {4, 8, 16}, /* cost of storing SSE registers
693 in SImode, DImode and TImode */
694 3, /* MMX or SSE register to integer */
695 4, /* size of l1 cache. 486 has 8kB cache
696 shared for code and data, so 4kB is
697 not really precise. */
698 4, /* size of l2 cache */
699 0, /* size of prefetch block */
700 0, /* number of parallel prefetches */
701 1, /* Branch cost */
702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
703 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
704 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
705 COSTS_N_INSNS (3), /* cost of FABS instruction. */
706 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
707 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
708 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
709 DUMMY_STRINGOP_ALGS},
710 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
711 DUMMY_STRINGOP_ALGS},
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 static const
726 struct processor_costs pentium_cost = {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1), /* cost of a lea instruction */
729 COSTS_N_INSNS (4), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (11), /* HI */
733 COSTS_N_INSNS (11), /* SI */
734 COSTS_N_INSNS (11), /* DI */
735 COSTS_N_INSNS (11)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (25), /* HI */
739 COSTS_N_INSNS (25), /* SI */
740 COSTS_N_INSNS (25), /* DI */
741 COSTS_N_INSNS (25)}, /* other */
742 COSTS_N_INSNS (3), /* cost of movsx */
743 COSTS_N_INSNS (2), /* cost of movzx */
744 8, /* "large" insn */
745 6, /* MOVE_RATIO */
746 6, /* cost for loading QImode using movzbl */
747 {2, 4, 2}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {2, 4, 2}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {2, 2, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
754 {4, 4, 6}, /* cost of storing fp registers
755 in SFmode, DFmode and XFmode */
756 8, /* cost of moving MMX register */
757 {8, 8}, /* cost of loading MMX registers
758 in SImode and DImode */
759 {8, 8}, /* cost of storing MMX registers
760 in SImode and DImode */
761 2, /* cost of moving SSE register */
762 {4, 8, 16}, /* cost of loading SSE registers
763 in SImode, DImode and TImode */
764 {4, 8, 16}, /* cost of storing SSE registers
765 in SImode, DImode and TImode */
766 3, /* MMX or SSE register to integer */
767 8, /* size of l1 cache. */
768 8, /* size of l2 cache */
769 0, /* size of prefetch block */
770 0, /* number of parallel prefetches */
771 2, /* Branch cost */
772 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
773 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
774 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
775 COSTS_N_INSNS (1), /* cost of FABS instruction. */
776 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
777 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
778 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
779 DUMMY_STRINGOP_ALGS},
780 {{libcall, {{-1, rep_prefix_4_byte}}},
781 DUMMY_STRINGOP_ALGS},
782 1, /* scalar_stmt_cost. */
783 1, /* scalar load_cost. */
784 1, /* scalar_store_cost. */
785 1, /* vec_stmt_cost. */
786 1, /* vec_to_scalar_cost. */
787 1, /* scalar_to_vec_cost. */
788 1, /* vec_align_load_cost. */
789 2, /* vec_unalign_load_cost. */
790 1, /* vec_store_cost. */
791 3, /* cond_taken_branch_cost. */
792 1, /* cond_not_taken_branch_cost. */
793 };
794
795 static const
796 struct processor_costs pentiumpro_cost = {
797 COSTS_N_INSNS (1), /* cost of an add instruction */
798 COSTS_N_INSNS (1), /* cost of a lea instruction */
799 COSTS_N_INSNS (1), /* variable shift costs */
800 COSTS_N_INSNS (1), /* constant shift costs */
801 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
802 COSTS_N_INSNS (4), /* HI */
803 COSTS_N_INSNS (4), /* SI */
804 COSTS_N_INSNS (4), /* DI */
805 COSTS_N_INSNS (4)}, /* other */
806 0, /* cost of multiply per each bit set */
807 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
808 COSTS_N_INSNS (17), /* HI */
809 COSTS_N_INSNS (17), /* SI */
810 COSTS_N_INSNS (17), /* DI */
811 COSTS_N_INSNS (17)}, /* other */
812 COSTS_N_INSNS (1), /* cost of movsx */
813 COSTS_N_INSNS (1), /* cost of movzx */
814 8, /* "large" insn */
815 6, /* MOVE_RATIO */
816 2, /* cost for loading QImode using movzbl */
817 {4, 4, 4}, /* cost of loading integer registers
818 in QImode, HImode and SImode.
819 Relative to reg-reg move (2). */
820 {2, 2, 2}, /* cost of storing integer registers */
821 2, /* cost of reg,reg fld/fst */
822 {2, 2, 6}, /* cost of loading fp registers
823 in SFmode, DFmode and XFmode */
824 {4, 4, 6}, /* cost of storing fp registers
825 in SFmode, DFmode and XFmode */
826 2, /* cost of moving MMX register */
827 {2, 2}, /* cost of loading MMX registers
828 in SImode and DImode */
829 {2, 2}, /* cost of storing MMX registers
830 in SImode and DImode */
831 2, /* cost of moving SSE register */
832 {2, 2, 8}, /* cost of loading SSE registers
833 in SImode, DImode and TImode */
834 {2, 2, 8}, /* cost of storing SSE registers
835 in SImode, DImode and TImode */
836 3, /* MMX or SSE register to integer */
837 8, /* size of l1 cache. */
838 256, /* size of l2 cache */
839 32, /* size of prefetch block */
840 6, /* number of parallel prefetches */
841 2, /* Branch cost */
842 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
843 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
844 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
845 COSTS_N_INSNS (2), /* cost of FABS instruction. */
846 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
847 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
848 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
849 (we ensure the alignment). For small blocks an inline loop is still a
850 noticeable win, for bigger blocks either rep movsl or rep movsb is the
851 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
852 but after 4K the difference is down in the noise. */
853 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
854 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
855 DUMMY_STRINGOP_ALGS},
856 {{rep_prefix_4_byte, {{1024, unrolled_loop},
857 {8192, rep_prefix_4_byte}, {-1, libcall}}},
858 DUMMY_STRINGOP_ALGS},
859 1, /* scalar_stmt_cost. */
860 1, /* scalar load_cost. */
861 1, /* scalar_store_cost. */
862 1, /* vec_stmt_cost. */
863 1, /* vec_to_scalar_cost. */
864 1, /* scalar_to_vec_cost. */
865 1, /* vec_align_load_cost. */
866 2, /* vec_unalign_load_cost. */
867 1, /* vec_store_cost. */
868 3, /* cond_taken_branch_cost. */
869 1, /* cond_not_taken_branch_cost. */
870 };
871
872 static const
873 struct processor_costs geode_cost = {
874 COSTS_N_INSNS (1), /* cost of an add instruction */
875 COSTS_N_INSNS (1), /* cost of a lea instruction */
876 COSTS_N_INSNS (2), /* variable shift costs */
877 COSTS_N_INSNS (1), /* constant shift costs */
878 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
879 COSTS_N_INSNS (4), /* HI */
880 COSTS_N_INSNS (7), /* SI */
881 COSTS_N_INSNS (7), /* DI */
882 COSTS_N_INSNS (7)}, /* other */
883 0, /* cost of multiply per each bit set */
884 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
885 COSTS_N_INSNS (23), /* HI */
886 COSTS_N_INSNS (39), /* SI */
887 COSTS_N_INSNS (39), /* DI */
888 COSTS_N_INSNS (39)}, /* other */
889 COSTS_N_INSNS (1), /* cost of movsx */
890 COSTS_N_INSNS (1), /* cost of movzx */
891 8, /* "large" insn */
892 4, /* MOVE_RATIO */
893 1, /* cost for loading QImode using movzbl */
894 {1, 1, 1}, /* cost of loading integer registers
895 in QImode, HImode and SImode.
896 Relative to reg-reg move (2). */
897 {1, 1, 1}, /* cost of storing integer registers */
898 1, /* cost of reg,reg fld/fst */
899 {1, 1, 1}, /* cost of loading fp registers
900 in SFmode, DFmode and XFmode */
901 {4, 6, 6}, /* cost of storing fp registers
902 in SFmode, DFmode and XFmode */
903
904 1, /* cost of moving MMX register */
905 {1, 1}, /* cost of loading MMX registers
906 in SImode and DImode */
907 {1, 1}, /* cost of storing MMX registers
908 in SImode and DImode */
909 1, /* cost of moving SSE register */
910 {1, 1, 1}, /* cost of loading SSE registers
911 in SImode, DImode and TImode */
912 {1, 1, 1}, /* cost of storing SSE registers
913 in SImode, DImode and TImode */
914 1, /* MMX or SSE register to integer */
915 64, /* size of l1 cache. */
916 128, /* size of l2 cache. */
917 32, /* size of prefetch block */
918 1, /* number of parallel prefetches */
919 1, /* Branch cost */
920 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
921 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
922 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
923 COSTS_N_INSNS (1), /* cost of FABS instruction. */
924 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
925 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
926 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
927 DUMMY_STRINGOP_ALGS},
928 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
929 DUMMY_STRINGOP_ALGS},
930 1, /* scalar_stmt_cost. */
931 1, /* scalar load_cost. */
932 1, /* scalar_store_cost. */
933 1, /* vec_stmt_cost. */
934 1, /* vec_to_scalar_cost. */
935 1, /* scalar_to_vec_cost. */
936 1, /* vec_align_load_cost. */
937 2, /* vec_unalign_load_cost. */
938 1, /* vec_store_cost. */
939 3, /* cond_taken_branch_cost. */
940 1, /* cond_not_taken_branch_cost. */
941 };
942
943 static const
944 struct processor_costs k6_cost = {
945 COSTS_N_INSNS (1), /* cost of an add instruction */
946 COSTS_N_INSNS (2), /* cost of a lea instruction */
947 COSTS_N_INSNS (1), /* variable shift costs */
948 COSTS_N_INSNS (1), /* constant shift costs */
949 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
950 COSTS_N_INSNS (3), /* HI */
951 COSTS_N_INSNS (3), /* SI */
952 COSTS_N_INSNS (3), /* DI */
953 COSTS_N_INSNS (3)}, /* other */
954 0, /* cost of multiply per each bit set */
955 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
956 COSTS_N_INSNS (18), /* HI */
957 COSTS_N_INSNS (18), /* SI */
958 COSTS_N_INSNS (18), /* DI */
959 COSTS_N_INSNS (18)}, /* other */
960 COSTS_N_INSNS (2), /* cost of movsx */
961 COSTS_N_INSNS (2), /* cost of movzx */
962 8, /* "large" insn */
963 4, /* MOVE_RATIO */
964 3, /* cost for loading QImode using movzbl */
965 {4, 5, 4}, /* cost of loading integer registers
966 in QImode, HImode and SImode.
967 Relative to reg-reg move (2). */
968 {2, 3, 2}, /* cost of storing integer registers */
969 4, /* cost of reg,reg fld/fst */
970 {6, 6, 6}, /* cost of loading fp registers
971 in SFmode, DFmode and XFmode */
972 {4, 4, 4}, /* cost of storing fp registers
973 in SFmode, DFmode and XFmode */
974 2, /* cost of moving MMX register */
975 {2, 2}, /* cost of loading MMX registers
976 in SImode and DImode */
977 {2, 2}, /* cost of storing MMX registers
978 in SImode and DImode */
979 2, /* cost of moving SSE register */
980 {2, 2, 8}, /* cost of loading SSE registers
981 in SImode, DImode and TImode */
982 {2, 2, 8}, /* cost of storing SSE registers
983 in SImode, DImode and TImode */
984 6, /* MMX or SSE register to integer */
985 32, /* size of l1 cache. */
986 32, /* size of l2 cache. Some models
987 have integrated l2 cache, but
988 optimizing for k6 is not important
989 enough to worry about that. */
990 32, /* size of prefetch block */
991 1, /* number of parallel prefetches */
992 1, /* Branch cost */
993 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
994 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
995 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
996 COSTS_N_INSNS (2), /* cost of FABS instruction. */
997 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
998 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
999 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1000 DUMMY_STRINGOP_ALGS},
1001 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1002 DUMMY_STRINGOP_ALGS},
1003 1, /* scalar_stmt_cost. */
1004 1, /* scalar load_cost. */
1005 1, /* scalar_store_cost. */
1006 1, /* vec_stmt_cost. */
1007 1, /* vec_to_scalar_cost. */
1008 1, /* scalar_to_vec_cost. */
1009 1, /* vec_align_load_cost. */
1010 2, /* vec_unalign_load_cost. */
1011 1, /* vec_store_cost. */
1012 3, /* cond_taken_branch_cost. */
1013 1, /* cond_not_taken_branch_cost. */
1014 };
1015
1016 static const
1017 struct processor_costs athlon_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (2), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (5), /* HI */
1024 COSTS_N_INSNS (5), /* SI */
1025 COSTS_N_INSNS (5), /* DI */
1026 COSTS_N_INSNS (5)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (26), /* HI */
1030 COSTS_N_INSNS (42), /* SI */
1031 COSTS_N_INSNS (74), /* DI */
1032 COSTS_N_INSNS (74)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {3, 4, 3}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {3, 4, 3}, /* cost of storing integer registers */
1042 4, /* cost of reg,reg fld/fst */
1043 {4, 4, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {6, 6, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 6}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 5}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 5, /* MMX or SSE register to integer */
1058 64, /* size of l1 cache. */
1059 256, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 6, /* number of parallel prefetches */
1062 5, /* Branch cost */
1063 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1064 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1065 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1066 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1067 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1068 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1069 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1070 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1071 128 bytes for memset. */
1072 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1073 DUMMY_STRINGOP_ALGS},
1074 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1075 DUMMY_STRINGOP_ALGS},
1076 1, /* scalar_stmt_cost. */
1077 1, /* scalar load_cost. */
1078 1, /* scalar_store_cost. */
1079 1, /* vec_stmt_cost. */
1080 1, /* vec_to_scalar_cost. */
1081 1, /* scalar_to_vec_cost. */
1082 1, /* vec_align_load_cost. */
1083 2, /* vec_unalign_load_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1087 };
1088
1089 static const
1090 struct processor_costs k8_cost = {
1091 COSTS_N_INSNS (1), /* cost of an add instruction */
1092 COSTS_N_INSNS (2), /* cost of a lea instruction */
1093 COSTS_N_INSNS (1), /* variable shift costs */
1094 COSTS_N_INSNS (1), /* constant shift costs */
1095 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1096 COSTS_N_INSNS (4), /* HI */
1097 COSTS_N_INSNS (3), /* SI */
1098 COSTS_N_INSNS (4), /* DI */
1099 COSTS_N_INSNS (5)}, /* other */
1100 0, /* cost of multiply per each bit set */
1101 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1102 COSTS_N_INSNS (26), /* HI */
1103 COSTS_N_INSNS (42), /* SI */
1104 COSTS_N_INSNS (74), /* DI */
1105 COSTS_N_INSNS (74)}, /* other */
1106 COSTS_N_INSNS (1), /* cost of movsx */
1107 COSTS_N_INSNS (1), /* cost of movzx */
1108 8, /* "large" insn */
1109 9, /* MOVE_RATIO */
1110 4, /* cost for loading QImode using movzbl */
1111 {3, 4, 3}, /* cost of loading integer registers
1112 in QImode, HImode and SImode.
1113 Relative to reg-reg move (2). */
1114 {3, 4, 3}, /* cost of storing integer registers */
1115 4, /* cost of reg,reg fld/fst */
1116 {4, 4, 12}, /* cost of loading fp registers
1117 in SFmode, DFmode and XFmode */
1118 {6, 6, 8}, /* cost of storing fp registers
1119 in SFmode, DFmode and XFmode */
1120 2, /* cost of moving MMX register */
1121 {3, 3}, /* cost of loading MMX registers
1122 in SImode and DImode */
1123 {4, 4}, /* cost of storing MMX registers
1124 in SImode and DImode */
1125 2, /* cost of moving SSE register */
1126 {4, 3, 6}, /* cost of loading SSE registers
1127 in SImode, DImode and TImode */
1128 {4, 4, 5}, /* cost of storing SSE registers
1129 in SImode, DImode and TImode */
1130 5, /* MMX or SSE register to integer */
1131 64, /* size of l1 cache. */
1132 512, /* size of l2 cache. */
1133 64, /* size of prefetch block */
1134 /* New AMD processors never drop prefetches; if they cannot be performed
1135 immediately, they are queued. We set the number of simultaneous prefetches
1136 to a large constant to reflect this (it is probably not a good idea not
1137 to limit the number of prefetches at all, as their execution also takes some
1138 time). */
1139 100, /* number of parallel prefetches */
1140 3, /* Branch cost */
1141 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1142 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1143 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1144 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1145 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1146 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1147 /* K8 has optimized REP instructions for medium-sized blocks, but for very
1148 small blocks it is better to use a loop. For large blocks, libcall can
1149 do nontemporal accesses and beat inline considerably. */
1150 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1151 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1152 {{libcall, {{8, loop}, {24, unrolled_loop},
1153 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1154 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1155 4, /* scalar_stmt_cost. */
1156 2, /* scalar load_cost. */
1157 2, /* scalar_store_cost. */
1158 5, /* vec_stmt_cost. */
1159 0, /* vec_to_scalar_cost. */
1160 2, /* scalar_to_vec_cost. */
1161 2, /* vec_align_load_cost. */
1162 3, /* vec_unalign_load_cost. */
1163 3, /* vec_store_cost. */
1164 3, /* cond_taken_branch_cost. */
1165 2, /* cond_not_taken_branch_cost. */
1166 };
1167
1168 struct processor_costs amdfam10_cost = {
1169 COSTS_N_INSNS (1), /* cost of an add instruction */
1170 COSTS_N_INSNS (2), /* cost of a lea instruction */
1171 COSTS_N_INSNS (1), /* variable shift costs */
1172 COSTS_N_INSNS (1), /* constant shift costs */
1173 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1174 COSTS_N_INSNS (4), /* HI */
1175 COSTS_N_INSNS (3), /* SI */
1176 COSTS_N_INSNS (4), /* DI */
1177 COSTS_N_INSNS (5)}, /* other */
1178 0, /* cost of multiply per each bit set */
1179 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1180 COSTS_N_INSNS (35), /* HI */
1181 COSTS_N_INSNS (51), /* SI */
1182 COSTS_N_INSNS (83), /* DI */
1183 COSTS_N_INSNS (83)}, /* other */
1184 COSTS_N_INSNS (1), /* cost of movsx */
1185 COSTS_N_INSNS (1), /* cost of movzx */
1186 8, /* "large" insn */
1187 9, /* MOVE_RATIO */
1188 4, /* cost for loading QImode using movzbl */
1189 {3, 4, 3}, /* cost of loading integer registers
1190 in QImode, HImode and SImode.
1191 Relative to reg-reg move (2). */
1192 {3, 4, 3}, /* cost of storing integer registers */
1193 4, /* cost of reg,reg fld/fst */
1194 {4, 4, 12}, /* cost of loading fp registers
1195 in SFmode, DFmode and XFmode */
1196 {6, 6, 8}, /* cost of storing fp registers
1197 in SFmode, DFmode and XFmode */
1198 2, /* cost of moving MMX register */
1199 {3, 3}, /* cost of loading MMX registers
1200 in SImode and DImode */
1201 {4, 4}, /* cost of storing MMX registers
1202 in SImode and DImode */
1203 2, /* cost of moving SSE register */
1204 {4, 4, 3}, /* cost of loading SSE registers
1205 in SImode, DImode and TImode */
1206 {4, 4, 5}, /* cost of storing SSE registers
1207 in SImode, DImode and TImode */
1208 3, /* MMX or SSE register to integer */
1209 /* On K8:
1210 MOVD reg64, xmmreg Double FSTORE 4
1211 MOVD reg32, xmmreg Double FSTORE 4
1212 On AMDFAM10:
1213 MOVD reg64, xmmreg Double FADD 3
1214 1/1 1/1
1215 MOVD reg32, xmmreg Double FADD 3
1216 1/1 1/1 */
1217 64, /* size of l1 cache. */
1218 512, /* size of l2 cache. */
1219 64, /* size of prefetch block */
1220 /* New AMD processors never drop prefetches; if they cannot be performed
1221 immediately, they are queued. We set the number of simultaneous prefetches
1222 to a large constant to reflect this (it is probably not a good idea not
1223 to limit the number of prefetches at all, as their execution also takes some
1224 time). */
1225 100, /* number of parallel prefetches */
1226 2, /* Branch cost */
1227 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1228 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1229 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1230 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1231 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1232 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1233
1234 /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
1235 very small blocks it is better to use a loop. For large blocks, libcall can
1236 do nontemporal accesses and beat inline considerably. */
1237 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1238 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1239 {{libcall, {{8, loop}, {24, unrolled_loop},
1240 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1241 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1242 4, /* scalar_stmt_cost. */
1243 2, /* scalar load_cost. */
1244 2, /* scalar_store_cost. */
1245 6, /* vec_stmt_cost. */
1246 0, /* vec_to_scalar_cost. */
1247 2, /* scalar_to_vec_cost. */
1248 2, /* vec_align_load_cost. */
1249 2, /* vec_unalign_load_cost. */
1250 2, /* vec_store_cost. */
1251 2, /* cond_taken_branch_cost. */
1252 1, /* cond_not_taken_branch_cost. */
1253 };
1254
1255 struct processor_costs bdver1_cost = {
1256 COSTS_N_INSNS (1), /* cost of an add instruction */
1257 COSTS_N_INSNS (1), /* cost of a lea instruction */
1258 COSTS_N_INSNS (1), /* variable shift costs */
1259 COSTS_N_INSNS (1), /* constant shift costs */
1260 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1261 COSTS_N_INSNS (4), /* HI */
1262 COSTS_N_INSNS (4), /* SI */
1263 COSTS_N_INSNS (6), /* DI */
1264 COSTS_N_INSNS (6)}, /* other */
1265 0, /* cost of multiply per each bit set */
1266 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1267 COSTS_N_INSNS (35), /* HI */
1268 COSTS_N_INSNS (51), /* SI */
1269 COSTS_N_INSNS (83), /* DI */
1270 COSTS_N_INSNS (83)}, /* other */
1271 COSTS_N_INSNS (1), /* cost of movsx */
1272 COSTS_N_INSNS (1), /* cost of movzx */
1273 8, /* "large" insn */
1274 9, /* MOVE_RATIO */
1275 4, /* cost for loading QImode using movzbl */
1276 {5, 5, 4}, /* cost of loading integer registers
1277 in QImode, HImode and SImode.
1278 Relative to reg-reg move (2). */
1279 {4, 4, 4}, /* cost of storing integer registers */
1280 2, /* cost of reg,reg fld/fst */
1281 {5, 5, 12}, /* cost of loading fp registers
1282 in SFmode, DFmode and XFmode */
1283 {4, 4, 8}, /* cost of storing fp registers
1284 in SFmode, DFmode and XFmode */
1285 2, /* cost of moving MMX register */
1286 {4, 4}, /* cost of loading MMX registers
1287 in SImode and DImode */
1288 {4, 4}, /* cost of storing MMX registers
1289 in SImode and DImode */
1290 2, /* cost of moving SSE register */
1291 {4, 4, 4}, /* cost of loading SSE registers
1292 in SImode, DImode and TImode */
1293 {4, 4, 4}, /* cost of storing SSE registers
1294 in SImode, DImode and TImode */
1295 2, /* MMX or SSE register to integer */
1296 /* On K8:
1297 MOVD reg64, xmmreg Double FSTORE 4
1298 MOVD reg32, xmmreg Double FSTORE 4
1299 On AMDFAM10:
1300 MOVD reg64, xmmreg Double FADD 3
1301 1/1 1/1
1302 MOVD reg32, xmmreg Double FADD 3
1303 1/1 1/1 */
1304 16, /* size of l1 cache. */
1305 2048, /* size of l2 cache. */
1306 64, /* size of prefetch block */
1307 /* New AMD processors never drop prefetches; if they cannot be performed
1308 immediately, they are queued. We set the number of simultaneous prefetches
1309 to a large constant to reflect this (it is probably not a good idea not
1310 to limit the number of prefetches at all, as their execution also takes some
1311 time). */
1312 100, /* number of parallel prefetches */
1313 2, /* Branch cost */
1314 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1315 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1316 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1317 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1318 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1319 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1320
1321 /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
1322 very small blocks it is better to use a loop. For large blocks, libcall
1323 can do nontemporal accesses and beat inline considerably. */
1324 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1325 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1326 {{libcall, {{8, loop}, {24, unrolled_loop},
1327 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1328 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1329 6, /* scalar_stmt_cost. */
1330 4, /* scalar load_cost. */
1331 4, /* scalar_store_cost. */
1332 6, /* vec_stmt_cost. */
1333 0, /* vec_to_scalar_cost. */
1334 2, /* scalar_to_vec_cost. */
1335 4, /* vec_align_load_cost. */
1336 4, /* vec_unalign_load_cost. */
1337 4, /* vec_store_cost. */
1338 2, /* cond_taken_branch_cost. */
1339 1, /* cond_not_taken_branch_cost. */
1340 };
1341
1342 struct processor_costs btver1_cost = {
1343 COSTS_N_INSNS (1), /* cost of an add instruction */
1344 COSTS_N_INSNS (2), /* cost of a lea instruction */
1345 COSTS_N_INSNS (1), /* variable shift costs */
1346 COSTS_N_INSNS (1), /* constant shift costs */
1347 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1348 COSTS_N_INSNS (4), /* HI */
1349 COSTS_N_INSNS (3), /* SI */
1350 COSTS_N_INSNS (4), /* DI */
1351 COSTS_N_INSNS (5)}, /* other */
1352 0, /* cost of multiply per each bit set */
1353 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1354 COSTS_N_INSNS (35), /* HI */
1355 COSTS_N_INSNS (51), /* SI */
1356 COSTS_N_INSNS (83), /* DI */
1357 COSTS_N_INSNS (83)}, /* other */
1358 COSTS_N_INSNS (1), /* cost of movsx */
1359 COSTS_N_INSNS (1), /* cost of movzx */
1360 8, /* "large" insn */
1361 9, /* MOVE_RATIO */
1362 4, /* cost for loading QImode using movzbl */
1363 {3, 4, 3}, /* cost of loading integer registers
1364 in QImode, HImode and SImode.
1365 Relative to reg-reg move (2). */
1366 {3, 4, 3}, /* cost of storing integer registers */
1367 4, /* cost of reg,reg fld/fst */
1368 {4, 4, 12}, /* cost of loading fp registers
1369 in SFmode, DFmode and XFmode */
1370 {6, 6, 8}, /* cost of storing fp registers
1371 in SFmode, DFmode and XFmode */
1372 2, /* cost of moving MMX register */
1373 {3, 3}, /* cost of loading MMX registers
1374 in SImode and DImode */
1375 {4, 4}, /* cost of storing MMX registers
1376 in SImode and DImode */
1377 2, /* cost of moving SSE register */
1378 {4, 4, 3}, /* cost of loading SSE registers
1379 in SImode, DImode and TImode */
1380 {4, 4, 5}, /* cost of storing SSE registers
1381 in SImode, DImode and TImode */
1382 3, /* MMX or SSE register to integer */
1383 /* On K8:
1384 MOVD reg64, xmmreg Double FSTORE 4
1385 MOVD reg32, xmmreg Double FSTORE 4
1386 On AMDFAM10:
1387 MOVD reg64, xmmreg Double FADD 3
1388 1/1 1/1
1389 MOVD reg32, xmmreg Double FADD 3
1390 1/1 1/1 */
1391 32, /* size of l1 cache. */
1392 512, /* size of l2 cache. */
1393 64, /* size of prefetch block */
1394 100, /* number of parallel prefetches */
1395 2, /* Branch cost */
1396 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1397 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1398 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1399 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1400 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1401 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1402
1403 /* BTVER1 has optimized REP instructions for medium-sized blocks, but for
1404 very small blocks it is better to use a loop. For large blocks, libcall can
1405 do nontemporal accesses and beat inline considerably. */
1406 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1407 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1408 {{libcall, {{8, loop}, {24, unrolled_loop},
1409 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1410 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1411 4, /* scalar_stmt_cost. */
1412 2, /* scalar load_cost. */
1413 2, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 2, /* vec_align_load_cost. */
1418 2, /* vec_unalign_load_cost. */
1419 2, /* vec_store_cost. */
1420 2, /* cond_taken_branch_cost. */
1421 1, /* cond_not_taken_branch_cost. */
1422 };
1423
1424 static const
1425 struct processor_costs pentium4_cost = {
1426 COSTS_N_INSNS (1), /* cost of an add instruction */
1427 COSTS_N_INSNS (3), /* cost of a lea instruction */
1428 COSTS_N_INSNS (4), /* variable shift costs */
1429 COSTS_N_INSNS (4), /* constant shift costs */
1430 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1431 COSTS_N_INSNS (15), /* HI */
1432 COSTS_N_INSNS (15), /* SI */
1433 COSTS_N_INSNS (15), /* DI */
1434 COSTS_N_INSNS (15)}, /* other */
1435 0, /* cost of multiply per each bit set */
1436 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1437 COSTS_N_INSNS (56), /* HI */
1438 COSTS_N_INSNS (56), /* SI */
1439 COSTS_N_INSNS (56), /* DI */
1440 COSTS_N_INSNS (56)}, /* other */
1441 COSTS_N_INSNS (1), /* cost of movsx */
1442 COSTS_N_INSNS (1), /* cost of movzx */
1443 16, /* "large" insn */
1444 6, /* MOVE_RATIO */
1445 2, /* cost for loading QImode using movzbl */
1446 {4, 5, 4}, /* cost of loading integer registers
1447 in QImode, HImode and SImode.
1448 Relative to reg-reg move (2). */
1449 {2, 3, 2}, /* cost of storing integer registers */
1450 2, /* cost of reg,reg fld/fst */
1451 {2, 2, 6}, /* cost of loading fp registers
1452 in SFmode, DFmode and XFmode */
1453 {4, 4, 6}, /* cost of storing fp registers
1454 in SFmode, DFmode and XFmode */
1455 2, /* cost of moving MMX register */
1456 {2, 2}, /* cost of loading MMX registers
1457 in SImode and DImode */
1458 {2, 2}, /* cost of storing MMX registers
1459 in SImode and DImode */
1460 12, /* cost of moving SSE register */
1461 {12, 12, 12}, /* cost of loading SSE registers
1462 in SImode, DImode and TImode */
1463 {2, 2, 8}, /* cost of storing SSE registers
1464 in SImode, DImode and TImode */
1465 10, /* MMX or SSE register to integer */
1466 8, /* size of l1 cache. */
1467 256, /* size of l2 cache. */
1468 64, /* size of prefetch block */
1469 6, /* number of parallel prefetches */
1470 2, /* Branch cost */
1471 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1472 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1473 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1474 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1475 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1476 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1477 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1478 DUMMY_STRINGOP_ALGS},
1479 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1480 {-1, libcall}}},
1481 DUMMY_STRINGOP_ALGS},
1482 1, /* scalar_stmt_cost. */
1483 1, /* scalar load_cost. */
1484 1, /* scalar_store_cost. */
1485 1, /* vec_stmt_cost. */
1486 1, /* vec_to_scalar_cost. */
1487 1, /* scalar_to_vec_cost. */
1488 1, /* vec_align_load_cost. */
1489 2, /* vec_unalign_load_cost. */
1490 1, /* vec_store_cost. */
1491 3, /* cond_taken_branch_cost. */
1492 1, /* cond_not_taken_branch_cost. */
1493 };
1494
1495 static const
1496 struct processor_costs nocona_cost = {
1497 COSTS_N_INSNS (1), /* cost of an add instruction */
1498 COSTS_N_INSNS (1), /* cost of a lea instruction */
1499 COSTS_N_INSNS (1), /* variable shift costs */
1500 COSTS_N_INSNS (1), /* constant shift costs */
1501 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1502 COSTS_N_INSNS (10), /* HI */
1503 COSTS_N_INSNS (10), /* SI */
1504 COSTS_N_INSNS (10), /* DI */
1505 COSTS_N_INSNS (10)}, /* other */
1506 0, /* cost of multiply per each bit set */
1507 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1508 COSTS_N_INSNS (66), /* HI */
1509 COSTS_N_INSNS (66), /* SI */
1510 COSTS_N_INSNS (66), /* DI */
1511 COSTS_N_INSNS (66)}, /* other */
1512 COSTS_N_INSNS (1), /* cost of movsx */
1513 COSTS_N_INSNS (1), /* cost of movzx */
1514 16, /* "large" insn */
1515 17, /* MOVE_RATIO */
1516 4, /* cost for loading QImode using movzbl */
1517 {4, 4, 4}, /* cost of loading integer registers
1518 in QImode, HImode and SImode.
1519 Relative to reg-reg move (2). */
1520 {4, 4, 4}, /* cost of storing integer registers */
1521 3, /* cost of reg,reg fld/fst */
1522 {12, 12, 12}, /* cost of loading fp registers
1523 in SFmode, DFmode and XFmode */
1524 {4, 4, 4}, /* cost of storing fp registers
1525 in SFmode, DFmode and XFmode */
1526 6, /* cost of moving MMX register */
1527 {12, 12}, /* cost of loading MMX registers
1528 in SImode and DImode */
1529 {12, 12}, /* cost of storing MMX registers
1530 in SImode and DImode */
1531 6, /* cost of moving SSE register */
1532 {12, 12, 12}, /* cost of loading SSE registers
1533 in SImode, DImode and TImode */
1534 {12, 12, 12}, /* cost of storing SSE registers
1535 in SImode, DImode and TImode */
1536 8, /* MMX or SSE register to integer */
1537 8, /* size of l1 cache. */
1538 1024, /* size of l2 cache. */
1539 128, /* size of prefetch block */
1540 8, /* number of parallel prefetches */
1541 1, /* Branch cost */
1542 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1543 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1544 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1545 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1546 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1547 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1548 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1549 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1550 {100000, unrolled_loop}, {-1, libcall}}}},
1551 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1552 {-1, libcall}}},
1553 {libcall, {{24, loop}, {64, unrolled_loop},
1554 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1555 1, /* scalar_stmt_cost. */
1556 1, /* scalar load_cost. */
1557 1, /* scalar_store_cost. */
1558 1, /* vec_stmt_cost. */
1559 1, /* vec_to_scalar_cost. */
1560 1, /* scalar_to_vec_cost. */
1561 1, /* vec_align_load_cost. */
1562 2, /* vec_unalign_load_cost. */
1563 1, /* vec_store_cost. */
1564 3, /* cond_taken_branch_cost. */
1565 1, /* cond_not_taken_branch_cost. */
1566 };
1567
1568 static const
1569 struct processor_costs atom_cost = {
1570 COSTS_N_INSNS (1), /* cost of an add instruction */
1571 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1572 COSTS_N_INSNS (1), /* variable shift costs */
1573 COSTS_N_INSNS (1), /* constant shift costs */
1574 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1575 COSTS_N_INSNS (4), /* HI */
1576 COSTS_N_INSNS (3), /* SI */
1577 COSTS_N_INSNS (4), /* DI */
1578 COSTS_N_INSNS (2)}, /* other */
1579 0, /* cost of multiply per each bit set */
1580 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1581 COSTS_N_INSNS (26), /* HI */
1582 COSTS_N_INSNS (42), /* SI */
1583 COSTS_N_INSNS (74), /* DI */
1584 COSTS_N_INSNS (74)}, /* other */
1585 COSTS_N_INSNS (1), /* cost of movsx */
1586 COSTS_N_INSNS (1), /* cost of movzx */
1587 8, /* "large" insn */
1588 17, /* MOVE_RATIO */
1589 2, /* cost for loading QImode using movzbl */
1590 {4, 4, 4}, /* cost of loading integer registers
1591 in QImode, HImode and SImode.
1592 Relative to reg-reg move (2). */
1593 {4, 4, 4}, /* cost of storing integer registers */
1594 4, /* cost of reg,reg fld/fst */
1595 {12, 12, 12}, /* cost of loading fp registers
1596 in SFmode, DFmode and XFmode */
1597 {6, 6, 8}, /* cost of storing fp registers
1598 in SFmode, DFmode and XFmode */
1599 2, /* cost of moving MMX register */
1600 {8, 8}, /* cost of loading MMX registers
1601 in SImode and DImode */
1602 {8, 8}, /* cost of storing MMX registers
1603 in SImode and DImode */
1604 2, /* cost of moving SSE register */
1605 {8, 8, 8}, /* cost of loading SSE registers
1606 in SImode, DImode and TImode */
1607 {8, 8, 8}, /* cost of storing SSE registers
1608 in SImode, DImode and TImode */
1609 5, /* MMX or SSE register to integer */
1610 32, /* size of l1 cache. */
1611 256, /* size of l2 cache. */
1612 64, /* size of prefetch block */
1613 6, /* number of parallel prefetches */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1622 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1623 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1624 {{libcall, {{8, loop}, {15, unrolled_loop},
1625 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1626 {libcall, {{24, loop}, {32, unrolled_loop},
1627 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1628 1, /* scalar_stmt_cost. */
1629 1, /* scalar load_cost. */
1630 1, /* scalar_store_cost. */
1631 1, /* vec_stmt_cost. */
1632 1, /* vec_to_scalar_cost. */
1633 1, /* scalar_to_vec_cost. */
1634 1, /* vec_align_load_cost. */
1635 2, /* vec_unalign_load_cost. */
1636 1, /* vec_store_cost. */
1637 3, /* cond_taken_branch_cost. */
1638 1, /* cond_not_taken_branch_cost. */
1639 };
1640
1641 /* Generic64 should produce code tuned for Nocona and K8. */
1642 static const
1643 struct processor_costs generic64_cost = {
1644 COSTS_N_INSNS (1), /* cost of an add instruction */
1645 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1646 that cost, however, our current implementation of synth_mult results in
1647 the use of unnecessary temporary registers, causing a regression on several
1648 SPECfp benchmarks. */
1649 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1650 COSTS_N_INSNS (1), /* variable shift costs */
1651 COSTS_N_INSNS (1), /* constant shift costs */
1652 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1653 COSTS_N_INSNS (4), /* HI */
1654 COSTS_N_INSNS (3), /* SI */
1655 COSTS_N_INSNS (4), /* DI */
1656 COSTS_N_INSNS (2)}, /* other */
1657 0, /* cost of multiply per each bit set */
1658 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1659 COSTS_N_INSNS (26), /* HI */
1660 COSTS_N_INSNS (42), /* SI */
1661 COSTS_N_INSNS (74), /* DI */
1662 COSTS_N_INSNS (74)}, /* other */
1663 COSTS_N_INSNS (1), /* cost of movsx */
1664 COSTS_N_INSNS (1), /* cost of movzx */
1665 8, /* "large" insn */
1666 17, /* MOVE_RATIO */
1667 4, /* cost for loading QImode using movzbl */
1668 {4, 4, 4}, /* cost of loading integer registers
1669 in QImode, HImode and SImode.
1670 Relative to reg-reg move (2). */
1671 {4, 4, 4}, /* cost of storing integer registers */
1672 4, /* cost of reg,reg fld/fst */
1673 {12, 12, 12}, /* cost of loading fp registers
1674 in SFmode, DFmode and XFmode */
1675 {6, 6, 8}, /* cost of storing fp registers
1676 in SFmode, DFmode and XFmode */
1677 2, /* cost of moving MMX register */
1678 {8, 8}, /* cost of loading MMX registers
1679 in SImode and DImode */
1680 {8, 8}, /* cost of storing MMX registers
1681 in SImode and DImode */
1682 2, /* cost of moving SSE register */
1683 {8, 8, 8}, /* cost of loading SSE registers
1684 in SImode, DImode and TImode */
1685 {8, 8, 8}, /* cost of storing SSE registers
1686 in SImode, DImode and TImode */
1687 5, /* MMX or SSE register to integer */
1688 32, /* size of l1 cache. */
1689 512, /* size of l2 cache. */
1690 64, /* size of prefetch block */
1691 6, /* number of parallel prefetches */
1692 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1693 value is increased to the perhaps more appropriate value of 5. */
1694 3, /* Branch cost */
1695 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1696 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1697 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1698 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1699 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1700 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1701 {DUMMY_STRINGOP_ALGS,
1702 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1703 {DUMMY_STRINGOP_ALGS,
1704 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1705 1, /* scalar_stmt_cost. */
1706 1, /* scalar load_cost. */
1707 1, /* scalar_store_cost. */
1708 1, /* vec_stmt_cost. */
1709 1, /* vec_to_scalar_cost. */
1710 1, /* scalar_to_vec_cost. */
1711 1, /* vec_align_load_cost. */
1712 2, /* vec_unalign_load_cost. */
1713 1, /* vec_store_cost. */
1714 3, /* cond_taken_branch_cost. */
1715 1, /* cond_not_taken_branch_cost. */
1716 };
1717
1718 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1719 Athlon and K8. */
1720 static const
1721 struct processor_costs generic32_cost = {
1722 COSTS_N_INSNS (1), /* cost of an add instruction */
1723 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1724 COSTS_N_INSNS (1), /* variable shift costs */
1725 COSTS_N_INSNS (1), /* constant shift costs */
1726 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1727 COSTS_N_INSNS (4), /* HI */
1728 COSTS_N_INSNS (3), /* SI */
1729 COSTS_N_INSNS (4), /* DI */
1730 COSTS_N_INSNS (2)}, /* other */
1731 0, /* cost of multiply per each bit set */
1732 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1733 COSTS_N_INSNS (26), /* HI */
1734 COSTS_N_INSNS (42), /* SI */
1735 COSTS_N_INSNS (74), /* DI */
1736 COSTS_N_INSNS (74)}, /* other */
1737 COSTS_N_INSNS (1), /* cost of movsx */
1738 COSTS_N_INSNS (1), /* cost of movzx */
1739 8, /* "large" insn */
1740 17, /* MOVE_RATIO */
1741 4, /* cost for loading QImode using movzbl */
1742 {4, 4, 4}, /* cost of loading integer registers
1743 in QImode, HImode and SImode.
1744 Relative to reg-reg move (2). */
1745 {4, 4, 4}, /* cost of storing integer registers */
1746 4, /* cost of reg,reg fld/fst */
1747 {12, 12, 12}, /* cost of loading fp registers
1748 in SFmode, DFmode and XFmode */
1749 {6, 6, 8}, /* cost of storing fp registers
1750 in SFmode, DFmode and XFmode */
1751 2, /* cost of moving MMX register */
1752 {8, 8}, /* cost of loading MMX registers
1753 in SImode and DImode */
1754 {8, 8}, /* cost of storing MMX registers
1755 in SImode and DImode */
1756 2, /* cost of moving SSE register */
1757 {8, 8, 8}, /* cost of loading SSE registers
1758 in SImode, DImode and TImode */
1759 {8, 8, 8}, /* cost of storing SSE registers
1760 in SImode, DImode and TImode */
1761 5, /* MMX or SSE register to integer */
1762 32, /* size of l1 cache. */
1763 256, /* size of l2 cache. */
1764 64, /* size of prefetch block */
1765 6, /* number of parallel prefetches */
1766 3, /* Branch cost */
1767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1773 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1774 DUMMY_STRINGOP_ALGS},
1775 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1776 DUMMY_STRINGOP_ALGS},
1777 1, /* scalar_stmt_cost. */
1778 1, /* scalar load_cost. */
1779 1, /* scalar_store_cost. */
1780 1, /* vec_stmt_cost. */
1781 1, /* vec_to_scalar_cost. */
1782 1, /* scalar_to_vec_cost. */
1783 1, /* vec_align_load_cost. */
1784 2, /* vec_unalign_load_cost. */
1785 1, /* vec_store_cost. */
1786 3, /* cond_taken_branch_cost. */
1787 1, /* cond_not_taken_branch_cost. */
1788 };
1789
1790 const struct processor_costs *ix86_cost = &pentium_cost;
1791
1792 /* Processor feature/optimization bitmasks. */
1793 #define m_386 (1<<PROCESSOR_I386)
1794 #define m_486 (1<<PROCESSOR_I486)
1795 #define m_PENT (1<<PROCESSOR_PENTIUM)
1796 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1797 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1798 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1799 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1800 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1801 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1802 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1803 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1804 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1805 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1806 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1807 #define m_ATOM (1<<PROCESSOR_ATOM)
1808
1809 #define m_GEODE (1<<PROCESSOR_GEODE)
1810 #define m_K6 (1<<PROCESSOR_K6)
1811 #define m_K6_GEODE (m_K6 | m_GEODE)
1812 #define m_K8 (1<<PROCESSOR_K8)
1813 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1814 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1815 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1816 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1817 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1818 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1819
1820 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1821 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1822
1823 /* Generic instruction choice should be the common subset of supported CPUs
1824 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1825 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1826
1827 /* Feature tests against the various tunings. */
1828 unsigned char ix86_tune_features[X86_TUNE_LAST];
1829
1830 /* Feature tests against the various tunings used to create ix86_tune_features
1831 based on the processor mask. */
1832 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1833 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1834 negatively, so enabling it for Generic64 seems like a good code size
1835 tradeoff. We can't enable it for 32bit generic because it does not
1836 work well with PPro based chips. */
1837 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1838
1839 /* X86_TUNE_PUSH_MEMORY */
1840 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1841 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1842
1843 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1844 m_486 | m_PENT,
1845
1846 /* X86_TUNE_UNROLL_STRLEN */
1847 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1848 | m_CORE2I7 | m_GENERIC,
1849
1850 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1851 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1852 | m_CORE2I7 | m_GENERIC,
1853
1854 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1855 on simulation results. But after P4 was made, no performance benefit
1856 was observed with branch hints; they also increase the code size.
1857 As a result, icc never generates branch hints. */
1858 0,
1859
1860 /* X86_TUNE_DOUBLE_WITH_ADD */
1861 ~m_386,
1862
1863 /* X86_TUNE_USE_SAHF */
1864 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1865 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1866
1867 /* X86_TUNE_MOVX: Enable zero extension of integer registers to avoid
1868 partial dependencies. */
1869 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1870 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1871
1872 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1873 register stalls on the Generic32 compilation setting as well. However,
1874 in the current implementation the partial register stalls are not eliminated
1875 very well - they can be introduced via subregs synthesized by combine
1876 and can happen in caller/callee saving sequences. Because this option
1877 pays back little on PPro based chips and conflicts with the partial reg
1878 dependencies used by Athlon/P4 based chips, it is better to leave it off
1879 for generic32 for now. */
1880 m_PPRO,
1881
1882 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1883 m_CORE2I7 | m_GENERIC,
1884
1885 /* X86_TUNE_USE_HIMODE_FIOP */
1886 m_386 | m_486 | m_K6_GEODE,
1887
1888 /* X86_TUNE_USE_SIMODE_FIOP */
1889 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1890
1891 /* X86_TUNE_USE_MOV0 */
1892 m_K6,
1893
1894 /* X86_TUNE_USE_CLTD */
1895 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1896
1897 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1898 m_PENT4,
1899
1900 /* X86_TUNE_SPLIT_LONG_MOVES */
1901 m_PPRO,
1902
1903 /* X86_TUNE_READ_MODIFY_WRITE */
1904 ~m_PENT,
1905
1906 /* X86_TUNE_READ_MODIFY */
1907 ~(m_PENT | m_PPRO),
1908
1909 /* X86_TUNE_PROMOTE_QIMODE */
1910 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1911 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1912
1913 /* X86_TUNE_FAST_PREFIX */
1914 ~(m_PENT | m_486 | m_386),
1915
1916 /* X86_TUNE_SINGLE_STRINGOP */
1917 m_386 | m_PENT4 | m_NOCONA,
1918
1919 /* X86_TUNE_QIMODE_MATH */
1920 ~0,
1921
1922 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1923 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1924 might be considered for Generic32 if our scheme for avoiding partial
1925 stalls was more effective. */
1926 ~m_PPRO,
1927
1928 /* X86_TUNE_PROMOTE_QI_REGS */
1929 0,
1930
1931 /* X86_TUNE_PROMOTE_HI_REGS */
1932 m_PPRO,
1933
1934 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1935 over esp addition. */
1936 m_386 | m_486 | m_PENT | m_PPRO,
1937
1938 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1939 over esp addition. */
1940 m_PENT,
1941
1942 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1943 over esp subtraction. */
1944 m_386 | m_486 | m_PENT | m_K6_GEODE,
1945
1946 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1947 over esp subtraction. */
1948 m_PENT | m_K6_GEODE,
1949
1950 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1951 for DFmode copies */
1952 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1953 | m_GENERIC | m_GEODE),
1954
1955 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1956 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1957
1958 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1959 conflict here between PPro/Pentium4 based chips that treat 128bit
1960 SSE registers as single units and K8 based chips that divide SSE
1961 registers into two 64bit halves. This knob promotes all store destinations
1962 to 128bit to allow register renaming on 128bit SSE units, but usually
1963 results in one extra micro-op on 64bit SSE units. Experimental results
1964 show that disabling this option on P4 brings an over 20% SPECfp regression,
1965 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1966 masked by careful scheduling of moves. */
1967 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1968 | m_AMDFAM10 | m_BDVER1,
1969
1970 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1971 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1972
1973 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1974 m_BDVER1 | m_COREI7,
1975
1976 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1977 m_BDVER1,
1978
1979 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1980 are resolved on SSE register parts instead of whole registers, so we may
1981 maintain just the lower part of scalar values in the proper format, leaving
1982 the upper part undefined. */
1983 m_ATHLON_K8,
1984
1985 /* X86_TUNE_SSE_TYPELESS_STORES */
1986 m_AMD_MULTIPLE,
1987
1988 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1989 m_PPRO | m_PENT4 | m_NOCONA,
1990
1991 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1992 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1993
1994 /* X86_TUNE_PROLOGUE_USING_MOVE */
1995 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1996
1997 /* X86_TUNE_EPILOGUE_USING_MOVE */
1998 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1999
2000 /* X86_TUNE_SHIFT1 */
2001 ~m_486,
2002
2003 /* X86_TUNE_USE_FFREEP */
2004 m_AMD_MULTIPLE,
2005
2006 /* X86_TUNE_INTER_UNIT_MOVES */
2007 ~(m_AMD_MULTIPLE | m_GENERIC),
2008
2009 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2010 ~(m_AMDFAM10 | m_BDVER1),
2011
2012 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2013 than 4 branch instructions in the 16 byte window. */
2014 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2015 | m_GENERIC,
2016
2017 /* X86_TUNE_SCHEDULE */
2018 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2019 | m_GENERIC,
2020
2021 /* X86_TUNE_USE_BT */
2022 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2023
2024 /* X86_TUNE_USE_INCDEC */
2025 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2026
2027 /* X86_TUNE_PAD_RETURNS */
2028 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2029
2030 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2031 m_ATOM,
2032
2033 /* X86_TUNE_EXT_80387_CONSTANTS */
2034 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2035 | m_CORE2I7 | m_GENERIC,
2036
2037 /* X86_TUNE_SHORTEN_X87_SSE */
2038 ~m_K8,
2039
2040 /* X86_TUNE_AVOID_VECTOR_DECODE */
2041 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2042
2043 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2044 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2045 ~(m_386 | m_486),
2046
2047 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
2048 the vector path on AMD machines. */
2049 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2050
2051 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
2052 on AMD machines. */
2053 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2054
2055 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
2056 than via MOV. */
2057 m_PENT,
2058
2059 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2060 but one byte longer. */
2061 m_PENT,
2062
2063 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2064 operand that cannot be represented using a modRM byte. The XOR
2065 replacement is long decoded, so this split helps here as well. */
2066 m_K6,
2067
2068 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2069 from FP to FP. */
2070 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2071
2072 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2073 from integer to FP. */
2074 m_AMDFAM10,
2075
2076 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2077 with a subsequent conditional jump instruction into a single
2078 compare-and-branch uop. */
2079 m_BDVER1,
2080
2081 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2082 will impact LEA instruction selection. */
2083 m_ATOM,
2084
2085 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2086 instructions. */
2087 ~m_ATOM,
2088
2089 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2090 at -O3. For the moment, the prefetching seems badly tuned for Intel
2091 chips. */
2092 m_K6_GEODE | m_AMD_MULTIPLE
2093 };
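/* Editorial note (illustrative sketch, not part of GCC): each entry above is
   an OR of m_* processor bits, and ix86_tune_features[] is expected to be
   filled in during option overriding by testing the bit of the CPU we are
   tuning for, roughly as follows (names as used elsewhere in this file):  */
#if 0 /* example sketch only */
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  unsigned int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i]
      = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
}
#endif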
2094
2095 /* Feature tests against the various architecture variations. */
2096 unsigned char ix86_arch_features[X86_ARCH_LAST];
2097
2098 /* Feature tests against the various architecture variations, used to create
2099 ix86_arch_features based on the processor mask. */
2100 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2101 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2102 ~(m_386 | m_486 | m_PENT | m_K6),
2103
2104 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2105 ~m_386,
2106
2107 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2108 ~(m_386 | m_486),
2109
2110 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2111 ~m_386,
2112
2113 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2114 ~m_386,
2115 };
2116
2117 static const unsigned int x86_accumulate_outgoing_args
2118 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2119 | m_GENERIC;
2120
2121 static const unsigned int x86_arch_always_fancy_math_387
2122 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2123 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2124
2125 /* In case the average insn count for a single function invocation is
2126 lower than this constant, emit fast (but longer) prologue and
2127 epilogue code. */
2128 #define FAST_PROLOGUE_INSN_COUNT 20
2129
2130 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2131 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2132 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2133 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2134
2135 /* Array of the smallest class containing reg number REGNO, indexed by
2136 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2137
2138 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2139 {
2140 /* ax, dx, cx, bx */
2141 AREG, DREG, CREG, BREG,
2142 /* si, di, bp, sp */
2143 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2144 /* FP registers */
2145 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2146 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2147 /* arg pointer */
2148 NON_Q_REGS,
2149 /* flags, fpsr, fpcr, frame */
2150 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2151 /* SSE registers */
2152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2153 SSE_REGS, SSE_REGS,
2154 /* MMX registers */
2155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2156 MMX_REGS, MMX_REGS,
2157 /* REX registers */
2158 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2159 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2160 /* SSE REX registers */
2161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2162 SSE_REGS, SSE_REGS,
2163 };
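/* Editorial note (illustrative, assumption about i386.h): REGNO_REG_CLASS is
   expected to index straight into this table, so e.g. register 0 (%eax) maps
   to AREG and the FP stack registers map to the FLOAT_REGS family.  */
#if 0 /* example sketch only */
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif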
2164
2165 /* The "default" register map used in 32bit mode. */
2166
2167 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2168 {
2169 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2170 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2171 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2172 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2173 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2174 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2175 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2176 };
2177
2178 /* The "default" register map used in 64bit mode. */
2179
2180 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2181 {
2182 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2183 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2184 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2185 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2186 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2187 8,9,10,11,12,13,14,15, /* extended integer registers */
2188 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2189 };
2190
2191 /* Define the register numbers to be used in Dwarf debugging information.
2192 The SVR4 reference port C compiler uses the following register numbers
2193 in its Dwarf output code:
2194 0 for %eax (gcc regno = 0)
2195 1 for %ecx (gcc regno = 2)
2196 2 for %edx (gcc regno = 1)
2197 3 for %ebx (gcc regno = 3)
2198 4 for %esp (gcc regno = 7)
2199 5 for %ebp (gcc regno = 6)
2200 6 for %esi (gcc regno = 4)
2201 7 for %edi (gcc regno = 5)
2202 The following three DWARF register numbers are never generated by
2203 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2204 believes these numbers have these meanings.
2205 8 for %eip (no gcc equivalent)
2206 9 for %eflags (gcc regno = 17)
2207 10 for %trapno (no gcc equivalent)
2208 It is not at all clear how we should number the FP stack registers
2209 for the x86 architecture. If the version of SDB on x86/svr4 were
2210 a bit less brain dead with respect to floating-point then we would
2211 have a precedent to follow with respect to DWARF register numbers
2212 for x86 FP registers, but the SDB on x86/svr4 is so completely
2213 broken with respect to FP registers that it is hardly worth thinking
2214 of it as something to strive for compatibility with.
2215 The version of x86/svr4 SDB I have at the moment does (partially)
2216 seem to believe that DWARF register number 11 is associated with
2217 the x86 register %st(0), but that's about all. Higher DWARF
2218 register numbers don't seem to be associated with anything in
2219 particular, and even for DWARF regno 11, SDB only seems to under-
2220 stand that it should say that a variable lives in %st(0) (when
2221 asked via an `=' command) if we said it was in DWARF regno 11,
2222 but SDB still prints garbage when asked for the value of the
2223 variable in question (via a `/' command).
2224 (Also note that the labels SDB prints for various FP stack regs
2225 when doing an `x' command are all wrong.)
2226 Note that these problems generally don't affect the native SVR4
2227 C compiler because it doesn't allow the use of -O with -g and
2228 because when it is *not* optimizing, it allocates a memory
2229 location for each floating-point variable, and the memory
2230 location is what gets described in the DWARF AT_location
2231 attribute for the variable in question.
2232 Regardless of the severe mental illness of the x86/svr4 SDB, we
2233 do something sensible here and we use the following DWARF
2234 register numbers. Note that these are all stack-top-relative
2235 numbers.
2236 11 for %st(0) (gcc regno = 8)
2237 12 for %st(1) (gcc regno = 9)
2238 13 for %st(2) (gcc regno = 10)
2239 14 for %st(3) (gcc regno = 11)
2240 15 for %st(4) (gcc regno = 12)
2241 16 for %st(5) (gcc regno = 13)
2242 17 for %st(6) (gcc regno = 14)
2243 18 for %st(7) (gcc regno = 15)
2244 */
2245 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2246 {
2247 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2248 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2249 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2250 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2251 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2254 };
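/* Editorial note (illustrative, not part of GCC): reading the table above,
   gcc's register number 2 (%ecx) is emitted as DWARF register 1, matching
   the SVR4 numbering described in the comment.  */
#if 0 /* example sketch only */
int dwarf_regno_for_ecx = svr4_dbx_register_map[2];  /* == 1 */
#endif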
2255
2256 /* Define parameter passing and return registers. */
2257
2258 static int const x86_64_int_parameter_registers[6] =
2259 {
2260 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2261 };
2262
2263 static int const x86_64_ms_abi_int_parameter_registers[4] =
2264 {
2265 CX_REG, DX_REG, R8_REG, R9_REG
2266 };
2267
2268 static int const x86_64_int_return_registers[4] =
2269 {
2270 AX_REG, DX_REG, DI_REG, SI_REG
2271 };
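/* Editorial note (illustrative, not part of GCC): for a SysV x86-64 call such
   as f (a, b, c), the first three integer arguments use the first three
   entries of x86_64_int_parameter_registers (%rdi, %rsi, %rdx), whereas the
   MS ABI table starts with %rcx, %rdx, %r8, %r9.  */
#if 0 /* example sketch only */
int first_sysv_arg_reg  = x86_64_int_parameter_registers[0];         /* DI_REG */
int first_msabi_arg_reg = x86_64_ms_abi_int_parameter_registers[0];  /* CX_REG */
#endif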
2272
2273 /* Define the structure for the machine field in struct function. */
2274
2275 struct GTY(()) stack_local_entry {
2276 unsigned short mode;
2277 unsigned short n;
2278 rtx rtl;
2279 struct stack_local_entry *next;
2280 };
2281
2282 /* Structure describing stack frame layout.
2283 Stack grows downward:
2284
2285 [arguments]
2286 <- ARG_POINTER
2287 saved pc
2288
2289 saved static chain if ix86_static_chain_on_stack
2290
2291 saved frame pointer if frame_pointer_needed
2292 <- HARD_FRAME_POINTER
2293 [saved regs]
2294 <- regs_save_offset
2295 [padding0]
2296
2297 [saved SSE regs]
2298 <- sse_regs_save_offset
2299 [padding1] |
2300 | <- FRAME_POINTER
2301 [va_arg registers] |
2302 |
2303 [frame] |
2304 |
2305 [padding2] | = to_allocate
2306 <- STACK_POINTER
2307 */
2308 struct ix86_frame
2309 {
2310 int nsseregs;
2311 int nregs;
2312 int va_arg_size;
2313 int red_zone_size;
2314 int outgoing_arguments_size;
2315 HOST_WIDE_INT frame;
2316
2317 /* The offsets relative to ARG_POINTER. */
2318 HOST_WIDE_INT frame_pointer_offset;
2319 HOST_WIDE_INT hard_frame_pointer_offset;
2320 HOST_WIDE_INT stack_pointer_offset;
2321 HOST_WIDE_INT hfp_save_offset;
2322 HOST_WIDE_INT reg_save_offset;
2323 HOST_WIDE_INT sse_reg_save_offset;
2324
2325 /* When save_regs_using_mov is set, emit prologue using
2326 move instead of push instructions. */
2327 bool save_regs_using_mov;
2328 };
2329
2330 /* Which cpu are we scheduling for. */
2331 enum attr_cpu ix86_schedule;
2332
2333 /* Which cpu are we optimizing for. */
2334 enum processor_type ix86_tune;
2335
2336 /* Which instruction set architecture to use. */
2337 enum processor_type ix86_arch;
2338
2339 /* True if the SSE prefetch instruction is not a NOOP. */
2340 int x86_prefetch_sse;
2341
2342 /* -mstackrealign option */
2343 static const char ix86_force_align_arg_pointer_string[]
2344 = "force_align_arg_pointer";
2345
2346 static rtx (*ix86_gen_leave) (void);
2347 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2349 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2350 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2351 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2353 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2354 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2356
2357 /* Preferred alignment for stack boundary in bits. */
2358 unsigned int ix86_preferred_stack_boundary;
2359
2360 /* Alignment for incoming stack boundary in bits specified at
2361 command line. */
2362 static unsigned int ix86_user_incoming_stack_boundary;
2363
2364 /* Default alignment for incoming stack boundary in bits. */
2365 static unsigned int ix86_default_incoming_stack_boundary;
2366
2367 /* Alignment for incoming stack boundary in bits. */
2368 unsigned int ix86_incoming_stack_boundary;
2369
2370 /* Calling-ABI-specific va_list type nodes. */
2371 static GTY(()) tree sysv_va_list_type_node;
2372 static GTY(()) tree ms_va_list_type_node;
2373
2374 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2375 char internal_label_prefix[16];
2376 int internal_label_prefix_len;
2377
2378 /* Fence to use after loop using movnt. */
2379 tree x86_mfence;
2380
2381 /* Register classes used for passing a given 64bit part of the argument.
2382 These represent classes as documented by the psABI, with the exception
2383 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2384 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2385 
2386 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2387 whenever possible (the upper half does contain padding). */
2388 enum x86_64_reg_class
2389 {
2390 X86_64_NO_CLASS,
2391 X86_64_INTEGER_CLASS,
2392 X86_64_INTEGERSI_CLASS,
2393 X86_64_SSE_CLASS,
2394 X86_64_SSESF_CLASS,
2395 X86_64_SSEDF_CLASS,
2396 X86_64_SSEUP_CLASS,
2397 X86_64_X87_CLASS,
2398 X86_64_X87UP_CLASS,
2399 X86_64_COMPLEX_X87_CLASS,
2400 X86_64_MEMORY_CLASS
2401 };
2402
2403 #define MAX_CLASSES 4
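/* Editorial note (illustrative, not part of GCC): under the x86-64 psABI an
   aggregate such as the one sketched below is split into two 64bit parts;
   the double eightbyte classifies as X86_64_SSEDF_CLASS and the long
   eightbyte as X86_64_INTEGER_CLASS, so the argument travels partly in an
   SSE register and partly in a general register.  */
#if 0 /* example sketch only */
struct example_arg
{
  double d;  /* eightbyte 0: X86_64_SSEDF_CLASS */
  long l;    /* eightbyte 1: X86_64_INTEGER_CLASS */
};
#endif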
2404
2405 /* Table of constants used by fldpi, fldln2, etc.... */
2406 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2407 static bool ext_80387_constants_init = 0;
2408
2409 \f
2410 static struct machine_function * ix86_init_machine_status (void);
2411 static rtx ix86_function_value (const_tree, const_tree, bool);
2412 static bool ix86_function_value_regno_p (const unsigned int);
2413 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2414 const_tree);
2415 static rtx ix86_static_chain (const_tree, bool);
2416 static int ix86_function_regparm (const_tree, const_tree);
2417 static void ix86_compute_frame_layout (struct ix86_frame *);
2418 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2419 rtx, rtx, int);
2420 static void ix86_add_new_builtins (int);
2421 static rtx ix86_expand_vec_perm_builtin (tree);
2422 static tree ix86_canonical_va_list_type (tree);
2423 static void predict_jump (int);
2424 static unsigned int split_stack_prologue_scratch_regno (void);
2425 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2426
2427 enum ix86_function_specific_strings
2428 {
2429 IX86_FUNCTION_SPECIFIC_ARCH,
2430 IX86_FUNCTION_SPECIFIC_TUNE,
2431 IX86_FUNCTION_SPECIFIC_MAX
2432 };
2433
2434 static char *ix86_target_string (int, int, const char *, const char *,
2435 enum fpmath_unit, bool);
2436 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2437 static void ix86_function_specific_save (struct cl_target_option *);
2438 static void ix86_function_specific_restore (struct cl_target_option *);
2439 static void ix86_function_specific_print (FILE *, int,
2440 struct cl_target_option *);
2441 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2442 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2443 struct gcc_options *);
2444 static bool ix86_can_inline_p (tree, tree);
2445 static void ix86_set_current_function (tree);
2446 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2447
2448 static enum calling_abi ix86_function_abi (const_tree);
2449
2450 \f
2451 #ifndef SUBTARGET32_DEFAULT_CPU
2452 #define SUBTARGET32_DEFAULT_CPU "i386"
2453 #endif
2454
2455 /* The svr4 ABI for the i386 says that records and unions are returned
2456 in memory. */
2457 #ifndef DEFAULT_PCC_STRUCT_RETURN
2458 #define DEFAULT_PCC_STRUCT_RETURN 1
2459 #endif
2460
2461 /* Whether -mtune= or -march= were specified */
2462 static int ix86_tune_defaulted;
2463 static int ix86_arch_specified;
2464
2465 /* Vectorization library interface and handlers. */
2466 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2467
2468 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2469 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2470
2471 /* Processor target table, indexed by processor number */
2472 struct ptt
2473 {
2474 const struct processor_costs *cost; /* Processor costs */
2475 const int align_loop; /* Default alignments. */
2476 const int align_loop_max_skip;
2477 const int align_jump;
2478 const int align_jump_max_skip;
2479 const int align_func;
2480 };
2481
2482 static const struct ptt processor_target_table[PROCESSOR_max] =
2483 {
2484 {&i386_cost, 4, 3, 4, 3, 4},
2485 {&i486_cost, 16, 15, 16, 15, 16},
2486 {&pentium_cost, 16, 7, 16, 7, 16},
2487 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2488 {&geode_cost, 0, 0, 0, 0, 0},
2489 {&k6_cost, 32, 7, 32, 7, 32},
2490 {&athlon_cost, 16, 7, 16, 7, 16},
2491 {&pentium4_cost, 0, 0, 0, 0, 0},
2492 {&k8_cost, 16, 7, 16, 7, 16},
2493 {&nocona_cost, 0, 0, 0, 0, 0},
2494 /* Core 2 32-bit. */
2495 {&generic32_cost, 16, 10, 16, 10, 16},
2496 /* Core 2 64-bit. */
2497 {&generic64_cost, 16, 10, 16, 10, 16},
2498 /* Core i7 32-bit. */
2499 {&generic32_cost, 16, 10, 16, 10, 16},
2500 /* Core i7 64-bit. */
2501 {&generic64_cost, 16, 10, 16, 10, 16},
2502 {&generic32_cost, 16, 7, 16, 7, 16},
2503 {&generic64_cost, 16, 10, 16, 10, 16},
2504 {&amdfam10_cost, 32, 24, 32, 7, 32},
2505 {&bdver1_cost, 32, 24, 32, 7, 32},
2506 {&btver1_cost, 32, 24, 32, 7, 32},
2507 {&atom_cost, 16, 7, 16, 7, 16}
2508 };
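/* Editorial note (illustrative sketch, not part of GCC): the table is indexed
   by processor_type, so selecting a tuning target picks both its cost table
   and its preferred alignments in one lookup, roughly:  */
#if 0 /* example sketch only */
const struct ptt *tune_info = &processor_target_table[ix86_tune];
const struct processor_costs *costs = tune_info->cost;
int loop_align = tune_info->align_loop;
#endif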
2509
2510 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2511 {
2512 "generic",
2513 "i386",
2514 "i486",
2515 "pentium",
2516 "pentium-mmx",
2517 "pentiumpro",
2518 "pentium2",
2519 "pentium3",
2520 "pentium4",
2521 "pentium-m",
2522 "prescott",
2523 "nocona",
2524 "core2",
2525 "corei7",
2526 "atom",
2527 "geode",
2528 "k6",
2529 "k6-2",
2530 "k6-3",
2531 "athlon",
2532 "athlon-4",
2533 "k8",
2534 "amdfam10",
2535 "bdver1",
2536 "btver1"
2537 };
2538 \f
2539 /* Return true if a red-zone is in use. */
2540
2541 static inline bool
2542 ix86_using_red_zone (void)
2543 {
2544 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2545 }
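/* Editorial note (illustrative, not part of GCC): the red zone is the
   128-byte area below the stack pointer that leaf functions may use without
   adjusting %rsp; the SysV x86-64 ABI provides it while the MS ABI does not,
   which is why the test above excludes TARGET_64BIT_MS_ABI.  */
#if 0 /* example sketch only */
if (ix86_using_red_zone ())
  {
    /* Up to 128 bytes of scratch space below the stack pointer may be
       used without an explicit stack adjustment.  */
  }
#endif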
2546 \f
2547 /* Return a string that documents the current -m options. The caller is
2548 responsible for freeing the string. */
2549
2550 static char *
2551 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2552 enum fpmath_unit fpmath, bool add_nl_p)
2553 {
2554 struct ix86_target_opts
2555 {
2556 const char *option; /* option string */
2557 int mask; /* isa mask options */
2558 };
2559
2560 /* This table is ordered so that options like -msse4.2, which imply
2561 the preceding options, are matched first. */
2562 static struct ix86_target_opts isa_opts[] =
2563 {
2564 { "-m64", OPTION_MASK_ISA_64BIT },
2565 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2566 { "-mfma", OPTION_MASK_ISA_FMA },
2567 { "-mxop", OPTION_MASK_ISA_XOP },
2568 { "-mlwp", OPTION_MASK_ISA_LWP },
2569 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2570 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2571 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2572 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2573 { "-msse3", OPTION_MASK_ISA_SSE3 },
2574 { "-msse2", OPTION_MASK_ISA_SSE2 },
2575 { "-msse", OPTION_MASK_ISA_SSE },
2576 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2577 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2578 { "-mmmx", OPTION_MASK_ISA_MMX },
2579 { "-mabm", OPTION_MASK_ISA_ABM },
2580 { "-mbmi", OPTION_MASK_ISA_BMI },
2581 { "-mtbm", OPTION_MASK_ISA_TBM },
2582 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2583 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2584 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2585 { "-maes", OPTION_MASK_ISA_AES },
2586 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2587 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2588 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2589 { "-mf16c", OPTION_MASK_ISA_F16C },
2590 };
2591
2592 /* Flag options. */
2593 static struct ix86_target_opts flag_opts[] =
2594 {
2595 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2596 { "-m80387", MASK_80387 },
2597 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2598 { "-malign-double", MASK_ALIGN_DOUBLE },
2599 { "-mcld", MASK_CLD },
2600 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2601 { "-mieee-fp", MASK_IEEE_FP },
2602 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2603 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2604 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2605 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2606 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2607 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2608 { "-mno-red-zone", MASK_NO_RED_ZONE },
2609 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2610 { "-mrecip", MASK_RECIP },
2611 { "-mrtd", MASK_RTD },
2612 { "-msseregparm", MASK_SSEREGPARM },
2613 { "-mstack-arg-probe", MASK_STACK_PROBE },
2614 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2615 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2616 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2617 { "-mvzeroupper", MASK_VZEROUPPER },
2618 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2619 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2620 };
2621
2622 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2623
2624 char isa_other[40];
2625 char target_other[40];
2626 unsigned num = 0;
2627 unsigned i, j;
2628 char *ret;
2629 char *ptr;
2630 size_t len;
2631 size_t line_len;
2632 size_t sep_len;
2633
2634 memset (opts, '\0', sizeof (opts));
2635
2636 /* Add -march= option. */
2637 if (arch)
2638 {
2639 opts[num][0] = "-march=";
2640 opts[num++][1] = arch;
2641 }
2642
2643 /* Add -mtune= option. */
2644 if (tune)
2645 {
2646 opts[num][0] = "-mtune=";
2647 opts[num++][1] = tune;
2648 }
2649
2650 /* Pick out the options in isa options. */
2651 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2652 {
2653 if ((isa & isa_opts[i].mask) != 0)
2654 {
2655 opts[num++][0] = isa_opts[i].option;
2656 isa &= ~ isa_opts[i].mask;
2657 }
2658 }
2659
2660 if (isa && add_nl_p)
2661 {
2662 opts[num++][0] = isa_other;
2663 sprintf (isa_other, "(other isa: %#x)", isa);
2664 }
2665
2666 /* Add flag options. */
2667 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2668 {
2669 if ((flags & flag_opts[i].mask) != 0)
2670 {
2671 opts[num++][0] = flag_opts[i].option;
2672 flags &= ~ flag_opts[i].mask;
2673 }
2674 }
2675
2676 if (flags && add_nl_p)
2677 {
2678 opts[num++][0] = target_other;
2679 sprintf (target_other, "(other flags: %#x)", flags);
2680 }
2681
2682 /* Add -fpmath= option. */
2683 if (fpmath)
2684 {
2685 opts[num][0] = "-mfpmath=";
2686 switch ((int) fpmath)
2687 {
2688 case FPMATH_387:
2689 opts[num++][1] = "387";
2690 break;
2691
2692 case FPMATH_SSE:
2693 opts[num++][1] = "sse";
2694 break;
2695
2696 case FPMATH_387 | FPMATH_SSE:
2697 opts[num++][1] = "sse+387";
2698 break;
2699
2700 default:
2701 gcc_unreachable ();
2702 }
2703 }
2704
2705 /* Any options? */
2706 if (num == 0)
2707 return NULL;
2708
2709 gcc_assert (num < ARRAY_SIZE (opts));
2710
2711 /* Size the string. */
2712 len = 0;
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2715 {
2716 len += sep_len;
2717 for (j = 0; j < 2; j++)
2718 if (opts[i][j])
2719 len += strlen (opts[i][j]);
2720 }
2721
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2724 line_len = 0;
2725
2726 for (i = 0; i < num; i++)
2727 {
2728 size_t len2[2];
2729
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2732
2733 if (i != 0)
2734 {
2735 *ptr++ = ' ';
2736 line_len++;
2737
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2739 {
2740 *ptr++ = '\\';
2741 *ptr++ = '\n';
2742 line_len = 0;
2743 }
2744 }
2745
2746 for (j = 0; j < 2; j++)
2747 if (opts[i][j])
2748 {
2749 memcpy (ptr, opts[i][j], len2[j]);
2750 ptr += len2[j];
2751 line_len += len2[j];
2752 }
2753 }
2754
2755 *ptr = '\0';
2756 gcc_assert (ret + len >= ptr);
2757
2758 return ret;
2759 }
2760
2761 /* Return true if profiling code should be emitted before the
2762 prologue, and false otherwise.
2763 Note: For x86 with "hotfix" style patching a sorry () is issued. */
2764 static bool
2765 ix86_profile_before_prologue (void)
2766 {
2767 return flag_fentry != 0;
2768 }
2769
2770 /* Function that is callable from the debugger to print the current
2771 options. */
2772 void
2773 ix86_debug_options (void)
2774 {
2775 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2776 ix86_arch_string, ix86_tune_string,
2777 ix86_fpmath, true);
2778
2779 if (opts)
2780 {
2781 fprintf (stderr, "%s\n\n", opts);
2782 free (opts);
2783 }
2784 else
2785 fputs ("<no options>\n\n", stderr);
2786
2787 return;
2788 }
2789 \f
2790 /* Override various settings based on options. If MAIN_ARGS_P, the
2791 options are from the command line, otherwise they are from
2792 attributes. */
2793
2794 static void
2795 ix86_option_override_internal (bool main_args_p)
2796 {
2797 int i;
2798 unsigned int ix86_arch_mask, ix86_tune_mask;
2799 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2800 const char *prefix;
2801 const char *suffix;
2802 const char *sw;
2803
2804 enum pta_flags
2805 {
2806 PTA_SSE = 1 << 0,
2807 PTA_SSE2 = 1 << 1,
2808 PTA_SSE3 = 1 << 2,
2809 PTA_MMX = 1 << 3,
2810 PTA_PREFETCH_SSE = 1 << 4,
2811 PTA_3DNOW = 1 << 5,
2812 PTA_3DNOW_A = 1 << 6,
2813 PTA_64BIT = 1 << 7,
2814 PTA_SSSE3 = 1 << 8,
2815 PTA_CX16 = 1 << 9,
2816 PTA_POPCNT = 1 << 10,
2817 PTA_ABM = 1 << 11,
2818 PTA_SSE4A = 1 << 12,
2819 PTA_NO_SAHF = 1 << 13,
2820 PTA_SSE4_1 = 1 << 14,
2821 PTA_SSE4_2 = 1 << 15,
2822 PTA_AES = 1 << 16,
2823 PTA_PCLMUL = 1 << 17,
2824 PTA_AVX = 1 << 18,
2825 PTA_FMA = 1 << 19,
2826 PTA_MOVBE = 1 << 20,
2827 PTA_FMA4 = 1 << 21,
2828 PTA_XOP = 1 << 22,
2829 PTA_LWP = 1 << 23,
2830 PTA_FSGSBASE = 1 << 24,
2831 PTA_RDRND = 1 << 25,
2832 PTA_F16C = 1 << 26,
2833 PTA_BMI = 1 << 27,
2834 PTA_TBM = 1 << 28
2835 /* If this reaches 32, we need to widen the struct pta flags field below. */
2836 };
2837
2838 static struct pta
2839 {
2840 const char *const name; /* processor name or nickname. */
2841 const enum processor_type processor;
2842 const enum attr_cpu schedule;
2843 const unsigned /*enum pta_flags*/ flags;
2844 }
2845 const processor_alias_table[] =
2846 {
2847 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2848 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2849 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2850 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2851 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2852 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2853 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2854 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2855 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2856 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2857 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2858 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2859 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2860 PTA_MMX | PTA_SSE},
2861 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2862 PTA_MMX | PTA_SSE},
2863 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2864 PTA_MMX | PTA_SSE | PTA_SSE2},
2865 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2866 PTA_MMX | PTA_SSE | PTA_SSE2},
2867 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2868 PTA_MMX | PTA_SSE | PTA_SSE2},
2869 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2870 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2871 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2872 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2873 | PTA_CX16 | PTA_NO_SAHF},
2874 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2875 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2876 | PTA_SSSE3 | PTA_CX16},
2877 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2878 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2879 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2880 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2881 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2882 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2883 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2884 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2885 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2886 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2887 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2888 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2889 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2890 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2891 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2892 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2893 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2894 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2895 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2896 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2897 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2898 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2900 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2902 {"x86-64", PROCESSOR_K8, CPU_K8,
2903 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2904 {"k8", PROCESSOR_K8, CPU_K8,
2905 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2906 | PTA_SSE2 | PTA_NO_SAHF},
2907 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2908 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2909 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2910 {"opteron", PROCESSOR_K8, CPU_K8,
2911 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2912 | PTA_SSE2 | PTA_NO_SAHF},
2913 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2915 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2916 {"athlon64", PROCESSOR_K8, CPU_K8,
2917 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2918 | PTA_SSE2 | PTA_NO_SAHF},
2919 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2920 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2921 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2922 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2923 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2924 | PTA_SSE2 | PTA_NO_SAHF},
2925 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2926 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2927 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2928 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2929 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2930 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2931 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2932 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2933 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
2934 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
2935 | PTA_XOP | PTA_LWP},
2936 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
2937 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2938 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
2939 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2940 0 /* flags are only used for -march switch. */ },
2941 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2942 PTA_64BIT /* flags are only used for -march switch. */ },
2943 };
2944
2945 int const pta_size = ARRAY_SIZE (processor_alias_table);
2946
2947 /* Set up prefix/suffix so the error messages refer to either the command
2948 line argument, or the attribute(target). */
2949 if (main_args_p)
2950 {
2951 prefix = "-m";
2952 suffix = "";
2953 sw = "switch";
2954 }
2955 else
2956 {
2957 prefix = "option(\"";
2958 suffix = "\")";
2959 sw = "attribute";
2960 }
2961
2962 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2963 SUBTARGET_OVERRIDE_OPTIONS;
2964 #endif
2965
2966 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2967 SUBSUBTARGET_OVERRIDE_OPTIONS;
2968 #endif
2969
2970 /* -fPIC is the default for x86_64. */
2971 if (TARGET_MACHO && TARGET_64BIT)
2972 flag_pic = 2;
2973
2974 /* Need to check -mtune=generic first. */
2975 if (ix86_tune_string)
2976 {
2977 if (!strcmp (ix86_tune_string, "generic")
2978 || !strcmp (ix86_tune_string, "i686")
2979 /* As special support for cross compilers we read -mtune=native
2980 as -mtune=generic. With native compilers we won't see the
2981 -mtune=native, as it was changed by the driver. */
2982 || !strcmp (ix86_tune_string, "native"))
2983 {
2984 if (TARGET_64BIT)
2985 ix86_tune_string = "generic64";
2986 else
2987 ix86_tune_string = "generic32";
2988 }
2989 /* If this call is for setting the option attribute, allow the
2990 generic32/generic64 that was previously set. */
2991 else if (!main_args_p
2992 && (!strcmp (ix86_tune_string, "generic32")
2993 || !strcmp (ix86_tune_string, "generic64")))
2994 ;
2995 else if (!strncmp (ix86_tune_string, "generic", 7))
2996 error ("bad value (%s) for %stune=%s %s",
2997 ix86_tune_string, prefix, suffix, sw);
2998 else if (!strcmp (ix86_tune_string, "x86-64"))
2999 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3000 "%stune=k8%s or %stune=generic%s instead as appropriate",
3001 prefix, suffix, prefix, suffix, prefix, suffix);
3002 }
3003 else
3004 {
3005 if (ix86_arch_string)
3006 ix86_tune_string = ix86_arch_string;
3007 if (!ix86_tune_string)
3008 {
3009 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3010 ix86_tune_defaulted = 1;
3011 }
3012
3013 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3014 need to use a sensible tune option. */
3015 if (!strcmp (ix86_tune_string, "generic")
3016 || !strcmp (ix86_tune_string, "x86-64")
3017 || !strcmp (ix86_tune_string, "i686"))
3018 {
3019 if (TARGET_64BIT)
3020 ix86_tune_string = "generic64";
3021 else
3022 ix86_tune_string = "generic32";
3023 }
3024 }
3025
3026 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3027 {
3028 /* rep; movq isn't available in 32-bit code. */
3029 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3030 ix86_stringop_alg = no_stringop;
3031 }
3032
3033 if (!ix86_arch_string)
3034 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3035 else
3036 ix86_arch_specified = 1;
3037
3038 if (!global_options_set.x_ix86_abi)
3039 ix86_abi = DEFAULT_ABI;
3040
3041 if (global_options_set.x_ix86_cmodel)
3042 {
3043 switch (ix86_cmodel)
3044 {
3045 case CM_SMALL:
3046 case CM_SMALL_PIC:
3047 if (flag_pic)
3048 ix86_cmodel = CM_SMALL_PIC;
3049 if (!TARGET_64BIT)
3050 error ("code model %qs not supported in the %s bit mode",
3051 "small", "32");
3052 break;
3053
3054 case CM_MEDIUM:
3055 case CM_MEDIUM_PIC:
3056 if (flag_pic)
3057 ix86_cmodel = CM_MEDIUM_PIC;
3058 if (!TARGET_64BIT)
3059 error ("code model %qs not supported in the %s bit mode",
3060 "medium", "32");
3061 break;
3062
3063 case CM_LARGE:
3064 case CM_LARGE_PIC:
3065 if (flag_pic)
3066 ix86_cmodel = CM_LARGE_PIC;
3067 if (!TARGET_64BIT)
3068 error ("code model %qs not supported in the %s bit mode",
3069 "large", "32");
3070 break;
3071
3072 case CM_32:
3073 if (flag_pic)
3074 error ("code model %s does not support PIC mode", "32");
3075 if (TARGET_64BIT)
3076 error ("code model %qs not supported in the %s bit mode",
3077 "32", "64");
3078 break;
3079
3080 case CM_KERNEL:
3081 if (flag_pic)
3082 {
3083 error ("code model %s does not support PIC mode", "kernel");
3084 ix86_cmodel = CM_32;
3085 }
3086 if (!TARGET_64BIT)
3087 error ("code model %qs not supported in the %s bit mode",
3088 "kernel", "32");
3089 break;
3090
3091 default:
3092 gcc_unreachable ();
3093 }
3094 }
3095 else
3096 {
3097 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3098 use of rip-relative addressing. This eliminates fixups that
3099 would otherwise be needed if this object is to be placed in a
3100 DLL, and is essentially just as efficient as direct addressing. */
3101 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3102 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3103 else if (TARGET_64BIT)
3104 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3105 else
3106 ix86_cmodel = CM_32;
3107 }
3108 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3109 {
3110 error ("-masm=intel not supported in this configuration");
3111 ix86_asm_dialect = ASM_ATT;
3112 }
3113 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3114 sorry ("%i-bit mode not compiled in",
3115 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3116
3117 for (i = 0; i < pta_size; i++)
3118 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3119 {
3120 ix86_schedule = processor_alias_table[i].schedule;
3121 ix86_arch = processor_alias_table[i].processor;
3122 /* Default cpu tuning to the architecture. */
3123 ix86_tune = ix86_arch;
3124
3125 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3126 error ("CPU you selected does not support x86-64 "
3127 "instruction set");
3128
3129 if (processor_alias_table[i].flags & PTA_MMX
3130 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3131 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3132 if (processor_alias_table[i].flags & PTA_3DNOW
3133 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3134 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3135 if (processor_alias_table[i].flags & PTA_3DNOW_A
3136 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3137 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3138 if (processor_alias_table[i].flags & PTA_SSE
3139 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3140 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3141 if (processor_alias_table[i].flags & PTA_SSE2
3142 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3143 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3144 if (processor_alias_table[i].flags & PTA_SSE3
3145 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3146 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3147 if (processor_alias_table[i].flags & PTA_SSSE3
3148 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3149 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3150 if (processor_alias_table[i].flags & PTA_SSE4_1
3151 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3152 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3153 if (processor_alias_table[i].flags & PTA_SSE4_2
3154 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3155 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3156 if (processor_alias_table[i].flags & PTA_AVX
3157 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3158 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3159 if (processor_alias_table[i].flags & PTA_FMA
3160 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3161 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3162 if (processor_alias_table[i].flags & PTA_SSE4A
3163 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3164 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3165 if (processor_alias_table[i].flags & PTA_FMA4
3166 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3167 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3168 if (processor_alias_table[i].flags & PTA_XOP
3169 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3170 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3171 if (processor_alias_table[i].flags & PTA_LWP
3172 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3173 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3174 if (processor_alias_table[i].flags & PTA_ABM
3175 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3176 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3177 if (processor_alias_table[i].flags & PTA_BMI
3178 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3179 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3180 if (processor_alias_table[i].flags & PTA_TBM
3181 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3182 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3183 if (processor_alias_table[i].flags & PTA_CX16
3184 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3185 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3186 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3187 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3188 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3189 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3190 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3191 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3192 if (processor_alias_table[i].flags & PTA_MOVBE
3193 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3194 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3195 if (processor_alias_table[i].flags & PTA_AES
3196 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3197 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3198 if (processor_alias_table[i].flags & PTA_PCLMUL
3199 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3200 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3201 if (processor_alias_table[i].flags & PTA_FSGSBASE
3202 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3203 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3204 if (processor_alias_table[i].flags & PTA_RDRND
3205 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3206 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3207 if (processor_alias_table[i].flags & PTA_F16C
3208 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3209 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3210 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3211 x86_prefetch_sse = true;
3212
3213 break;
3214 }
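  /* A sketch of how the explicit-flag guards above interact with the
     command line: -march=corei7 enables SSE through SSE4.2 from the
     table (assuming its entry sets PTA_SSE4_2), but with
     "-march=corei7 -mno-sse4.2" the bit is already recorded in
     ix86_isa_flags_explicit, so the table does not re-enable it.  */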
3215
3216 if (!strcmp (ix86_arch_string, "generic"))
3217 error ("generic CPU can be used only for %stune=%s %s",
3218 prefix, suffix, sw);
3219 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3220 error ("bad value (%s) for %sarch=%s %s",
3221 ix86_arch_string, prefix, suffix, sw);
3222
3223 ix86_arch_mask = 1u << ix86_arch;
3224 for (i = 0; i < X86_ARCH_LAST; ++i)
3225 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3226
3227 for (i = 0; i < pta_size; i++)
3228 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3229 {
3230 ix86_schedule = processor_alias_table[i].schedule;
3231 ix86_tune = processor_alias_table[i].processor;
3232 if (TARGET_64BIT)
3233 {
3234 if (!(processor_alias_table[i].flags & PTA_64BIT))
3235 {
3236 if (ix86_tune_defaulted)
3237 {
3238 ix86_tune_string = "x86-64";
3239 for (i = 0; i < pta_size; i++)
3240 if (! strcmp (ix86_tune_string,
3241 processor_alias_table[i].name))
3242 break;
3243 ix86_schedule = processor_alias_table[i].schedule;
3244 ix86_tune = processor_alias_table[i].processor;
3245 }
3246 else
3247 error ("CPU you selected does not support x86-64 "
3248 "instruction set");
3249 }
3250 }
3251 else
3252 {
3253 /* Adjust tuning when compiling for 32-bit ABI. */
3254 switch (ix86_tune)
3255 {
3256 case PROCESSOR_GENERIC64:
3257 ix86_tune = PROCESSOR_GENERIC32;
3258 ix86_schedule = CPU_PENTIUMPRO;
3259 break;
3260
3261 case PROCESSOR_CORE2_64:
3262 ix86_tune = PROCESSOR_CORE2_32;
3263 break;
3264
3265 case PROCESSOR_COREI7_64:
3266 ix86_tune = PROCESSOR_COREI7_32;
3267 break;
3268
3269 default:
3270 break;
3271 }
3272 }
3273 /* Intel CPUs have always interpreted SSE prefetch instructions as
3274 NOPs; so, we can enable SSE prefetch instructions even when
3275 -mtune (rather than -march) points us to a processor that has them.
3276 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3277 higher processors. */
3278 if (TARGET_CMOVE
3279 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3280 x86_prefetch_sse = true;
3281 break;
3282 }
3283
3284 if (ix86_tune_specified && i == pta_size)
3285 error ("bad value (%s) for %stune=%s %s",
3286 ix86_tune_string, prefix, suffix, sw);
3287
3288 ix86_tune_mask = 1u << ix86_tune;
3289 for (i = 0; i < X86_TUNE_LAST; ++i)
3290 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3291
3292 #ifndef USE_IX86_FRAME_POINTER
3293 #define USE_IX86_FRAME_POINTER 0
3294 #endif
3295
3296 #ifndef USE_X86_64_FRAME_POINTER
3297 #define USE_X86_64_FRAME_POINTER 0
3298 #endif
3299
3300 /* Set the default values for switches whose default depends on TARGET_64BIT
3301 in case they weren't overridden by command-line options. */
3302 if (TARGET_64BIT)
3303 {
3304 if (optimize > 1 && !global_options_set.x_flag_zee)
3305 flag_zee = 1;
3306 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3307 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3308 if (flag_asynchronous_unwind_tables == 2)
3309 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3310 if (flag_pcc_struct_return == 2)
3311 flag_pcc_struct_return = 0;
3312 }
3313 else
3314 {
3315 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3316 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3317 if (flag_asynchronous_unwind_tables == 2)
3318 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3319 if (flag_pcc_struct_return == 2)
3320 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3321 }
3322
3323 if (optimize_size)
3324 ix86_cost = &ix86_size_cost;
3325 else
3326 ix86_cost = processor_target_table[ix86_tune].cost;
3327
3328 /* Arrange to set up i386_stack_locals for all functions. */
3329 init_machine_status = ix86_init_machine_status;
3330
3331 /* Validate -mregparm= value. */
3332 if (global_options_set.x_ix86_regparm)
3333 {
3334 if (TARGET_64BIT)
3335 warning (0, "-mregparm is ignored in 64-bit mode");
3336 if (ix86_regparm > REGPARM_MAX)
3337 {
3338 error ("-mregparm=%d is not between 0 and %d",
3339 ix86_regparm, REGPARM_MAX);
3340 ix86_regparm = 0;
3341 }
3342 }
3343 if (TARGET_64BIT)
3344 ix86_regparm = REGPARM_MAX;
3345
3346 /* Default align_* from the processor table. */
3347 if (align_loops == 0)
3348 {
3349 align_loops = processor_target_table[ix86_tune].align_loop;
3350 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3351 }
3352 if (align_jumps == 0)
3353 {
3354 align_jumps = processor_target_table[ix86_tune].align_jump;
3355 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3356 }
3357 if (align_functions == 0)
3358 {
3359 align_functions = processor_target_table[ix86_tune].align_func;
3360 }
3361
3362 /* Provide default for -mbranch-cost= value. */
3363 if (!global_options_set.x_ix86_branch_cost)
3364 ix86_branch_cost = ix86_cost->branch_cost;
3365
3366 if (TARGET_64BIT)
3367 {
3368 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3369
3370 /* Enable by default the SSE and MMX builtins. Do allow the user to
3371 explicitly disable any of these. In particular, disabling SSE and
3372 MMX for kernel code is extremely useful. */
3373 if (!ix86_arch_specified)
3374 ix86_isa_flags
3375 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3376 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3377
3378 if (TARGET_RTD)
3379 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3380 }
3381 else
3382 {
3383 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3384
3385 if (!ix86_arch_specified)
3386 ix86_isa_flags
3387 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3388
3389 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3390 when the programmer takes care to keep the stack from being destroyed. */
3391 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3392 target_flags |= MASK_NO_RED_ZONE;
3393 }
3394
3395 /* Keep nonleaf frame pointers. */
3396 if (flag_omit_frame_pointer)
3397 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3398 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3399 flag_omit_frame_pointer = 1;
3400
3401 /* If we're doing fast math, we don't care about comparison order
3402 wrt NaNs. This lets us use a shorter comparison sequence. */
3403 if (flag_finite_math_only)
3404 target_flags &= ~MASK_IEEE_FP;
3405
3406 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3407 since the insns won't need emulation. */
3408 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3409 target_flags &= ~MASK_NO_FANCY_MATH_387;
3410
3411 /* Likewise, if the target doesn't have a 387, or we've specified
3412 software floating point, don't use 387 inline intrinsics. */
3413 if (!TARGET_80387)
3414 target_flags |= MASK_NO_FANCY_MATH_387;
3415
3416 /* Turn on MMX builtins for -msse. */
3417 if (TARGET_SSE)
3418 {
3419 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3420 x86_prefetch_sse = true;
3421 }
3422
3423 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3424 if (TARGET_SSE4_2 || TARGET_ABM)
3425 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3426
3427 /* Validate -mpreferred-stack-boundary= value or default it to
3428 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3429 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3430 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3431 {
3432 int min = (TARGET_64BIT ? 4 : 2);
3433 int max = (TARGET_SEH ? 4 : 12);
3434
3435 if (ix86_preferred_stack_boundary_arg < min
3436 || ix86_preferred_stack_boundary_arg > max)
3437 {
3438 if (min == max)
3439 error ("-mpreferred-stack-boundary is not supported "
3440 "for this target");
3441 else
3442 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3443 ix86_preferred_stack_boundary_arg, min, max);
3444 }
3445 else
3446 ix86_preferred_stack_boundary
3447 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3448 }
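  /* The argument is the log2 of the boundary in bytes: for example,
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. a 16-byte-aligned stack.  */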
3449
3450 /* Set the default value for -mstackrealign. */
3451 if (ix86_force_align_arg_pointer == -1)
3452 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3453
3454 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3455
3456 /* Validate -mincoming-stack-boundary= value or default it to
3457 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3458 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3459 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3460 {
3461 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3462 || ix86_incoming_stack_boundary_arg > 12)
3463 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3464 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3465 else
3466 {
3467 ix86_user_incoming_stack_boundary
3468 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3469 ix86_incoming_stack_boundary
3470 = ix86_user_incoming_stack_boundary;
3471 }
3472 }
3473
3474 /* Accept -msseregparm only if at least SSE support is enabled. */
3475 if (TARGET_SSEREGPARM
3476 && ! TARGET_SSE)
3477 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3478
3479 if (global_options_set.x_ix86_fpmath)
3480 {
3481 if (ix86_fpmath & FPMATH_SSE)
3482 {
3483 if (!TARGET_SSE)
3484 {
3485 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3486 ix86_fpmath = FPMATH_387;
3487 }
3488 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3489 {
3490 warning (0, "387 instruction set disabled, using SSE arithmetics");
3491 ix86_fpmath = FPMATH_SSE;
3492 }
3493 }
3494 }
3495 else
3496 ix86_fpmath = TARGET_FPMATH_DEFAULT;
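  /* For example, "-msse2 -mfpmath=sse" keeps FPMATH_SSE, while
     "-mno-sse -mfpmath=sse" falls back to FPMATH_387 with the warning
     above; when both SSE and 387 math are requested but the 387 is
     disabled, SSE alone is used.  */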
3497
3498 /* If the i387 is disabled, then do not return values in it. */
3499 if (!TARGET_80387)
3500 target_flags &= ~MASK_FLOAT_RETURNS;
3501
3502 /* Use external vectorized library in vectorizing intrinsics. */
3503 if (global_options_set.x_ix86_veclibabi_type)
3504 switch (ix86_veclibabi_type)
3505 {
3506 case ix86_veclibabi_type_svml:
3507 ix86_veclib_handler = ix86_veclibabi_svml;
3508 break;
3509
3510 case ix86_veclibabi_type_acml:
3511 ix86_veclib_handler = ix86_veclibabi_acml;
3512 break;
3513
3514 default:
3515 gcc_unreachable ();
3516 }
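  /* E.g. -mveclibabi=svml or -mveclibabi=acml selects the corresponding
     handler above, and the vectorizer then asks it for external library
     equivalents of the built-in math functions.  */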
3517
3518 if ((!USE_IX86_FRAME_POINTER
3519 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3520 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3521 && !optimize_size)
3522 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3523
3524 /* ??? Unwind info is not correct around the CFG unless either a frame
3525 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3526 unwind info generation to be aware of the CFG and propagating states
3527 around edges. */
3528 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3529 || flag_exceptions || flag_non_call_exceptions)
3530 && flag_omit_frame_pointer
3531 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3532 {
3533 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3534 warning (0, "unwind tables currently require either a frame pointer "
3535 "or %saccumulate-outgoing-args%s for correctness",
3536 prefix, suffix);
3537 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3538 }
3539
3540 /* If stack probes are required, the space used for large function
3541 arguments on the stack must also be probed, so enable
3542 -maccumulate-outgoing-args so this happens in the prologue. */
3543 if (TARGET_STACK_PROBE
3544 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3545 {
3546 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3547 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3548 "for correctness", prefix, suffix);
3549 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3550 }
3551
3552 /* For sane SSE instruction set generation we need the fcomi instruction.
3553 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3554 expands to a sequence that includes conditional move. */
3555 if (TARGET_SSE || TARGET_RDRND)
3556 TARGET_CMOVE = 1;
3557
3558 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3559 {
3560 char *p;
3561 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3562 p = strchr (internal_label_prefix, 'X');
3563 internal_label_prefix_len = p - internal_label_prefix;
3564 *p = '\0';
3565 }
3566
3567 /* When the scheduling description is not available, disable the scheduler pass
3568 so it won't slow down compilation and make x87 code slower. */
3569 if (!TARGET_SCHEDULE)
3570 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3571
3572 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3573 ix86_cost->simultaneous_prefetches,
3574 global_options.x_param_values,
3575 global_options_set.x_param_values);
3576 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3577 global_options.x_param_values,
3578 global_options_set.x_param_values);
3579 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3580 global_options.x_param_values,
3581 global_options_set.x_param_values);
3582 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3583 global_options.x_param_values,
3584 global_options_set.x_param_values);
3585
3586 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3587 if (flag_prefetch_loop_arrays < 0
3588 && HAVE_prefetch
3589 && optimize >= 3
3590 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3591 flag_prefetch_loop_arrays = 1;
3592
3593 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3594 can be optimized to ap = __builtin_next_arg (0). */
3595 if (!TARGET_64BIT && !flag_split_stack)
3596 targetm.expand_builtin_va_start = NULL;
3597
3598 if (TARGET_64BIT)
3599 {
3600 ix86_gen_leave = gen_leave_rex64;
3601 ix86_gen_add3 = gen_adddi3;
3602 ix86_gen_sub3 = gen_subdi3;
3603 ix86_gen_sub3_carry = gen_subdi3_carry;
3604 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3605 ix86_gen_monitor = gen_sse3_monitor64;
3606 ix86_gen_andsp = gen_anddi3;
3607 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3608 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3609 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3610 }
3611 else
3612 {
3613 ix86_gen_leave = gen_leave;
3614 ix86_gen_add3 = gen_addsi3;
3615 ix86_gen_sub3 = gen_subsi3;
3616 ix86_gen_sub3_carry = gen_subsi3_carry;
3617 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3618 ix86_gen_monitor = gen_sse3_monitor;
3619 ix86_gen_andsp = gen_andsi3;
3620 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3621 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3622 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3623 }
3624
3625 #ifdef USE_IX86_CLD
3626 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3627 if (!TARGET_64BIT)
3628 target_flags |= MASK_CLD & ~target_flags_explicit;
3629 #endif
3630
3631 if (!TARGET_64BIT && flag_pic)
3632 {
3633 if (flag_fentry > 0)
3634 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3635 "with -fpic");
3636 flag_fentry = 0;
3637 }
3638 else if (TARGET_SEH)
3639 {
3640 if (flag_fentry == 0)
3641 sorry ("-mno-fentry isn%'t compatible with SEH");
3642 flag_fentry = 1;
3643 }
3644 else if (flag_fentry < 0)
3645 {
3646 #if defined(PROFILE_BEFORE_PROLOGUE)
3647 flag_fentry = 1;
3648 #else
3649 flag_fentry = 0;
3650 #endif
3651 }
3652
3653 if (TARGET_AVX)
3654 {
3655 /* When not optimizing for size, enable the vzeroupper optimization for
3656 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3657 AVX unaligned loads/stores. */
3658 if (!optimize_size)
3659 {
3660 if (flag_expensive_optimizations
3661 && !(target_flags_explicit & MASK_VZEROUPPER))
3662 target_flags |= MASK_VZEROUPPER;
3663 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3664 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3665 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3666 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3667 }
3668 }
3669 else
3670 {
3671 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3672 target_flags &= ~MASK_VZEROUPPER;
3673 }
3674
3675 /* Save the initial options in case the user does function specific
3676 options. */
3677 if (main_args_p)
3678 target_option_default_node = target_option_current_node
3679 = build_target_option_node ();
3680 }
3681
3682 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3683
3684 static bool
3685 function_pass_avx256_p (const_rtx val)
3686 {
3687 if (!val)
3688 return false;
3689
3690 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3691 return true;
3692
3693 if (GET_CODE (val) == PARALLEL)
3694 {
3695 int i;
3696 rtx r;
3697
3698 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3699 {
3700 r = XVECEXP (val, 0, i);
3701 if (GET_CODE (r) == EXPR_LIST
3702 && XEXP (r, 0)
3703 && REG_P (XEXP (r, 0))
3704 && (GET_MODE (XEXP (r, 0)) == OImode
3705 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3706 return true;
3707 }
3708 }
3709
3710 return false;
3711 }
3712
3713 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3714
3715 static void
3716 ix86_option_override (void)
3717 {
3718 ix86_option_override_internal (true);
3719 }
3720
3721 /* Update register usage after having seen the compiler flags. */
3722
3723 static void
3724 ix86_conditional_register_usage (void)
3725 {
3726 int i;
3727 unsigned int j;
3728
3729 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3730 {
3731 if (fixed_regs[i] > 1)
3732 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3733 if (call_used_regs[i] > 1)
3734 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 }
3736
3737 /* The PIC register, if it exists, is fixed. */
3738 j = PIC_OFFSET_TABLE_REGNUM;
3739 if (j != INVALID_REGNUM)
3740 fixed_regs[j] = call_used_regs[j] = 1;
3741
3742 /* The 64-bit MS_ABI changes the set of call-used registers. */
3743 if (TARGET_64BIT_MS_ABI)
3744 {
3745 call_used_regs[SI_REG] = 0;
3746 call_used_regs[DI_REG] = 0;
3747 call_used_regs[XMM6_REG] = 0;
3748 call_used_regs[XMM7_REG] = 0;
3749 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3750 call_used_regs[i] = 0;
3751 }
3752
3753 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3754 other call-clobbered regs for 64-bit. */
3755 if (TARGET_64BIT)
3756 {
3757 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3758
3759 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3760 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3761 && call_used_regs[i])
3762 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3763 }
3764
3765 /* If MMX is disabled, squash the registers. */
3766 if (! TARGET_MMX)
3767 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3768 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3769 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3770
3771 /* If SSE is disabled, squash the registers. */
3772 if (! TARGET_SSE)
3773 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3774 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3775 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3776
3777 /* If the FPU is disabled, squash the registers. */
3778 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3779 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3780 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3781 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3782
3783 /* If 32-bit, squash the 64-bit registers. */
3784 if (! TARGET_64BIT)
3785 {
3786 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3787 reg_names[i] = "";
3788 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3789 reg_names[i] = "";
3790 }
3791 }
3792
3793 \f
3794 /* Save the current options */
3795
3796 static void
3797 ix86_function_specific_save (struct cl_target_option *ptr)
3798 {
3799 ptr->arch = ix86_arch;
3800 ptr->schedule = ix86_schedule;
3801 ptr->tune = ix86_tune;
3802 ptr->branch_cost = ix86_branch_cost;
3803 ptr->tune_defaulted = ix86_tune_defaulted;
3804 ptr->arch_specified = ix86_arch_specified;
3805 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3806 ptr->ix86_target_flags_explicit = target_flags_explicit;
3807
3808 /* The fields are char but the variables are not; make sure the
3809 values fit in the fields. */
3810 gcc_assert (ptr->arch == ix86_arch);
3811 gcc_assert (ptr->schedule == ix86_schedule);
3812 gcc_assert (ptr->tune == ix86_tune);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3814 }
3815
3816 /* Restore the current options */
3817
3818 static void
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3820 {
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3824 int i;
3825
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_branch_cost = ptr->branch_cost;
3830 ix86_tune_defaulted = ptr->tune_defaulted;
3831 ix86_arch_specified = ptr->arch_specified;
3832 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3833 target_flags_explicit = ptr->ix86_target_flags_explicit;
3834
3835 /* Recreate the arch feature tests if the arch changed */
3836 if (old_arch != ix86_arch)
3837 {
3838 ix86_arch_mask = 1u << ix86_arch;
3839 for (i = 0; i < X86_ARCH_LAST; ++i)
3840 ix86_arch_features[i]
3841 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3842 }
3843
3844 /* Recreate the tune optimization tests */
3845 if (old_tune != ix86_tune)
3846 {
3847 ix86_tune_mask = 1u << ix86_tune;
3848 for (i = 0; i < X86_TUNE_LAST; ++i)
3849 ix86_tune_features[i]
3850 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3851 }
3852 }
3853
3854 /* Print the current options */
3855
3856 static void
3857 ix86_function_specific_print (FILE *file, int indent,
3858 struct cl_target_option *ptr)
3859 {
3860 char *target_string
3861 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3862 NULL, NULL, ptr->x_ix86_fpmath, false);
3863
3864 fprintf (file, "%*sarch = %d (%s)\n",
3865 indent, "",
3866 ptr->arch,
3867 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3868 ? cpu_names[ptr->arch]
3869 : "<unknown>"));
3870
3871 fprintf (file, "%*stune = %d (%s)\n",
3872 indent, "",
3873 ptr->tune,
3874 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3875 ? cpu_names[ptr->tune]
3876 : "<unknown>"));
3877
3878 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3879
3880 if (target_string)
3881 {
3882 fprintf (file, "%*s%s\n", indent, "", target_string);
3883 free (target_string);
3884 }
3885 }
3886
3887 \f
3888 /* Inner function to process the attribute((target(...))), take an argument and
3889 set the current options from the argument. If we have a list, recursively go
3890 over the list. */
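/* For illustration, a typical argument handled below is
	__attribute__((target("sse4.1,no-avx,fpmath=sse,arch=core2")))
   where the ISA names map onto the corresponding -m options, a "no-"
   prefix negates an option, "fpmath=" is an enum option, and "arch="
   and "tune=" are string options (see the attrs[] table below).  */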
3891
3892 static bool
3893 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
3894 struct gcc_options *enum_opts_set)
3895 {
3896 char *next_optstr;
3897 bool ret = true;
3898
3899 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3900 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3901 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
3902 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3903 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3904
3905 enum ix86_opt_type
3906 {
3907 ix86_opt_unknown,
3908 ix86_opt_yes,
3909 ix86_opt_no,
3910 ix86_opt_str,
3911 ix86_opt_enum,
3912 ix86_opt_isa
3913 };
3914
3915 static const struct
3916 {
3917 const char *string;
3918 size_t len;
3919 enum ix86_opt_type type;
3920 int opt;
3921 int mask;
3922 } attrs[] = {
3923 /* isa options */
3924 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3925 IX86_ATTR_ISA ("abm", OPT_mabm),
3926 IX86_ATTR_ISA ("bmi", OPT_mbmi),
3927 IX86_ATTR_ISA ("tbm", OPT_mtbm),
3928 IX86_ATTR_ISA ("aes", OPT_maes),
3929 IX86_ATTR_ISA ("avx", OPT_mavx),
3930 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3931 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3932 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3933 IX86_ATTR_ISA ("sse", OPT_msse),
3934 IX86_ATTR_ISA ("sse2", OPT_msse2),
3935 IX86_ATTR_ISA ("sse3", OPT_msse3),
3936 IX86_ATTR_ISA ("sse4", OPT_msse4),
3937 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3938 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3939 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3940 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3941 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3942 IX86_ATTR_ISA ("xop", OPT_mxop),
3943 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3944 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3945 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3946 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3947
3948 /* enum options */
3949 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
3950
3951 /* string options */
3952 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3953 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3954
3955 /* flag options */
3956 IX86_ATTR_YES ("cld",
3957 OPT_mcld,
3958 MASK_CLD),
3959
3960 IX86_ATTR_NO ("fancy-math-387",
3961 OPT_mfancy_math_387,
3962 MASK_NO_FANCY_MATH_387),
3963
3964 IX86_ATTR_YES ("ieee-fp",
3965 OPT_mieee_fp,
3966 MASK_IEEE_FP),
3967
3968 IX86_ATTR_YES ("inline-all-stringops",
3969 OPT_minline_all_stringops,
3970 MASK_INLINE_ALL_STRINGOPS),
3971
3972 IX86_ATTR_YES ("inline-stringops-dynamically",
3973 OPT_minline_stringops_dynamically,
3974 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3975
3976 IX86_ATTR_NO ("align-stringops",
3977 OPT_mno_align_stringops,
3978 MASK_NO_ALIGN_STRINGOPS),
3979
3980 IX86_ATTR_YES ("recip",
3981 OPT_mrecip,
3982 MASK_RECIP),
3983
3984 };
3985
3986 /* If this is a list, recurse to get the options. */
3987 if (TREE_CODE (args) == TREE_LIST)
3988 {
3989 bool ret = true;
3990
3991 for (; args; args = TREE_CHAIN (args))
3992 if (TREE_VALUE (args)
3993 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
3994 p_strings, enum_opts_set))
3995 ret = false;
3996
3997 return ret;
3998 }
3999
4000 else if (TREE_CODE (args) != STRING_CST)
4001 gcc_unreachable ();
4002
4003 /* Handle multiple arguments separated by commas. */
4004 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4005
4006 while (next_optstr && *next_optstr != '\0')
4007 {
4008 char *p = next_optstr;
4009 char *orig_p = p;
4010 char *comma = strchr (next_optstr, ',');
4011 const char *opt_string;
4012 size_t len, opt_len;
4013 int opt;
4014 bool opt_set_p;
4015 char ch;
4016 unsigned i;
4017 enum ix86_opt_type type = ix86_opt_unknown;
4018 int mask = 0;
4019
4020 if (comma)
4021 {
4022 *comma = '\0';
4023 len = comma - next_optstr;
4024 next_optstr = comma + 1;
4025 }
4026 else
4027 {
4028 len = strlen (p);
4029 next_optstr = NULL;
4030 }
4031
4032 /* Recognize no-xxx. */
4033 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4034 {
4035 opt_set_p = false;
4036 p += 3;
4037 len -= 3;
4038 }
4039 else
4040 opt_set_p = true;
4041
4042 /* Find the option. */
4043 ch = *p;
4044 opt = N_OPTS;
4045 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4046 {
4047 type = attrs[i].type;
4048 opt_len = attrs[i].len;
4049 if (ch == attrs[i].string[0]
4050 && ((type != ix86_opt_str && type != ix86_opt_enum)
4051 ? len == opt_len
4052 : len > opt_len)
4053 && memcmp (p, attrs[i].string, opt_len) == 0)
4054 {
4055 opt = attrs[i].opt;
4056 mask = attrs[i].mask;
4057 opt_string = attrs[i].string;
4058 break;
4059 }
4060 }
4061
4062 /* Process the option. */
4063 if (opt == N_OPTS)
4064 {
4065 error ("attribute(target(\"%s\")) is unknown", orig_p);
4066 ret = false;
4067 }
4068
4069 else if (type == ix86_opt_isa)
4070 {
4071 struct cl_decoded_option decoded;
4072
4073 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4074 ix86_handle_option (&global_options, &global_options_set,
4075 &decoded, input_location);
4076 }
4077
4078 else if (type == ix86_opt_yes || type == ix86_opt_no)
4079 {
4080 if (type == ix86_opt_no)
4081 opt_set_p = !opt_set_p;
4082
4083 if (opt_set_p)
4084 target_flags |= mask;
4085 else
4086 target_flags &= ~mask;
4087 }
4088
4089 else if (type == ix86_opt_str)
4090 {
4091 if (p_strings[opt])
4092 {
4093 error ("option(\"%s\") was already specified", opt_string);
4094 ret = false;
4095 }
4096 else
4097 p_strings[opt] = xstrdup (p + opt_len);
4098 }
4099
4100 else if (type == ix86_opt_enum)
4101 {
4102 bool arg_ok;
4103 int value;
4104
4105 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4106 if (arg_ok)
4107 set_option (&global_options, enum_opts_set, opt, value,
4108 p + opt_len, DK_UNSPECIFIED, input_location,
4109 global_dc);
4110 else
4111 {
4112 error ("attribute(target(\"%s\")) is unknown", orig_p);
4113 ret = false;
4114 }
4115 }
4116
4117 else
4118 gcc_unreachable ();
4119 }
4120
4121 return ret;
4122 }
4123
4124 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4125
4126 tree
4127 ix86_valid_target_attribute_tree (tree args)
4128 {
4129 const char *orig_arch_string = ix86_arch_string;
4130 const char *orig_tune_string = ix86_tune_string;
4131 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4132 int orig_tune_defaulted = ix86_tune_defaulted;
4133 int orig_arch_specified = ix86_arch_specified;
4134 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4135 tree t = NULL_TREE;
4136 int i;
4137 struct cl_target_option *def
4138 = TREE_TARGET_OPTION (target_option_default_node);
4139 struct gcc_options enum_opts_set;
4140
4141 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4142
4143 /* Process each of the options on the chain. */
4144 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4145 &enum_opts_set))
4146 return NULL_TREE;
4147
4148 /* If the changed options are different from the default, rerun
4149 ix86_option_override_internal, and then save the options away.
4150 The string options are attribute options, and will be undone
4151 when we copy the save structure. */
4152 if (ix86_isa_flags != def->x_ix86_isa_flags
4153 || target_flags != def->x_target_flags
4154 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4155 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4156 || enum_opts_set.x_ix86_fpmath)
4157 {
4158 /* If we are using the default tune= or arch=, undo the string assigned,
4159 and use the default. */
4160 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4161 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4162 else if (!orig_arch_specified)
4163 ix86_arch_string = NULL;
4164
4165 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4166 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4167 else if (orig_tune_defaulted)
4168 ix86_tune_string = NULL;
4169
4170 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4171 if (enum_opts_set.x_ix86_fpmath)
4172 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4173 else if (!TARGET_64BIT && TARGET_SSE)
4174 {
4175 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4176 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4177 }
4178
4179 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4180 ix86_option_override_internal (false);
4181
4182 /* Add any builtin functions with the new isa if any. */
4183 ix86_add_new_builtins (ix86_isa_flags);
4184
4185 /* Save the current options unless we are validating options for
4186 #pragma. */
4187 t = build_target_option_node ();
4188
4189 ix86_arch_string = orig_arch_string;
4190 ix86_tune_string = orig_tune_string;
4191 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4192
4193 /* Free up memory allocated to hold the strings */
4194 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4195 free (option_strings[i]);
4196 }
4197
4198 return t;
4199 }
4200
4201 /* Hook to validate attribute((target("string"))). */
4202
4203 static bool
4204 ix86_valid_target_attribute_p (tree fndecl,
4205 tree ARG_UNUSED (name),
4206 tree args,
4207 int ARG_UNUSED (flags))
4208 {
4209 struct cl_target_option cur_target;
4210 bool ret = true;
4211 tree old_optimize = build_optimization_node ();
4212 tree new_target, new_optimize;
4213 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4214
4215 /* If the function changed the optimization levels as well as setting target
4216 options, start with the optimizations specified. */
4217 if (func_optimize && func_optimize != old_optimize)
4218 cl_optimization_restore (&global_options,
4219 TREE_OPTIMIZATION (func_optimize));
4220
4221 /* The target attributes may also change some optimization flags, so update
4222 the optimization options if necessary. */
4223 cl_target_option_save (&cur_target, &global_options);
4224 new_target = ix86_valid_target_attribute_tree (args);
4225 new_optimize = build_optimization_node ();
4226
4227 if (!new_target)
4228 ret = false;
4229
4230 else if (fndecl)
4231 {
4232 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4233
4234 if (old_optimize != new_optimize)
4235 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4236 }
4237
4238 cl_target_option_restore (&global_options, &cur_target);
4239
4240 if (old_optimize != new_optimize)
4241 cl_optimization_restore (&global_options,
4242 TREE_OPTIMIZATION (old_optimize));
4243
4244 return ret;
4245 }
4246
4247 \f
4248 /* Hook to determine if one function can safely inline another. */
4249
4250 static bool
4251 ix86_can_inline_p (tree caller, tree callee)
4252 {
4253 bool ret = false;
4254 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4255 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4256
4257 /* If callee has no option attributes, then it is ok to inline. */
4258 if (!callee_tree)
4259 ret = true;
4260
4261 /* If caller has no option attributes, but callee does then it is not ok to
4262 inline. */
4263 else if (!caller_tree)
4264 ret = false;
4265
4266 else
4267 {
4268 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4269 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4270
4271 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4272 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4273 function. */
4274 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4275 != callee_opts->x_ix86_isa_flags)
4276 ret = false;
4277
4278 /* See if we have the same non-isa options. */
4279 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4280 ret = false;
4281
4282 /* See if arch, tune, etc. are the same. */
4283 else if (caller_opts->arch != callee_opts->arch)
4284 ret = false;
4285
4286 else if (caller_opts->tune != callee_opts->tune)
4287 ret = false;
4288
4289 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4290 ret = false;
4291
4292 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4293 ret = false;
4294
4295 else
4296 ret = true;
4297 }
4298
4299 return ret;
4300 }
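/* For example, under the subset rule above a caller built with -msse4.2
   can inline a callee declared __attribute__((target("sse2"))), while a
   plain -msse2 caller cannot inline an __attribute__((target("sse4.2")))
   callee, because the callee's ISA bits are not a subset of the
   caller's.  */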
4301
4302 \f
4303 /* Remember the last target of ix86_set_current_function. */
4304 static GTY(()) tree ix86_previous_fndecl;
4305
4306 /* Establish appropriate back-end context for processing the function
4307 FNDECL. The argument might be NULL to indicate processing at top
4308 level, outside of any function scope. */
4309 static void
4310 ix86_set_current_function (tree fndecl)
4311 {
4312 /* Only change the context if the function changes. This hook is called
4313 several times in the course of compiling a function, and we don't want to
4314 slow things down too much or call target_reinit when it isn't safe. */
4315 if (fndecl && fndecl != ix86_previous_fndecl)
4316 {
4317 tree old_tree = (ix86_previous_fndecl
4318 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4319 : NULL_TREE);
4320
4321 tree new_tree = (fndecl
4322 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4323 : NULL_TREE);
4324
4325 ix86_previous_fndecl = fndecl;
4326 if (old_tree == new_tree)
4327 ;
4328
4329 else if (new_tree)
4330 {
4331 cl_target_option_restore (&global_options,
4332 TREE_TARGET_OPTION (new_tree));
4333 target_reinit ();
4334 }
4335
4336 else if (old_tree)
4337 {
4338 struct cl_target_option *def
4339 = TREE_TARGET_OPTION (target_option_current_node);
4340
4341 cl_target_option_restore (&global_options, def);
4342 target_reinit ();
4343 }
4344 }
4345 }
4346
4347 \f
4348 /* Return true if this goes in large data/bss. */
4349
4350 static bool
4351 ix86_in_large_data_p (tree exp)
4352 {
4353 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4354 return false;
4355
4356 /* Functions are never large data. */
4357 if (TREE_CODE (exp) == FUNCTION_DECL)
4358 return false;
4359
4360 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4361 {
4362 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4363 if (strcmp (section, ".ldata") == 0
4364 || strcmp (section, ".lbss") == 0)
4365 return true;
4366 return false;
4367 }
4368 else
4369 {
4370 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4371
4372 /* If this is an incomplete type with size 0, then we can't put it
4373 in data because it might be too big when completed. */
4374 if (!size || size > ix86_section_threshold)
4375 return true;
4376 }
4377
4378 return false;
4379 }
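/* For example, with -mcmodel=medium a variable whose size exceeds
   ix86_section_threshold (assumed to be controlled by
   -mlarge-data-threshold=) is treated as large data and is later placed
   in .ldata/.lbss by the section selection hooks below.  */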
4380
4381 /* Switch to the appropriate section for output of DECL.
4382 DECL is either a `VAR_DECL' node or a constant of some sort.
4383 RELOC indicates whether forming the initial value of DECL requires
4384 link-time relocations. */
4385
4386 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4387 ATTRIBUTE_UNUSED;
4388
4389 static section *
4390 x86_64_elf_select_section (tree decl, int reloc,
4391 unsigned HOST_WIDE_INT align)
4392 {
4393 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4394 && ix86_in_large_data_p (decl))
4395 {
4396 const char *sname = NULL;
4397 unsigned int flags = SECTION_WRITE;
4398 switch (categorize_decl_for_section (decl, reloc))
4399 {
4400 case SECCAT_DATA:
4401 sname = ".ldata";
4402 break;
4403 case SECCAT_DATA_REL:
4404 sname = ".ldata.rel";
4405 break;
4406 case SECCAT_DATA_REL_LOCAL:
4407 sname = ".ldata.rel.local";
4408 break;
4409 case SECCAT_DATA_REL_RO:
4410 sname = ".ldata.rel.ro";
4411 break;
4412 case SECCAT_DATA_REL_RO_LOCAL:
4413 sname = ".ldata.rel.ro.local";
4414 break;
4415 case SECCAT_BSS:
4416 sname = ".lbss";
4417 flags |= SECTION_BSS;
4418 break;
4419 case SECCAT_RODATA:
4420 case SECCAT_RODATA_MERGE_STR:
4421 case SECCAT_RODATA_MERGE_STR_INIT:
4422 case SECCAT_RODATA_MERGE_CONST:
4423 sname = ".lrodata";
4424 flags = 0;
4425 break;
4426 case SECCAT_SRODATA:
4427 case SECCAT_SDATA:
4428 case SECCAT_SBSS:
4429 gcc_unreachable ();
4430 case SECCAT_TEXT:
4431 case SECCAT_TDATA:
4432 case SECCAT_TBSS:
4433 /* We don't split these for the medium model. Place them into
4434 default sections and hope for the best. */
4435 break;
4436 }
4437 if (sname)
4438 {
4439 /* We might get called with string constants, but get_named_section
4440 doesn't like them as they are not DECLs. Also, we need to set
4441 flags in that case. */
4442 if (!DECL_P (decl))
4443 return get_section (sname, flags, NULL);
4444 return get_named_section (decl, sname, reloc);
4445 }
4446 }
4447 return default_elf_select_section (decl, reloc, align);
4448 }
4449
4450 /* Build up a unique section name, expressed as a
4451 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4452 RELOC indicates whether the initial value of EXP requires
4453 link-time relocations. */
4454
4455 static void ATTRIBUTE_UNUSED
4456 x86_64_elf_unique_section (tree decl, int reloc)
4457 {
4458 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4459 && ix86_in_large_data_p (decl))
4460 {
4461 const char *prefix = NULL;
4462 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4463 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4464
4465 switch (categorize_decl_for_section (decl, reloc))
4466 {
4467 case SECCAT_DATA:
4468 case SECCAT_DATA_REL:
4469 case SECCAT_DATA_REL_LOCAL:
4470 case SECCAT_DATA_REL_RO:
4471 case SECCAT_DATA_REL_RO_LOCAL:
4472 prefix = one_only ? ".ld" : ".ldata";
4473 break;
4474 case SECCAT_BSS:
4475 prefix = one_only ? ".lb" : ".lbss";
4476 break;
4477 case SECCAT_RODATA:
4478 case SECCAT_RODATA_MERGE_STR:
4479 case SECCAT_RODATA_MERGE_STR_INIT:
4480 case SECCAT_RODATA_MERGE_CONST:
4481 prefix = one_only ? ".lr" : ".lrodata";
4482 break;
4483 case SECCAT_SRODATA:
4484 case SECCAT_SDATA:
4485 case SECCAT_SBSS:
4486 gcc_unreachable ();
4487 case SECCAT_TEXT:
4488 case SECCAT_TDATA:
4489 case SECCAT_TBSS:
4490 /* We don't split these for the medium model. Place them into
4491 default sections and hope for the best. */
4492 break;
4493 }
4494 if (prefix)
4495 {
4496 const char *name, *linkonce;
4497 char *string;
4498
4499 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4500 name = targetm.strip_name_encoding (name);
4501
4502 /* If we're using one_only, then there needs to be a .gnu.linkonce
4503 prefix to the section name. */
4504 linkonce = one_only ? ".gnu.linkonce" : "";
4505
4506 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4507
4508 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4509 return;
4510 }
4511 }
4512 default_unique_section (decl, reloc);
4513 }
4514
4515 #ifdef COMMON_ASM_OP
4516 /* This says how to output assembler code to declare an
4517 uninitialized external linkage data object.
4518
4519 For medium model x86-64 we need to use .largecomm opcode for
4520 large objects. */
4521 void
4522 x86_elf_aligned_common (FILE *file,
4523 const char *name, unsigned HOST_WIDE_INT size,
4524 int align)
4525 {
4526 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4527 && size > (unsigned int)ix86_section_threshold)
4528 fputs (".largecomm\t", file);
4529 else
4530 fputs (COMMON_ASM_OP, file);
4531 assemble_name (file, name);
4532 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4533 size, align / BITS_PER_UNIT);
4534 }
4535 #endif
4536
4537 /* Utility function for targets to use in implementing
4538 ASM_OUTPUT_ALIGNED_BSS. */
4539
4540 void
4541 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4542 const char *name, unsigned HOST_WIDE_INT size,
4543 int align)
4544 {
4545 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4546 && size > (unsigned int)ix86_section_threshold)
4547 switch_to_section (get_named_section (decl, ".lbss", 0));
4548 else
4549 switch_to_section (bss_section);
4550 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4551 #ifdef ASM_DECLARE_OBJECT_NAME
4552 last_assemble_variable_decl = decl;
4553 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4554 #else
4555 /* The standard thing is just to output a label for the object. */
4556 ASM_OUTPUT_LABEL (file, name);
4557 #endif /* ASM_DECLARE_OBJECT_NAME */
4558 ASM_OUTPUT_SKIP (file, size ? size : 1);
4559 }
4560 \f
4561 /* Decide whether we must probe the stack before any space allocation
4562 on this target. It's essentially TARGET_STACK_PROBE except when
4563 -fstack-check causes the stack to be already probed differently. */
4564
4565 bool
4566 ix86_target_stack_probe (void)
4567 {
4568 /* Do not probe the stack twice if static stack checking is enabled. */
4569 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4570 return false;
4571
4572 return TARGET_STACK_PROBE;
4573 }
4574 \f
4575 /* Decide whether we can make a sibling call to a function. DECL is the
4576 declaration of the function being targeted by the call and EXP is the
4577 CALL_EXPR representing the call. */
4578
4579 static bool
4580 ix86_function_ok_for_sibcall (tree decl, tree exp)
4581 {
4582 tree type, decl_or_type;
4583 rtx a, b;
4584
4585 /* If we are generating position-independent code, we cannot sibcall
4586 optimize any indirect call, or a direct call to a global function,
4587 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4588 if (!TARGET_MACHO
4589 && !TARGET_64BIT
4590 && flag_pic
4591 && (!decl || !targetm.binds_local_p (decl)))
4592 return false;
4593
4594 /* If we need to align the outgoing stack, then sibcalling would
4595 unalign the stack, which may break the called function. */
4596 if (ix86_minimum_incoming_stack_boundary (true)
4597 < PREFERRED_STACK_BOUNDARY)
4598 return false;
4599
4600 if (decl)
4601 {
4602 decl_or_type = decl;
4603 type = TREE_TYPE (decl);
4604 }
4605 else
4606 {
4607 /* We're looking at the CALL_EXPR, we need the type of the function. */
4608 type = CALL_EXPR_FN (exp); /* pointer expression */
4609 type = TREE_TYPE (type); /* pointer type */
4610 type = TREE_TYPE (type); /* function type */
4611 decl_or_type = type;
4612 }
4613
4614 /* Check that the return value locations are the same. Like
4615 if we are returning floats on the 80387 register stack, we cannot
4616 make a sibcall from a function that doesn't return a float to a
4617 function that does or, conversely, from a function that does return
4618 a float to a function that doesn't; the necessary stack adjustment
4619 would not be executed. This is also the place we notice
4620 differences in the return value ABI. Note that it is ok for one
4621 of the functions to have void return type as long as the return
4622 value of the other is passed in a register. */
4623 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4624 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4625 cfun->decl, false);
4626 if (STACK_REG_P (a) || STACK_REG_P (b))
4627 {
4628 if (!rtx_equal_p (a, b))
4629 return false;
4630 }
4631 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4632 {
4633 /* Disable sibcall if we need to generate vzeroupper after
4634 callee returns. */
4635 if (TARGET_VZEROUPPER
4636 && cfun->machine->callee_return_avx256_p
4637 && !cfun->machine->caller_return_avx256_p)
4638 return false;
4639 }
4640 else if (!rtx_equal_p (a, b))
4641 return false;
4642
4643 if (TARGET_64BIT)
4644 {
4645 /* The SYSV ABI has more call-clobbered registers;
4646 disallow sibcalls from MS to SYSV. */
4647 if (cfun->machine->call_abi == MS_ABI
4648 && ix86_function_type_abi (type) == SYSV_ABI)
4649 return false;
4650 }
4651 else
4652 {
4653 /* If this call is indirect, we'll need to be able to use a
4654 call-clobbered register for the address of the target function.
4655 Make sure that all such registers are not used for passing
4656 parameters. Note that DLLIMPORT functions are indirect. */
4657 if (!decl
4658 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4659 {
4660 if (ix86_function_regparm (type, NULL) >= 3)
4661 {
4662 /* ??? Need to count the actual number of registers to be used,
4663 not the possible number of registers. Fix later. */
4664 return false;
4665 }
4666 }
4667 }
4668
4669 /* Otherwise okay. That also includes certain types of indirect calls. */
4670 return true;
4671 }
4672
4673 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4674 and "sseregparm" calling convention attributes;
4675 arguments as in struct attribute_spec.handler. */
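/* Illustrative declarations handled by this hook:
	int __attribute__((regparm(3))) f (int, int, int);
	int __attribute__((fastcall)) g (int, int);
	int __attribute__((stdcall)) h (int);
   The checks below reject contradictory combinations such as fastcall
   together with cdecl, stdcall, regparm or thiscall.  */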
4676
4677 static tree
4678 ix86_handle_cconv_attribute (tree *node, tree name,
4679 tree args,
4680 int flags ATTRIBUTE_UNUSED,
4681 bool *no_add_attrs)
4682 {
4683 if (TREE_CODE (*node) != FUNCTION_TYPE
4684 && TREE_CODE (*node) != METHOD_TYPE
4685 && TREE_CODE (*node) != FIELD_DECL
4686 && TREE_CODE (*node) != TYPE_DECL)
4687 {
4688 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4689 name);
4690 *no_add_attrs = true;
4691 return NULL_TREE;
4692 }
4693
4694 /* Can combine regparm with all attributes but fastcall and thiscall. */
4695 if (is_attribute_p ("regparm", name))
4696 {
4697 tree cst;
4698
4699 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4700 {
4701 error ("fastcall and regparm attributes are not compatible");
4702 }
4703
4704 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4705 {
4706 error ("regparm and thiscall attributes are not compatible");
4707 }
4708
4709 cst = TREE_VALUE (args);
4710 if (TREE_CODE (cst) != INTEGER_CST)
4711 {
4712 warning (OPT_Wattributes,
4713 "%qE attribute requires an integer constant argument",
4714 name);
4715 *no_add_attrs = true;
4716 }
4717 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4718 {
4719 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4720 name, REGPARM_MAX);
4721 *no_add_attrs = true;
4722 }
4723
4724 return NULL_TREE;
4725 }
4726
4727 if (TARGET_64BIT)
4728 {
4729 /* Do not warn when emulating the MS ABI. */
4730 if ((TREE_CODE (*node) != FUNCTION_TYPE
4731 && TREE_CODE (*node) != METHOD_TYPE)
4732 || ix86_function_type_abi (*node) != MS_ABI)
4733 warning (OPT_Wattributes, "%qE attribute ignored",
4734 name);
4735 *no_add_attrs = true;
4736 return NULL_TREE;
4737 }
4738
4739 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4740 if (is_attribute_p ("fastcall", name))
4741 {
4742 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4743 {
4744 error ("fastcall and cdecl attributes are not compatible");
4745 }
4746 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4747 {
4748 error ("fastcall and stdcall attributes are not compatible");
4749 }
4750 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4751 {
4752 error ("fastcall and regparm attributes are not compatible");
4753 }
4754 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4755 {
4756 error ("fastcall and thiscall attributes are not compatible");
4757 }
4758 }
4759
4760 /* Can combine stdcall with fastcall (redundant), regparm and
4761 sseregparm. */
4762 else if (is_attribute_p ("stdcall", name))
4763 {
4764 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4765 {
4766 error ("stdcall and cdecl attributes are not compatible");
4767 }
4768 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4769 {
4770 error ("stdcall and fastcall attributes are not compatible");
4771 }
4772 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4773 {
4774 error ("stdcall and thiscall attributes are not compatible");
4775 }
4776 }
4777
4778 /* Can combine cdecl with regparm and sseregparm. */
4779 else if (is_attribute_p ("cdecl", name))
4780 {
4781 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4782 {
4783 error ("stdcall and cdecl attributes are not compatible");
4784 }
4785 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4786 {
4787 error ("fastcall and cdecl attributes are not compatible");
4788 }
4789 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4790 {
4791 error ("cdecl and thiscall attributes are not compatible");
4792 }
4793 }
4794 else if (is_attribute_p ("thiscall", name))
4795 {
4796 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4797 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4798 name);
4799 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4800 {
4801 error ("stdcall and thiscall attributes are not compatible");
4802 }
4803 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4804 {
4805 error ("fastcall and thiscall attributes are not compatible");
4806 }
4807 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4808 {
4809 error ("cdecl and thiscall attributes are not compatible");
4810 }
4811 }
4812
4813 /* Can combine sseregparm with all attributes. */
4814
4815 return NULL_TREE;
4816 }
4817
4818 /* This function determines from TYPE the calling-convention. */
4819
4820 unsigned int
4821 ix86_get_callcvt (const_tree type)
4822 {
4823 unsigned int ret = 0;
4824 bool is_stdarg;
4825 tree attrs;
4826
4827 if (TARGET_64BIT)
4828 return IX86_CALLCVT_CDECL;
4829
4830 attrs = TYPE_ATTRIBUTES (type);
4831 if (attrs != NULL_TREE)
4832 {
4833 if (lookup_attribute ("cdecl", attrs))
4834 ret |= IX86_CALLCVT_CDECL;
4835 else if (lookup_attribute ("stdcall", attrs))
4836 ret |= IX86_CALLCVT_STDCALL;
4837 else if (lookup_attribute ("fastcall", attrs))
4838 ret |= IX86_CALLCVT_FASTCALL;
4839 else if (lookup_attribute ("thiscall", attrs))
4840 ret |= IX86_CALLCVT_THISCALL;
4841
4842 /* Regparm isn't allowed for thiscall and fastcall. */
4843 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4844 {
4845 if (lookup_attribute ("regparm", attrs))
4846 ret |= IX86_CALLCVT_REGPARM;
4847 if (lookup_attribute ("sseregparm", attrs))
4848 ret |= IX86_CALLCVT_SSEREGPARM;
4849 }
4850
4851 if (IX86_BASE_CALLCVT(ret) != 0)
4852 return ret;
4853 }
4854
4855 is_stdarg = stdarg_p (type);
4856 if (TARGET_RTD && !is_stdarg)
4857 return IX86_CALLCVT_STDCALL | ret;
4858
4859 if (ret != 0
4860 || is_stdarg
4861 || TREE_CODE (type) != METHOD_TYPE
4862 || ix86_function_type_abi (type) != MS_ABI)
4863 return IX86_CALLCVT_CDECL | ret;
4864
4865 return IX86_CALLCVT_THISCALL;
4866 }
4867
4868 /* Return 0 if the attributes for two types are incompatible, 1 if they
4869 are compatible, and 2 if they are nearly compatible (which causes a
4870 warning to be generated). */
4871
4872 static int
4873 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4874 {
4875 unsigned int ccvt1, ccvt2;
4876
4877 if (TREE_CODE (type1) != FUNCTION_TYPE
4878 && TREE_CODE (type1) != METHOD_TYPE)
4879 return 1;
4880
4881 ccvt1 = ix86_get_callcvt (type1);
4882 ccvt2 = ix86_get_callcvt (type2);
4883 if (ccvt1 != ccvt2)
4884 return 0;
4885 if (ix86_function_regparm (type1, NULL)
4886 != ix86_function_regparm (type2, NULL))
4887 return 0;
4888
4889 return 1;
4890 }
4891 \f
4892 /* Return the regparm value for a function with the indicated TYPE and DECL.
4893 DECL may be NULL when calling function indirectly
4894 or considering a libcall. */
4895
4896 static int
4897 ix86_function_regparm (const_tree type, const_tree decl)
4898 {
4899 tree attr;
4900 int regparm;
4901 unsigned int ccvt;
4902
4903 if (TARGET_64BIT)
4904 return (ix86_function_type_abi (type) == SYSV_ABI
4905 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4906 ccvt = ix86_get_callcvt (type);
4907 regparm = ix86_regparm;
4908
4909 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
4910 {
4911 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4912 if (attr)
4913 {
4914 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4915 return regparm;
4916 }
4917 }
4918 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
4919 return 2;
4920 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
4921 return 1;
4922
4923 /* Use register calling convention for local functions when possible. */
4924 if (decl
4925 && TREE_CODE (decl) == FUNCTION_DECL
4926 && optimize
4927 && !(profile_flag && !flag_fentry))
4928 {
4929 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4930 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4931 if (i && i->local && i->can_change_signature)
4932 {
4933 int local_regparm, globals = 0, regno;
4934
4935 /* Make sure no regparm register is taken by a
4936 fixed register variable. */
4937 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4938 if (fixed_regs[local_regparm])
4939 break;
4940
4941 /* We don't want to use regparm(3) for nested functions as
4942 these use a static chain pointer in the third argument. */
4943 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4944 local_regparm = 2;
4945
4946 /* In 32-bit mode save a register for the split stack. */
4947 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4948 local_regparm = 2;
4949
4950 /* Each fixed register usage increases register pressure,
4951 so fewer registers should be used for argument passing.
4952 This functionality can be overridden by an explicit
4953 regparm value. */
4954 for (regno = 0; regno <= DI_REG; regno++)
4955 if (fixed_regs[regno])
4956 globals++;
4957
4958 local_regparm
4959 = globals < local_regparm ? local_regparm - globals : 0;
4960
4961 if (local_regparm > regparm)
4962 regparm = local_regparm;
4963 }
4964 }
4965
4966 return regparm;
4967 }
4968
4969 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4970 DFmode (2) arguments in SSE registers for a function with the
4971 indicated TYPE and DECL. DECL may be NULL when calling function
4972 indirectly or considering a libcall. Otherwise return 0. */
4973
4974 static int
4975 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4976 {
4977 gcc_assert (!TARGET_64BIT);
4978
4979 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4980 by the sseregparm attribute. */
4981 if (TARGET_SSEREGPARM
4982 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4983 {
4984 if (!TARGET_SSE)
4985 {
4986 if (warn)
4987 {
4988 if (decl)
4989 error ("calling %qD with attribute sseregparm without "
4990 "SSE/SSE2 enabled", decl);
4991 else
4992 error ("calling %qT with attribute sseregparm without "
4993 "SSE/SSE2 enabled", type);
4994 }
4995 return 0;
4996 }
4997
4998 return 2;
4999 }
5000
5001 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5002 (and DFmode for SSE2) arguments in SSE registers. */
5003 if (decl && TARGET_SSE_MATH && optimize
5004 && !(profile_flag && !flag_fentry))
5005 {
5006 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5007 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5008 if (i && i->local && i->can_change_signature)
5009 return TARGET_SSE2 ? 2 : 1;
5010 }
5011
5012 return 0;
5013 }
5014
5015 /* Return true if EAX is live at the start of the function. Used by
5016 ix86_expand_prologue to determine if we need special help before
5017 calling allocate_stack_worker. */
5018
5019 static bool
5020 ix86_eax_live_at_start_p (void)
5021 {
5022 /* Cheat. Don't bother working forward from ix86_function_regparm
5023 to the function type to whether an actual argument is located in
5024 eax. Instead just look at cfg info, which is still close enough
5025 to correct at this point. This gives false positives for broken
5026 functions that might use uninitialized data that happens to be
5027 allocated in eax, but who cares? */
5028 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5029 }
5030
5031 static bool
5032 ix86_keep_aggregate_return_pointer (tree fntype)
5033 {
5034 tree attr;
5035
5036 if (!TARGET_64BIT)
5037 {
5038 attr = lookup_attribute ("callee_pop_aggregate_return",
5039 TYPE_ATTRIBUTES (fntype));
5040 if (attr)
5041 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5042
5043 /* For 32-bit MS-ABI the default is to keep the aggregate
5044 return pointer. */
5045 if (ix86_function_type_abi (fntype) == MS_ABI)
5046 return true;
5047 }
5048 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5049 }
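/* A minimal sketch of the attribute checked above.  Illustration only,
   not compiled; the type and function names are hypothetical.  With an
   argument of 0 the callee keeps (does not pop) the hidden aggregate
   return pointer; with 1 it pops it on return.  */
#if 0
struct big { int a[4]; };
struct big __attribute__ ((callee_pop_aggregate_return (0))) make_big (void);
#endif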
5050
5051 /* Value is the number of bytes of arguments automatically
5052 popped when returning from a subroutine call.
5053 FUNDECL is the declaration node of the function (as a tree),
5054 FUNTYPE is the data type of the function (as a tree),
5055 or for a library call it is an identifier node for the subroutine name.
5056 SIZE is the number of bytes of arguments passed on the stack.
5057
5058 On the 80386, the RTD insn may be used to pop them if the number
5059 of args is fixed, but if the number is variable then the caller
5060 must pop them all. RTD can't be used for library calls now
5061 because the library is compiled with the Unix compiler.
5062 Use of RTD is a selectable option, since it is incompatible with
5063 standard Unix calling sequences. If the option is not selected,
5064 the caller must always pop the args.
5065
5066 The attribute stdcall is equivalent to RTD on a per module basis. */
5067
5068 static int
5069 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5070 {
5071 unsigned int ccvt;
5072
5073 /* None of the 64-bit ABIs pop arguments. */
5074 if (TARGET_64BIT)
5075 return 0;
5076
5077 ccvt = ix86_get_callcvt (funtype);
5078
5079 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5080 | IX86_CALLCVT_THISCALL)) != 0
5081 && ! stdarg_p (funtype))
5082 return size;
5083
5084 /* Lose any fake structure return argument if it is passed on the stack. */
5085 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5086 && !ix86_keep_aggregate_return_pointer (funtype))
5087 {
5088 int nregs = ix86_function_regparm (funtype, fundecl);
5089 if (nregs == 0)
5090 return GET_MODE_SIZE (Pmode);
5091 }
5092
5093 return 0;
5094 }
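/* A minimal sketch of the popping behaviour computed above.  Illustration
   only, not compiled; the function names are hypothetical.  */
#if 0
int __attribute__ ((stdcall)) add_std (int a, int b); /* callee pops 8 bytes ("ret $8") */
int __attribute__ ((cdecl))   add_c (int a, int b);   /* caller pops; 0 is returned here */
#endif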
5095 \f
5096 /* Argument support functions. */
5097
5098 /* Return true when register may be used to pass function parameters. */
5099 bool
5100 ix86_function_arg_regno_p (int regno)
5101 {
5102 int i;
5103 const int *parm_regs;
5104
5105 if (!TARGET_64BIT)
5106 {
5107 if (TARGET_MACHO)
5108 return (regno < REGPARM_MAX
5109 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5110 else
5111 return (regno < REGPARM_MAX
5112 || (TARGET_MMX && MMX_REGNO_P (regno)
5113 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5114 || (TARGET_SSE && SSE_REGNO_P (regno)
5115 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5116 }
5117
5118 if (TARGET_MACHO)
5119 {
5120 if (SSE_REGNO_P (regno) && TARGET_SSE)
5121 return true;
5122 }
5123 else
5124 {
5125 if (TARGET_SSE && SSE_REGNO_P (regno)
5126 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5127 return true;
5128 }
5129
5130 /* TODO: The function should depend on current function ABI but
5131 builtins.c would need updating then. Therefore we use the
5132 default ABI. */
5133
5134 /* RAX is used as hidden argument to va_arg functions. */
5135 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5136 return true;
5137
5138 if (ix86_abi == MS_ABI)
5139 parm_regs = x86_64_ms_abi_int_parameter_registers;
5140 else
5141 parm_regs = x86_64_int_parameter_registers;
5142 for (i = 0; i < (ix86_abi == MS_ABI
5143 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5144 if (regno == parm_regs[i])
5145 return true;
5146 return false;
5147 }
5148
5149 /* Return true if we do not know how to pass TYPE solely in registers. */
5150
5151 static bool
5152 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5153 {
5154 if (must_pass_in_stack_var_size_or_pad (mode, type))
5155 return true;
5156
5157 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5158 The layout_type routine is crafty and tries to trick us into passing
5159 currently unsupported vector types on the stack by using TImode. */
5160 return (!TARGET_64BIT && mode == TImode
5161 && type && TREE_CODE (type) != VECTOR_TYPE);
5162 }
5163
5164 /* Return the size, in bytes, of the area reserved for arguments passed
5165 in registers for the function represented by FNDECL, depending on the
5166 ABI used. */
5167 int
5168 ix86_reg_parm_stack_space (const_tree fndecl)
5169 {
5170 enum calling_abi call_abi = SYSV_ABI;
5171 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5172 call_abi = ix86_function_abi (fndecl);
5173 else
5174 call_abi = ix86_function_type_abi (fndecl);
5175 if (TARGET_64BIT && call_abi == MS_ABI)
5176 return 32;
5177 return 0;
5178 }
5179
5180 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5181 calling ABI used. */
5182 enum calling_abi
5183 ix86_function_type_abi (const_tree fntype)
5184 {
5185 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5186 {
5187 enum calling_abi abi = ix86_abi;
5188 if (abi == SYSV_ABI)
5189 {
5190 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5191 abi = MS_ABI;
5192 }
5193 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5194 abi = SYSV_ABI;
5195 return abi;
5196 }
5197 return ix86_abi;
5198 }
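/* A minimal sketch of the per-function ABI attributes consulted above.
   Illustration only, not compiled; the function names are hypothetical.  */
#if 0
void __attribute__ ((ms_abi))   f_ms (int a, int b, int c, int d); /* MS_ABI:   rcx, rdx, r8, r9  */
void __attribute__ ((sysv_abi)) f_sv (int a, int b, int c, int d); /* SYSV_ABI: rdi, rsi, rdx, rcx */
#endif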
5199
5200 static bool
5201 ix86_function_ms_hook_prologue (const_tree fn)
5202 {
5203 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5204 {
5205 if (decl_function_context (fn) != NULL_TREE)
5206 error_at (DECL_SOURCE_LOCATION (fn),
5207 "ms_hook_prologue is not compatible with nested function");
5208 else
5209 return true;
5210 }
5211 return false;
5212 }
5213
5214 static enum calling_abi
5215 ix86_function_abi (const_tree fndecl)
5216 {
5217 if (! fndecl)
5218 return ix86_abi;
5219 return ix86_function_type_abi (TREE_TYPE (fndecl));
5220 }
5221
5222 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
5223 calling ABI used. */
5224 enum calling_abi
5225 ix86_cfun_abi (void)
5226 {
5227 if (! cfun)
5228 return ix86_abi;
5229 return cfun->machine->call_abi;
5230 }
5231
5232 /* Write the extra assembler code needed to declare a function properly. */
5233
5234 void
5235 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5236 tree decl)
5237 {
5238 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5239
5240 if (is_ms_hook)
5241 {
5242 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5243 unsigned int filler_cc = 0xcccccccc;
5244
5245 for (i = 0; i < filler_count; i += 4)
5246 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5247 }
5248
5249 #ifdef SUBTARGET_ASM_UNWIND_INIT
5250 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5251 #endif
5252
5253 ASM_OUTPUT_LABEL (asm_out_file, fname);
5254
5255 /* Output magic byte marker, if hot-patch attribute is set. */
5256 if (is_ms_hook)
5257 {
5258 if (TARGET_64BIT)
5259 {
5260 /* leaq [%rsp + 0], %rsp */
5261 asm_fprintf (asm_out_file, ASM_BYTE
5262 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5263 }
5264 else
5265 {
5266 /* movl.s %edi, %edi
5267 push %ebp
5268 movl.s %esp, %ebp */
5269 asm_fprintf (asm_out_file, ASM_BYTE
5270 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5271 }
5272 }
5273 }
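/* A minimal sketch of requesting the hot-patchable prologue emitted above.
   Illustration only, not compiled; the function name is hypothetical.  */
#if 0
void __attribute__ ((ms_hook_prologue)) patchable (void);
/* On 32-bit targets such a function starts with the "movl.s %edi, %edi;
   push %ebp; movl.s %esp, %ebp" byte sequence shown above, preceded by
   0xcc filler bytes before the label.  */
#endif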
5274
5275 /* regclass.c */
5276 extern void init_regs (void);
5277
5278 /* Implementation of the call ABI switching target hook. The call
5279 register sets specific to FNDECL are selected. See also
5280 ix86_conditional_register_usage for more details. */
5281 void
5282 ix86_call_abi_override (const_tree fndecl)
5283 {
5284 if (fndecl == NULL_TREE)
5285 cfun->machine->call_abi = ix86_abi;
5286 else
5287 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5288 }
5289
5290 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5291 expensive re-initialization of init_regs each time we switch function context
5292 since this is needed only during RTL expansion. */
5293 static void
5294 ix86_maybe_switch_abi (void)
5295 {
5296 if (TARGET_64BIT &&
5297 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5298 reinit_regs ();
5299 }
5300
5301 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5302 for a call to a function whose data type is FNTYPE.
5303 For a library call, FNTYPE is 0. */
5304
5305 void
5306 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5307 tree fntype, /* tree ptr for function decl */
5308 rtx libname, /* SYMBOL_REF of library name or 0 */
5309 tree fndecl,
5310 int caller)
5311 {
5312 struct cgraph_local_info *i;
5313 tree fnret_type;
5314
5315 memset (cum, 0, sizeof (*cum));
5316
5317 /* Initialize for the current callee. */
5318 if (caller)
5319 {
5320 cfun->machine->callee_pass_avx256_p = false;
5321 cfun->machine->callee_return_avx256_p = false;
5322 }
5323
5324 if (fndecl)
5325 {
5326 i = cgraph_local_info (fndecl);
5327 cum->call_abi = ix86_function_abi (fndecl);
5328 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5329 }
5330 else
5331 {
5332 i = NULL;
5333 cum->call_abi = ix86_function_type_abi (fntype);
5334 if (fntype)
5335 fnret_type = TREE_TYPE (fntype);
5336 else
5337 fnret_type = NULL;
5338 }
5339
5340 if (TARGET_VZEROUPPER && fnret_type)
5341 {
5342 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5343 false);
5344 if (function_pass_avx256_p (fnret_value))
5345 {
5346 /* The return value of this function uses 256bit AVX modes. */
5347 if (caller)
5348 cfun->machine->callee_return_avx256_p = true;
5349 else
5350 cfun->machine->caller_return_avx256_p = true;
5351 }
5352 }
5353
5354 cum->caller = caller;
5355
5356 /* Set up the number of registers to use for passing arguments. */
5357
5358 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5359 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5360 "or subtarget optimization implying it");
5361 cum->nregs = ix86_regparm;
5362 if (TARGET_64BIT)
5363 {
5364 cum->nregs = (cum->call_abi == SYSV_ABI
5365 ? X86_64_REGPARM_MAX
5366 : X86_64_MS_REGPARM_MAX);
5367 }
5368 if (TARGET_SSE)
5369 {
5370 cum->sse_nregs = SSE_REGPARM_MAX;
5371 if (TARGET_64BIT)
5372 {
5373 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5374 ? X86_64_SSE_REGPARM_MAX
5375 : X86_64_MS_SSE_REGPARM_MAX);
5376 }
5377 }
5378 if (TARGET_MMX)
5379 cum->mmx_nregs = MMX_REGPARM_MAX;
5380 cum->warn_avx = true;
5381 cum->warn_sse = true;
5382 cum->warn_mmx = true;
5383
5384 /* Because types might mismatch between caller and callee, we need to
5385 use the actual type of the function for local calls.
5386 FIXME: cgraph_analyze can be told to actually record whether a function
5387 uses va_start, so for local functions maybe_vaarg can be made more
5388 aggressive, helping K&R code.
5389 FIXME: once the type system is fixed, we won't need this code anymore. */
5390 if (i && i->local && i->can_change_signature)
5391 fntype = TREE_TYPE (fndecl);
5392 cum->maybe_vaarg = (fntype
5393 ? (!prototype_p (fntype) || stdarg_p (fntype))
5394 : !libname);
5395
5396 if (!TARGET_64BIT)
5397 {
5398 /* If there are variable arguments, then we won't pass anything
5399 in registers in 32-bit mode. */
5400 if (stdarg_p (fntype))
5401 {
5402 cum->nregs = 0;
5403 cum->sse_nregs = 0;
5404 cum->mmx_nregs = 0;
5405 cum->warn_avx = 0;
5406 cum->warn_sse = 0;
5407 cum->warn_mmx = 0;
5408 return;
5409 }
5410
5411 /* Use ecx and edx registers if function has fastcall attribute,
5412 else look for regparm information. */
5413 if (fntype)
5414 {
5415 unsigned int ccvt = ix86_get_callcvt (fntype);
5416 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5417 {
5418 cum->nregs = 1;
5419 cum->fastcall = 1; /* Same first register as in fastcall. */
5420 }
5421 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5422 {
5423 cum->nregs = 2;
5424 cum->fastcall = 1;
5425 }
5426 else
5427 cum->nregs = ix86_function_regparm (fntype, fndecl);
5428 }
5429
5430 /* Set up the number of SSE registers used for passing SFmode
5431 and DFmode arguments. Warn for mismatching ABI. */
5432 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5433 }
5434 }
5435
5436 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5437 But in the case of vector types, it is some vector mode.
5438
5439 When we have only some of our vector isa extensions enabled, then there
5440 are some modes for which vector_mode_supported_p is false. For these
5441 modes, the generic vector support in gcc will choose some non-vector mode
5442 in order to implement the type. By computing the natural mode, we'll
5443 select the proper ABI location for the operand and not depend on whatever
5444 the middle-end decides to do with these vector types.
5445
5446 The middle-end can't deal with vector types larger than 16 bytes. In this
5447 case, we return the original mode and warn about the ABI change if CUM isn't
5448 NULL. */
5449
5450 static enum machine_mode
5451 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5452 {
5453 enum machine_mode mode = TYPE_MODE (type);
5454
5455 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5456 {
5457 HOST_WIDE_INT size = int_size_in_bytes (type);
5458 if ((size == 8 || size == 16 || size == 32)
5459 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5460 && TYPE_VECTOR_SUBPARTS (type) > 1)
5461 {
5462 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5463
5464 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5465 mode = MIN_MODE_VECTOR_FLOAT;
5466 else
5467 mode = MIN_MODE_VECTOR_INT;
5468
5469 /* Get the mode which has this inner mode and number of units. */
5470 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5471 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5472 && GET_MODE_INNER (mode) == innermode)
5473 {
5474 if (size == 32 && !TARGET_AVX)
5475 {
5476 static bool warnedavx;
5477
5478 if (cum
5479 && !warnedavx
5480 && cum->warn_avx)
5481 {
5482 warnedavx = true;
5483 warning (0, "AVX vector argument without AVX "
5484 "enabled changes the ABI");
5485 }
5486 return TYPE_MODE (type);
5487 }
5488 else
5489 return mode;
5490 }
5491
5492 gcc_unreachable ();
5493 }
5494 }
5495
5496 return mode;
5497 }
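/* A minimal sketch of what type_natural_mode computes.  Illustration only,
   not compiled; the typedef is hypothetical.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
/* Even if SSE is disabled and the middle-end lowers v4sf to a non-vector
   mode, the natural mode used for ABI purposes is still V4SFmode.  A
   32-byte vector compiled without AVX instead keeps TYPE_MODE and emits
   the psABI warning above.  */
#endif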
5498
5499 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5500 this may not agree with the mode that the type system has chosen for the
5501 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5502 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5503
5504 static rtx
5505 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5506 unsigned int regno)
5507 {
5508 rtx tmp;
5509
5510 if (orig_mode != BLKmode)
5511 tmp = gen_rtx_REG (orig_mode, regno);
5512 else
5513 {
5514 tmp = gen_rtx_REG (mode, regno);
5515 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5516 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5517 }
5518
5519 return tmp;
5520 }
5521
5522 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5523 of this code is to classify each 8 bytes of an incoming argument by register
5524 class and assign registers accordingly. */
5525
5526 /* Return the union class of CLASS1 and CLASS2.
5527 See the x86-64 PS ABI for details. */
5528
5529 static enum x86_64_reg_class
5530 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5531 {
5532 /* Rule #1: If both classes are equal, this is the resulting class. */
5533 if (class1 == class2)
5534 return class1;
5535
5536 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5537 the other class. */
5538 if (class1 == X86_64_NO_CLASS)
5539 return class2;
5540 if (class2 == X86_64_NO_CLASS)
5541 return class1;
5542
5543 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5544 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5545 return X86_64_MEMORY_CLASS;
5546
5547 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5548 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5549 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5550 return X86_64_INTEGERSI_CLASS;
5551 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5552 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5553 return X86_64_INTEGER_CLASS;
5554
5555 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5556 MEMORY is used. */
5557 if (class1 == X86_64_X87_CLASS
5558 || class1 == X86_64_X87UP_CLASS
5559 || class1 == X86_64_COMPLEX_X87_CLASS
5560 || class2 == X86_64_X87_CLASS
5561 || class2 == X86_64_X87UP_CLASS
5562 || class2 == X86_64_COMPLEX_X87_CLASS)
5563 return X86_64_MEMORY_CLASS;
5564
5565 /* Rule #6: Otherwise class SSE is used. */
5566 return X86_64_SSE_CLASS;
5567 }
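/* A few worked instances of the rules above (illustration only):
   merge (X86_64_NO_CLASS, X86_64_SSE_CLASS) -> X86_64_SSE_CLASS (rule #2)
   merge (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> X86_64_INTEGERSI_CLASS (rule #4)
   merge (X86_64_SSE_CLASS, X86_64_X87_CLASS) -> X86_64_MEMORY_CLASS (rule #5)  */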
5568
5569 /* Classify the argument of type TYPE and mode MODE.
5570 CLASSES will be filled by the register class used to pass each word
5571 of the operand. The number of words is returned. In case the parameter
5572 should be passed in memory, 0 is returned. As a special case for zero
5573 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5574
5575 BIT_OFFSET is used internally for handling records and specifies the
5576 offset in bits modulo 256 to avoid overflow cases.
5577
5578 See the x86-64 PS ABI for details.
5579 */
5580
5581 static int
5582 classify_argument (enum machine_mode mode, const_tree type,
5583 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5584 {
5585 HOST_WIDE_INT bytes =
5586 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5587 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5588
5589 /* Variable sized entities are always passed/returned in memory. */
5590 if (bytes < 0)
5591 return 0;
5592
5593 if (mode != VOIDmode
5594 && targetm.calls.must_pass_in_stack (mode, type))
5595 return 0;
5596
5597 if (type && AGGREGATE_TYPE_P (type))
5598 {
5599 int i;
5600 tree field;
5601 enum x86_64_reg_class subclasses[MAX_CLASSES];
5602
5603 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5604 if (bytes > 32)
5605 return 0;
5606
5607 for (i = 0; i < words; i++)
5608 classes[i] = X86_64_NO_CLASS;
5609
5610 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5611 signal the memory class, so handle this as a special case. */
5612 if (!words)
5613 {
5614 classes[0] = X86_64_NO_CLASS;
5615 return 1;
5616 }
5617
5618 /* Classify each field of record and merge classes. */
5619 switch (TREE_CODE (type))
5620 {
5621 case RECORD_TYPE:
5622 /* And now merge the fields of structure. */
5623 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5624 {
5625 if (TREE_CODE (field) == FIELD_DECL)
5626 {
5627 int num;
5628
5629 if (TREE_TYPE (field) == error_mark_node)
5630 continue;
5631
5632 /* Bitfields are always classified as integer. Handle them
5633 early, since later code would consider them to be
5634 misaligned integers. */
5635 if (DECL_BIT_FIELD (field))
5636 {
5637 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5638 i < ((int_bit_position (field) + (bit_offset % 64))
5639 + tree_low_cst (DECL_SIZE (field), 0)
5640 + 63) / 8 / 8; i++)
5641 classes[i] =
5642 merge_classes (X86_64_INTEGER_CLASS,
5643 classes[i]);
5644 }
5645 else
5646 {
5647 int pos;
5648
5649 type = TREE_TYPE (field);
5650
5651 /* Flexible array member is ignored. */
5652 if (TYPE_MODE (type) == BLKmode
5653 && TREE_CODE (type) == ARRAY_TYPE
5654 && TYPE_SIZE (type) == NULL_TREE
5655 && TYPE_DOMAIN (type) != NULL_TREE
5656 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5657 == NULL_TREE))
5658 {
5659 static bool warned;
5660
5661 if (!warned && warn_psabi)
5662 {
5663 warned = true;
5664 inform (input_location,
5665 "the ABI of passing struct with"
5666 " a flexible array member has"
5667 " changed in GCC 4.4");
5668 }
5669 continue;
5670 }
5671 num = classify_argument (TYPE_MODE (type), type,
5672 subclasses,
5673 (int_bit_position (field)
5674 + bit_offset) % 256);
5675 if (!num)
5676 return 0;
5677 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5678 for (i = 0; i < num && (i + pos) < words; i++)
5679 classes[i + pos] =
5680 merge_classes (subclasses[i], classes[i + pos]);
5681 }
5682 }
5683 }
5684 break;
5685
5686 case ARRAY_TYPE:
5687 /* Arrays are handled as small records. */
5688 {
5689 int num;
5690 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5691 TREE_TYPE (type), subclasses, bit_offset);
5692 if (!num)
5693 return 0;
5694
5695 /* The partial classes are now full classes. */
5696 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5697 subclasses[0] = X86_64_SSE_CLASS;
5698 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5699 && !((bit_offset % 64) == 0 && bytes == 4))
5700 subclasses[0] = X86_64_INTEGER_CLASS;
5701
5702 for (i = 0; i < words; i++)
5703 classes[i] = subclasses[i % num];
5704
5705 break;
5706 }
5707 case UNION_TYPE:
5708 case QUAL_UNION_TYPE:
5709 /* Unions are similar to RECORD_TYPE but offset is always 0.
5710 */
5711 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5712 {
5713 if (TREE_CODE (field) == FIELD_DECL)
5714 {
5715 int num;
5716
5717 if (TREE_TYPE (field) == error_mark_node)
5718 continue;
5719
5720 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5721 TREE_TYPE (field), subclasses,
5722 bit_offset);
5723 if (!num)
5724 return 0;
5725 for (i = 0; i < num; i++)
5726 classes[i] = merge_classes (subclasses[i], classes[i]);
5727 }
5728 }
5729 break;
5730
5731 default:
5732 gcc_unreachable ();
5733 }
5734
5735 if (words > 2)
5736 {
5737 /* When the size is larger than 16 bytes, if the first class isn't
5738 X86_64_SSE_CLASS or any of the other classes aren't
5739 X86_64_SSEUP_CLASS, everything should be passed in
5740 memory. */
5741 if (classes[0] != X86_64_SSE_CLASS)
5742 return 0;
5743
5744 for (i = 1; i < words; i++)
5745 if (classes[i] != X86_64_SSEUP_CLASS)
5746 return 0;
5747 }
5748
5749 /* Final merger cleanup. */
5750 for (i = 0; i < words; i++)
5751 {
5752 /* If one class is MEMORY, everything should be passed in
5753 memory. */
5754 if (classes[i] == X86_64_MEMORY_CLASS)
5755 return 0;
5756
5757 /* The X86_64_SSEUP_CLASS should always be preceded by
5758 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5759 if (classes[i] == X86_64_SSEUP_CLASS
5760 && classes[i - 1] != X86_64_SSE_CLASS
5761 && classes[i - 1] != X86_64_SSEUP_CLASS)
5762 {
5763 /* The first one should never be X86_64_SSEUP_CLASS. */
5764 gcc_assert (i != 0);
5765 classes[i] = X86_64_SSE_CLASS;
5766 }
5767
5768 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5769 everything should be passed in memory. */
5770 if (classes[i] == X86_64_X87UP_CLASS
5771 && (classes[i - 1] != X86_64_X87_CLASS))
5772 {
5773 static bool warned;
5774
5775 /* The first one should never be X86_64_X87UP_CLASS. */
5776 gcc_assert (i != 0);
5777 if (!warned && warn_psabi)
5778 {
5779 warned = true;
5780 inform (input_location,
5781 "the ABI of passing union with long double"
5782 " has changed in GCC 4.4");
5783 }
5784 return 0;
5785 }
5786 }
5787 return words;
5788 }
5789
5790 /* Compute the alignment needed. We align all types to their natural boundaries,
5791 with the exception of XFmode, which is aligned to 64 bits. */
5792 if (mode != VOIDmode && mode != BLKmode)
5793 {
5794 int mode_alignment = GET_MODE_BITSIZE (mode);
5795
5796 if (mode == XFmode)
5797 mode_alignment = 128;
5798 else if (mode == XCmode)
5799 mode_alignment = 256;
5800 if (COMPLEX_MODE_P (mode))
5801 mode_alignment /= 2;
5802 /* Misaligned fields are always returned in memory. */
5803 if (bit_offset % mode_alignment)
5804 return 0;
5805 }
5806
5807 /* For V1xx modes, just use the base mode. */
5808 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5809 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5810 mode = GET_MODE_INNER (mode);
5811
5812 /* Classification of atomic types. */
5813 switch (mode)
5814 {
5815 case SDmode:
5816 case DDmode:
5817 classes[0] = X86_64_SSE_CLASS;
5818 return 1;
5819 case TDmode:
5820 classes[0] = X86_64_SSE_CLASS;
5821 classes[1] = X86_64_SSEUP_CLASS;
5822 return 2;
5823 case DImode:
5824 case SImode:
5825 case HImode:
5826 case QImode:
5827 case CSImode:
5828 case CHImode:
5829 case CQImode:
5830 {
5831 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5832
5833 if (size <= 32)
5834 {
5835 classes[0] = X86_64_INTEGERSI_CLASS;
5836 return 1;
5837 }
5838 else if (size <= 64)
5839 {
5840 classes[0] = X86_64_INTEGER_CLASS;
5841 return 1;
5842 }
5843 else if (size <= 64+32)
5844 {
5845 classes[0] = X86_64_INTEGER_CLASS;
5846 classes[1] = X86_64_INTEGERSI_CLASS;
5847 return 2;
5848 }
5849 else if (size <= 64+64)
5850 {
5851 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5852 return 2;
5853 }
5854 else
5855 gcc_unreachable ();
5856 }
5857 case CDImode:
5858 case TImode:
5859 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5860 return 2;
5861 case COImode:
5862 case OImode:
5863 /* OImode shouldn't be used directly. */
5864 gcc_unreachable ();
5865 case CTImode:
5866 return 0;
5867 case SFmode:
5868 if (!(bit_offset % 64))
5869 classes[0] = X86_64_SSESF_CLASS;
5870 else
5871 classes[0] = X86_64_SSE_CLASS;
5872 return 1;
5873 case DFmode:
5874 classes[0] = X86_64_SSEDF_CLASS;
5875 return 1;
5876 case XFmode:
5877 classes[0] = X86_64_X87_CLASS;
5878 classes[1] = X86_64_X87UP_CLASS;
5879 return 2;
5880 case TFmode:
5881 classes[0] = X86_64_SSE_CLASS;
5882 classes[1] = X86_64_SSEUP_CLASS;
5883 return 2;
5884 case SCmode:
5885 classes[0] = X86_64_SSE_CLASS;
5886 if (!(bit_offset % 64))
5887 return 1;
5888 else
5889 {
5890 static bool warned;
5891
5892 if (!warned && warn_psabi)
5893 {
5894 warned = true;
5895 inform (input_location,
5896 "the ABI of passing structure with complex float"
5897 " member has changed in GCC 4.4");
5898 }
5899 classes[1] = X86_64_SSESF_CLASS;
5900 return 2;
5901 }
5902 case DCmode:
5903 classes[0] = X86_64_SSEDF_CLASS;
5904 classes[1] = X86_64_SSEDF_CLASS;
5905 return 2;
5906 case XCmode:
5907 classes[0] = X86_64_COMPLEX_X87_CLASS;
5908 return 1;
5909 case TCmode:
5910 /* This mode is larger than 16 bytes. */
5911 return 0;
5912 case V8SFmode:
5913 case V8SImode:
5914 case V32QImode:
5915 case V16HImode:
5916 case V4DFmode:
5917 case V4DImode:
5918 classes[0] = X86_64_SSE_CLASS;
5919 classes[1] = X86_64_SSEUP_CLASS;
5920 classes[2] = X86_64_SSEUP_CLASS;
5921 classes[3] = X86_64_SSEUP_CLASS;
5922 return 4;
5923 case V4SFmode:
5924 case V4SImode:
5925 case V16QImode:
5926 case V8HImode:
5927 case V2DFmode:
5928 case V2DImode:
5929 classes[0] = X86_64_SSE_CLASS;
5930 classes[1] = X86_64_SSEUP_CLASS;
5931 return 2;
5932 case V1TImode:
5933 case V1DImode:
5934 case V2SFmode:
5935 case V2SImode:
5936 case V4HImode:
5937 case V8QImode:
5938 classes[0] = X86_64_SSE_CLASS;
5939 return 1;
5940 case BLKmode:
5941 case VOIDmode:
5942 return 0;
5943 default:
5944 gcc_assert (VECTOR_MODE_P (mode));
5945
5946 if (bytes > 16)
5947 return 0;
5948
5949 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5950
5951 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5952 classes[0] = X86_64_INTEGERSI_CLASS;
5953 else
5954 classes[0] = X86_64_INTEGER_CLASS;
5955 classes[1] = X86_64_INTEGER_CLASS;
5956 return 1 + (bytes > 8);
5957 }
5958 }
5959
5960 /* Examine the argument and set the number of registers required in each
5961 class. Return 0 iff the parameter should be passed in memory. */
5962 static int
5963 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5964 int *int_nregs, int *sse_nregs)
5965 {
5966 enum x86_64_reg_class regclass[MAX_CLASSES];
5967 int n = classify_argument (mode, type, regclass, 0);
5968
5969 *int_nregs = 0;
5970 *sse_nregs = 0;
5971 if (!n)
5972 return 0;
5973 for (n--; n >= 0; n--)
5974 switch (regclass[n])
5975 {
5976 case X86_64_INTEGER_CLASS:
5977 case X86_64_INTEGERSI_CLASS:
5978 (*int_nregs)++;
5979 break;
5980 case X86_64_SSE_CLASS:
5981 case X86_64_SSESF_CLASS:
5982 case X86_64_SSEDF_CLASS:
5983 (*sse_nregs)++;
5984 break;
5985 case X86_64_NO_CLASS:
5986 case X86_64_SSEUP_CLASS:
5987 break;
5988 case X86_64_X87_CLASS:
5989 case X86_64_X87UP_CLASS:
5990 if (!in_return)
5991 return 0;
5992 break;
5993 case X86_64_COMPLEX_X87_CLASS:
5994 return in_return ? 2 : 0;
5995 case X86_64_MEMORY_CLASS:
5996 gcc_unreachable ();
5997 }
5998 return 1;
5999 }
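/* A worked example for the classification above.  Illustration only, not
   compiled; the struct is hypothetical.  Its two 8-byte words classify as
   X86_64_SSEDF_CLASS and X86_64_INTEGERSI_CLASS, so examine_argument
   reports *sse_nregs == 1 and *int_nregs == 1 and the value travels in
   one SSE and one integer register.  */
#if 0
struct two_words { double d; int i; };
#endif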
6000
6001 /* Construct container for the argument used by GCC interface. See
6002 FUNCTION_ARG for the detailed description. */
6003
6004 static rtx
6005 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6006 const_tree type, int in_return, int nintregs, int nsseregs,
6007 const int *intreg, int sse_regno)
6008 {
6009 /* The following variables hold the static issued_error state. */
6010 static bool issued_sse_arg_error;
6011 static bool issued_sse_ret_error;
6012 static bool issued_x87_ret_error;
6013
6014 enum machine_mode tmpmode;
6015 int bytes =
6016 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6017 enum x86_64_reg_class regclass[MAX_CLASSES];
6018 int n;
6019 int i;
6020 int nexps = 0;
6021 int needed_sseregs, needed_intregs;
6022 rtx exp[MAX_CLASSES];
6023 rtx ret;
6024
6025 n = classify_argument (mode, type, regclass, 0);
6026 if (!n)
6027 return NULL;
6028 if (!examine_argument (mode, type, in_return, &needed_intregs,
6029 &needed_sseregs))
6030 return NULL;
6031 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6032 return NULL;
6033
6034 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6035 some less clueful developer tries to use floating-point anyway. */
6036 if (needed_sseregs && !TARGET_SSE)
6037 {
6038 if (in_return)
6039 {
6040 if (!issued_sse_ret_error)
6041 {
6042 error ("SSE register return with SSE disabled");
6043 issued_sse_ret_error = true;
6044 }
6045 }
6046 else if (!issued_sse_arg_error)
6047 {
6048 error ("SSE register argument with SSE disabled");
6049 issued_sse_arg_error = true;
6050 }
6051 return NULL;
6052 }
6053
6054 /* Likewise, error if the ABI requires us to return values in the
6055 x87 registers and the user specified -mno-80387. */
6056 if (!TARGET_80387 && in_return)
6057 for (i = 0; i < n; i++)
6058 if (regclass[i] == X86_64_X87_CLASS
6059 || regclass[i] == X86_64_X87UP_CLASS
6060 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6061 {
6062 if (!issued_x87_ret_error)
6063 {
6064 error ("x87 register return with x87 disabled");
6065 issued_x87_ret_error = true;
6066 }
6067 return NULL;
6068 }
6069
6070 /* First construct simple cases. Avoid SCmode, since we want to use
6071 a single register to pass this type. */
6072 if (n == 1 && mode != SCmode)
6073 switch (regclass[0])
6074 {
6075 case X86_64_INTEGER_CLASS:
6076 case X86_64_INTEGERSI_CLASS:
6077 return gen_rtx_REG (mode, intreg[0]);
6078 case X86_64_SSE_CLASS:
6079 case X86_64_SSESF_CLASS:
6080 case X86_64_SSEDF_CLASS:
6081 if (mode != BLKmode)
6082 return gen_reg_or_parallel (mode, orig_mode,
6083 SSE_REGNO (sse_regno));
6084 break;
6085 case X86_64_X87_CLASS:
6086 case X86_64_COMPLEX_X87_CLASS:
6087 return gen_rtx_REG (mode, FIRST_STACK_REG);
6088 case X86_64_NO_CLASS:
6089 /* Zero sized array, struct or class. */
6090 return NULL;
6091 default:
6092 gcc_unreachable ();
6093 }
6094 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6095 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6096 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6097 if (n == 4
6098 && regclass[0] == X86_64_SSE_CLASS
6099 && regclass[1] == X86_64_SSEUP_CLASS
6100 && regclass[2] == X86_64_SSEUP_CLASS
6101 && regclass[3] == X86_64_SSEUP_CLASS
6102 && mode != BLKmode)
6103 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6104
6105 if (n == 2
6106 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6107 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6108 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6109 && regclass[1] == X86_64_INTEGER_CLASS
6110 && (mode == CDImode || mode == TImode || mode == TFmode)
6111 && intreg[0] + 1 == intreg[1])
6112 return gen_rtx_REG (mode, intreg[0]);
6113
6114 /* Otherwise figure out the entries of the PARALLEL. */
6115 for (i = 0; i < n; i++)
6116 {
6117 int pos;
6118
6119 switch (regclass[i])
6120 {
6121 case X86_64_NO_CLASS:
6122 break;
6123 case X86_64_INTEGER_CLASS:
6124 case X86_64_INTEGERSI_CLASS:
6125 /* Merge TImodes on aligned occasions here too. */
6126 if (i * 8 + 8 > bytes)
6127 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6128 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6129 tmpmode = SImode;
6130 else
6131 tmpmode = DImode;
6132 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6133 if (tmpmode == BLKmode)
6134 tmpmode = DImode;
6135 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6136 gen_rtx_REG (tmpmode, *intreg),
6137 GEN_INT (i*8));
6138 intreg++;
6139 break;
6140 case X86_64_SSESF_CLASS:
6141 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6142 gen_rtx_REG (SFmode,
6143 SSE_REGNO (sse_regno)),
6144 GEN_INT (i*8));
6145 sse_regno++;
6146 break;
6147 case X86_64_SSEDF_CLASS:
6148 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6149 gen_rtx_REG (DFmode,
6150 SSE_REGNO (sse_regno)),
6151 GEN_INT (i*8));
6152 sse_regno++;
6153 break;
6154 case X86_64_SSE_CLASS:
6155 pos = i;
6156 switch (n)
6157 {
6158 case 1:
6159 tmpmode = DImode;
6160 break;
6161 case 2:
6162 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6163 {
6164 tmpmode = TImode;
6165 i++;
6166 }
6167 else
6168 tmpmode = DImode;
6169 break;
6170 case 4:
6171 gcc_assert (i == 0
6172 && regclass[1] == X86_64_SSEUP_CLASS
6173 && regclass[2] == X86_64_SSEUP_CLASS
6174 && regclass[3] == X86_64_SSEUP_CLASS);
6175 tmpmode = OImode;
6176 i += 3;
6177 break;
6178 default:
6179 gcc_unreachable ();
6180 }
6181 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6182 gen_rtx_REG (tmpmode,
6183 SSE_REGNO (sse_regno)),
6184 GEN_INT (pos*8));
6185 sse_regno++;
6186 break;
6187 default:
6188 gcc_unreachable ();
6189 }
6190 }
6191
6192 /* Empty aligned struct, union or class. */
6193 if (nexps == 0)
6194 return NULL;
6195
6196 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6197 for (i = 0; i < nexps; i++)
6198 XVECEXP (ret, 0, i) = exp [i];
6199 return ret;
6200 }
6201
6202 /* Update the data in CUM to advance over an argument of mode MODE
6203 and data type TYPE. (TYPE is null for libcalls where that information
6204 may not be available.) */
6205
6206 static void
6207 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6208 const_tree type, HOST_WIDE_INT bytes,
6209 HOST_WIDE_INT words)
6210 {
6211 switch (mode)
6212 {
6213 default:
6214 break;
6215
6216 case BLKmode:
6217 if (bytes < 0)
6218 break;
6219 /* FALLTHRU */
6220
6221 case DImode:
6222 case SImode:
6223 case HImode:
6224 case QImode:
6225 cum->words += words;
6226 cum->nregs -= words;
6227 cum->regno += words;
6228
6229 if (cum->nregs <= 0)
6230 {
6231 cum->nregs = 0;
6232 cum->regno = 0;
6233 }
6234 break;
6235
6236 case OImode:
6237 /* OImode shouldn't be used directly. */
6238 gcc_unreachable ();
6239
6240 case DFmode:
6241 if (cum->float_in_sse < 2)
6242 break;
6243 case SFmode:
6244 if (cum->float_in_sse < 1)
6245 break;
6246 /* FALLTHRU */
6247
6248 case V8SFmode:
6249 case V8SImode:
6250 case V32QImode:
6251 case V16HImode:
6252 case V4DFmode:
6253 case V4DImode:
6254 case TImode:
6255 case V16QImode:
6256 case V8HImode:
6257 case V4SImode:
6258 case V2DImode:
6259 case V4SFmode:
6260 case V2DFmode:
6261 if (!type || !AGGREGATE_TYPE_P (type))
6262 {
6263 cum->sse_words += words;
6264 cum->sse_nregs -= 1;
6265 cum->sse_regno += 1;
6266 if (cum->sse_nregs <= 0)
6267 {
6268 cum->sse_nregs = 0;
6269 cum->sse_regno = 0;
6270 }
6271 }
6272 break;
6273
6274 case V8QImode:
6275 case V4HImode:
6276 case V2SImode:
6277 case V2SFmode:
6278 case V1TImode:
6279 case V1DImode:
6280 if (!type || !AGGREGATE_TYPE_P (type))
6281 {
6282 cum->mmx_words += words;
6283 cum->mmx_nregs -= 1;
6284 cum->mmx_regno += 1;
6285 if (cum->mmx_nregs <= 0)
6286 {
6287 cum->mmx_nregs = 0;
6288 cum->mmx_regno = 0;
6289 }
6290 }
6291 break;
6292 }
6293 }
6294
6295 static void
6296 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6297 const_tree type, HOST_WIDE_INT words, bool named)
6298 {
6299 int int_nregs, sse_nregs;
6300
6301 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6302 if (!named && VALID_AVX256_REG_MODE (mode))
6303 return;
6304
6305 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6306 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6307 {
6308 cum->nregs -= int_nregs;
6309 cum->sse_nregs -= sse_nregs;
6310 cum->regno += int_nregs;
6311 cum->sse_regno += sse_nregs;
6312 }
6313 else
6314 {
6315 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6316 cum->words = (cum->words + align - 1) & ~(align - 1);
6317 cum->words += words;
6318 }
6319 }
6320
6321 static void
6322 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6323 HOST_WIDE_INT words)
6324 {
6325 /* Otherwise, this should be passed indirectly. */
6326 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6327
6328 cum->words += words;
6329 if (cum->nregs > 0)
6330 {
6331 cum->nregs -= 1;
6332 cum->regno += 1;
6333 }
6334 }
6335
6336 /* Update the data in CUM to advance over an argument of mode MODE and
6337 data type TYPE. (TYPE is null for libcalls where that information
6338 may not be available.) */
6339
6340 static void
6341 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6342 const_tree type, bool named)
6343 {
6344 HOST_WIDE_INT bytes, words;
6345
6346 if (mode == BLKmode)
6347 bytes = int_size_in_bytes (type);
6348 else
6349 bytes = GET_MODE_SIZE (mode);
6350 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6351
6352 if (type)
6353 mode = type_natural_mode (type, NULL);
6354
6355 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6356 function_arg_advance_ms_64 (cum, bytes, words);
6357 else if (TARGET_64BIT)
6358 function_arg_advance_64 (cum, mode, type, words, named);
6359 else
6360 function_arg_advance_32 (cum, mode, type, bytes, words);
6361 }
6362
6363 /* Define where to put the arguments to a function.
6364 Value is zero to push the argument on the stack,
6365 or a hard register in which to store the argument.
6366
6367 MODE is the argument's machine mode.
6368 TYPE is the data type of the argument (as a tree).
6369 This is null for libcalls where that information may
6370 not be available.
6371 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6372 the preceding args and about the function being called.
6373 NAMED is nonzero if this argument is a named parameter
6374 (otherwise it is an extra parameter matching an ellipsis). */
6375
6376 static rtx
6377 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6378 enum machine_mode orig_mode, const_tree type,
6379 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6380 {
6381 static bool warnedsse, warnedmmx;
6382
6383 /* Avoid the AL settings for the Unix64 ABI. */
6384 if (mode == VOIDmode)
6385 return constm1_rtx;
6386
6387 switch (mode)
6388 {
6389 default:
6390 break;
6391
6392 case BLKmode:
6393 if (bytes < 0)
6394 break;
6395 /* FALLTHRU */
6396 case DImode:
6397 case SImode:
6398 case HImode:
6399 case QImode:
6400 if (words <= cum->nregs)
6401 {
6402 int regno = cum->regno;
6403
6404 /* Fastcall allocates the first two DWORD (SImode) or
6405 smaller arguments to ECX and EDX if they aren't
6406 aggregate types. */
6407 if (cum->fastcall)
6408 {
6409 if (mode == BLKmode
6410 || mode == DImode
6411 || (type && AGGREGATE_TYPE_P (type)))
6412 break;
6413
6414 /* ECX, not EAX, is the first allocated register. */
6415 if (regno == AX_REG)
6416 regno = CX_REG;
6417 }
6418 return gen_rtx_REG (mode, regno);
6419 }
6420 break;
6421
6422 case DFmode:
6423 if (cum->float_in_sse < 2)
6424 break;
6425 case SFmode:
6426 if (cum->float_in_sse < 1)
6427 break;
6428 /* FALLTHRU */
6429 case TImode:
6430 /* In 32bit, we pass TImode in xmm registers. */
6431 case V16QImode:
6432 case V8HImode:
6433 case V4SImode:
6434 case V2DImode:
6435 case V4SFmode:
6436 case V2DFmode:
6437 if (!type || !AGGREGATE_TYPE_P (type))
6438 {
6439 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6440 {
6441 warnedsse = true;
6442 warning (0, "SSE vector argument without SSE enabled "
6443 "changes the ABI");
6444 }
6445 if (cum->sse_nregs)
6446 return gen_reg_or_parallel (mode, orig_mode,
6447 cum->sse_regno + FIRST_SSE_REG);
6448 }
6449 break;
6450
6451 case OImode:
6452 /* OImode shouldn't be used directly. */
6453 gcc_unreachable ();
6454
6455 case V8SFmode:
6456 case V8SImode:
6457 case V32QImode:
6458 case V16HImode:
6459 case V4DFmode:
6460 case V4DImode:
6461 if (!type || !AGGREGATE_TYPE_P (type))
6462 {
6463 if (cum->sse_nregs)
6464 return gen_reg_or_parallel (mode, orig_mode,
6465 cum->sse_regno + FIRST_SSE_REG);
6466 }
6467 break;
6468
6469 case V8QImode:
6470 case V4HImode:
6471 case V2SImode:
6472 case V2SFmode:
6473 case V1TImode:
6474 case V1DImode:
6475 if (!type || !AGGREGATE_TYPE_P (type))
6476 {
6477 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6478 {
6479 warnedmmx = true;
6480 warning (0, "MMX vector argument without MMX enabled "
6481 "changes the ABI");
6482 }
6483 if (cum->mmx_nregs)
6484 return gen_reg_or_parallel (mode, orig_mode,
6485 cum->mmx_regno + FIRST_MMX_REG);
6486 }
6487 break;
6488 }
6489
6490 return NULL_RTX;
6491 }
6492
6493 static rtx
6494 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6495 enum machine_mode orig_mode, const_tree type, bool named)
6496 {
6497 /* Handle a hidden AL argument containing the number of registers
6498 for varargs x86-64 functions. */
6499 if (mode == VOIDmode)
6500 return GEN_INT (cum->maybe_vaarg
6501 ? (cum->sse_nregs < 0
6502 ? X86_64_SSE_REGPARM_MAX
6503 : cum->sse_regno)
6504 : -1);
6505
6506 switch (mode)
6507 {
6508 default:
6509 break;
6510
6511 case V8SFmode:
6512 case V8SImode:
6513 case V32QImode:
6514 case V16HImode:
6515 case V4DFmode:
6516 case V4DImode:
6517 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6518 if (!named)
6519 return NULL;
6520 break;
6521 }
6522
6523 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6524 cum->sse_nregs,
6525 &x86_64_int_parameter_registers [cum->regno],
6526 cum->sse_regno);
6527 }
6528
6529 static rtx
6530 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6531 enum machine_mode orig_mode, bool named,
6532 HOST_WIDE_INT bytes)
6533 {
6534 unsigned int regno;
6535
6536 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6537 We use a value of -2 to specify that the current function call is MS ABI. */
6538 if (mode == VOIDmode)
6539 return GEN_INT (-2);
6540
6541 /* If we've run out of registers, it goes on the stack. */
6542 if (cum->nregs == 0)
6543 return NULL_RTX;
6544
6545 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6546
6547 /* Only floating point modes are passed in anything but integer regs. */
6548 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6549 {
6550 if (named)
6551 regno = cum->regno + FIRST_SSE_REG;
6552 else
6553 {
6554 rtx t1, t2;
6555
6556 /* Unnamed floating parameters are passed in both the
6557 SSE and integer registers. */
6558 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6559 t2 = gen_rtx_REG (mode, regno);
6560 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6561 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6562 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6563 }
6564 }
6565 /* Handle aggregate types passed in a register. */
6566 if (orig_mode == BLKmode)
6567 {
6568 if (bytes > 0 && bytes <= 8)
6569 mode = (bytes > 4 ? DImode : SImode);
6570 if (mode == BLKmode)
6571 mode = DImode;
6572 }
6573
6574 return gen_reg_or_parallel (mode, orig_mode, regno);
6575 }
6576
6577 /* Return where to put the arguments to a function.
6578 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6579
6580 MODE is the argument's machine mode. TYPE is the data type of the
6581 argument. It is null for libcalls where that information may not be
6582 available. CUM gives information about the preceding args and about
6583 the function being called. NAMED is nonzero if this argument is a
6584 named parameter (otherwise it is an extra parameter matching an
6585 ellipsis). */
6586
6587 static rtx
6588 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6589 const_tree type, bool named)
6590 {
6591 enum machine_mode mode = omode;
6592 HOST_WIDE_INT bytes, words;
6593 rtx arg;
6594
6595 if (mode == BLKmode)
6596 bytes = int_size_in_bytes (type);
6597 else
6598 bytes = GET_MODE_SIZE (mode);
6599 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6600
6601 /* To simplify the code below, represent vector types with a vector mode
6602 even if MMX/SSE are not active. */
6603 if (type && TREE_CODE (type) == VECTOR_TYPE)
6604 mode = type_natural_mode (type, cum);
6605
6606 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6607 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6608 else if (TARGET_64BIT)
6609 arg = function_arg_64 (cum, mode, omode, type, named);
6610 else
6611 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6612
6613 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6614 {
6615 /* This argument uses 256bit AVX modes. */
6616 if (cum->caller)
6617 cfun->machine->callee_pass_avx256_p = true;
6618 else
6619 cfun->machine->caller_pass_avx256_p = true;
6620 }
6621
6622 return arg;
6623 }
6624
6625 /* A C expression that indicates when an argument must be passed by
6626 reference. If nonzero for an argument, a copy of that argument is
6627 made in memory and a pointer to the argument is passed instead of
6628 the argument itself. The pointer is passed in whatever way is
6629 appropriate for passing a pointer to that type. */
6630
6631 static bool
6632 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6633 enum machine_mode mode ATTRIBUTE_UNUSED,
6634 const_tree type, bool named ATTRIBUTE_UNUSED)
6635 {
6636 /* See Windows x64 Software Convention. */
6637 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6638 {
6639 int msize = (int) GET_MODE_SIZE (mode);
6640 if (type)
6641 {
6642 /* Arrays are passed by reference. */
6643 if (TREE_CODE (type) == ARRAY_TYPE)
6644 return true;
6645
6646 if (AGGREGATE_TYPE_P (type))
6647 {
6648 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6649 are passed by reference. */
6650 msize = int_size_in_bytes (type);
6651 }
6652 }
6653
6654 /* __m128 is passed by reference. */
6655 switch (msize) {
6656 case 1: case 2: case 4: case 8:
6657 break;
6658 default:
6659 return true;
6660 }
6661 }
6662 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6663 return 1;
6664
6665 return 0;
6666 }
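/* A minimal sketch of the Windows x64 rule applied above.  Illustration
   only, not compiled; the struct names are hypothetical.  Aggregates of
   exactly 1, 2, 4 or 8 bytes travel in a register; everything else is
   passed by reference.  */
#if 0
struct in_reg  { int i; };              /*  4 bytes -> passed in a register */
struct by_ref  { char c[3]; };          /*  3 bytes -> passed by reference  */
struct by_ref2 { double d; double e; }; /* 16 bytes -> passed by reference  */
#endif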
6667
6668 /* Return true when TYPE should be 128bit aligned for 32bit argument
6669 passing ABI. XXX: This function is obsolete and is only used for
6670 checking psABI compatibility with previous versions of GCC. */
6671
6672 static bool
6673 ix86_compat_aligned_value_p (const_tree type)
6674 {
6675 enum machine_mode mode = TYPE_MODE (type);
6676 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6677 || mode == TDmode
6678 || mode == TFmode
6679 || mode == TCmode)
6680 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6681 return true;
6682 if (TYPE_ALIGN (type) < 128)
6683 return false;
6684
6685 if (AGGREGATE_TYPE_P (type))
6686 {
6687 /* Walk the aggregates recursively. */
6688 switch (TREE_CODE (type))
6689 {
6690 case RECORD_TYPE:
6691 case UNION_TYPE:
6692 case QUAL_UNION_TYPE:
6693 {
6694 tree field;
6695
6696 /* Walk all the structure fields. */
6697 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6698 {
6699 if (TREE_CODE (field) == FIELD_DECL
6700 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6701 return true;
6702 }
6703 break;
6704 }
6705
6706 case ARRAY_TYPE:
6707 /* Just for use if some language passes arrays by value. */
6708 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6709 return true;
6710 break;
6711
6712 default:
6713 gcc_unreachable ();
6714 }
6715 }
6716 return false;
6717 }
6718
6719 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6720 XXX: This function is obsolete and is only used for checking psABI
6721 compatibility with previous versions of GCC. */
6722
6723 static unsigned int
6724 ix86_compat_function_arg_boundary (enum machine_mode mode,
6725 const_tree type, unsigned int align)
6726 {
6727 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6728 natural boundaries. */
6729 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6730 {
6731 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6732 make an exception for SSE modes since these require 128bit
6733 alignment.
6734
6735 The handling here differs from field_alignment. ICC aligns MMX
6736 arguments to 4 byte boundaries, while structure fields are aligned
6737 to 8 byte boundaries. */
6738 if (!type)
6739 {
6740 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6741 align = PARM_BOUNDARY;
6742 }
6743 else
6744 {
6745 if (!ix86_compat_aligned_value_p (type))
6746 align = PARM_BOUNDARY;
6747 }
6748 }
6749 if (align > BIGGEST_ALIGNMENT)
6750 align = BIGGEST_ALIGNMENT;
6751 return align;
6752 }
6753
6754 /* Return true when TYPE should be 128bit aligned for 32bit argument
6755 passing ABI. */
6756
6757 static bool
6758 ix86_contains_aligned_value_p (const_tree type)
6759 {
6760 enum machine_mode mode = TYPE_MODE (type);
6761
6762 if (mode == XFmode || mode == XCmode)
6763 return false;
6764
6765 if (TYPE_ALIGN (type) < 128)
6766 return false;
6767
6768 if (AGGREGATE_TYPE_P (type))
6769 {
6770 /* Walk the aggregates recursively. */
6771 switch (TREE_CODE (type))
6772 {
6773 case RECORD_TYPE:
6774 case UNION_TYPE:
6775 case QUAL_UNION_TYPE:
6776 {
6777 tree field;
6778
6779 /* Walk all the structure fields. */
6780 for (field = TYPE_FIELDS (type);
6781 field;
6782 field = DECL_CHAIN (field))
6783 {
6784 if (TREE_CODE (field) == FIELD_DECL
6785 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6786 return true;
6787 }
6788 break;
6789 }
6790
6791 case ARRAY_TYPE:
6792 /* Just for use if some language passes arrays by value. */
6793 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6794 return true;
6795 break;
6796
6797 default:
6798 gcc_unreachable ();
6799 }
6800 }
6801 else
6802 return TYPE_ALIGN (type) >= 128;
6803
6804 return false;
6805 }
6806
6807 /* Gives the alignment boundary, in bits, of an argument with the
6808 specified mode and type. */
6809
6810 static unsigned int
6811 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6812 {
6813 unsigned int align;
6814 if (type)
6815 {
6816 /* Since the main variant type is used for the call, convert TYPE
6817 to its main variant. */
6818 type = TYPE_MAIN_VARIANT (type);
6819 align = TYPE_ALIGN (type);
6820 }
6821 else
6822 align = GET_MODE_ALIGNMENT (mode);
6823 if (align < PARM_BOUNDARY)
6824 align = PARM_BOUNDARY;
6825 else
6826 {
6827 static bool warned;
6828 unsigned int saved_align = align;
6829
6830 if (!TARGET_64BIT)
6831 {
6832 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6833 if (!type)
6834 {
6835 if (mode == XFmode || mode == XCmode)
6836 align = PARM_BOUNDARY;
6837 }
6838 else if (!ix86_contains_aligned_value_p (type))
6839 align = PARM_BOUNDARY;
6840
6841 if (align < 128)
6842 align = PARM_BOUNDARY;
6843 }
6844
6845 if (warn_psabi
6846 && !warned
6847 && align != ix86_compat_function_arg_boundary (mode, type,
6848 saved_align))
6849 {
6850 warned = true;
6851 inform (input_location,
6852 "The ABI for passing parameters with %d-byte"
6853 " alignment has changed in GCC 4.6",
6854 align / BITS_PER_UNIT);
6855 }
6856 }
6857
6858 return align;
6859 }
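/* A minimal sketch of the 32-bit boundaries computed above.  Illustration
   only, not compiled; the prototypes are hypothetical and __m128 would
   normally come from <xmmintrin.h>.  */
#if 0
void takes_int  (int x);    /* 32-bit boundary (PARM_BOUNDARY)          */
void takes_m128 (__m128 x); /* 128-bit boundary, kept for SSE arguments */
#endif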
6860
6861 /* Return true if N is a possible register number of function value. */
6862
6863 static bool
6864 ix86_function_value_regno_p (const unsigned int regno)
6865 {
6866 switch (regno)
6867 {
6868 case 0:
6869 return true;
6870
6871 case FIRST_FLOAT_REG:
6872 /* TODO: The function should depend on current function ABI but
6873 builtins.c would need updating then. Therefore we use the
6874 default ABI. */
6875 if (TARGET_64BIT && ix86_abi == MS_ABI)
6876 return false;
6877 return TARGET_FLOAT_RETURNS_IN_80387;
6878
6879 case FIRST_SSE_REG:
6880 return TARGET_SSE;
6881
6882 case FIRST_MMX_REG:
6883 if (TARGET_MACHO || TARGET_64BIT)
6884 return false;
6885 return TARGET_MMX;
6886 }
6887
6888 return false;
6889 }
6890
6891 /* Define how to find the value returned by a function.
6892 VALTYPE is the data type of the value (as a tree).
6893 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6894 otherwise, FUNC is 0. */
6895
6896 static rtx
6897 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6898 const_tree fntype, const_tree fn)
6899 {
6900 unsigned int regno;
6901
6902 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6903 we normally prevent this case when mmx is not available. However
6904 some ABIs may require the result to be returned like DImode. */
6905 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6906 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6907
6908 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6909 we prevent this case when sse is not available. However some ABIs
6910 may require the result to be returned like integer TImode. */
6911 else if (mode == TImode
6912 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6913 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6914
6915 /* 32-byte vector modes in %ymm0. */
6916 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6917 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6918
6919 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6920 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6921 regno = FIRST_FLOAT_REG;
6922 else
6923 /* Most things go in %eax. */
6924 regno = AX_REG;
6925
6926 /* Override FP return register with %xmm0 for local functions when
6927 SSE math is enabled or for functions with sseregparm attribute. */
6928 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6929 {
6930 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6931 if ((sse_level >= 1 && mode == SFmode)
6932 || (sse_level == 2 && mode == DFmode))
6933 regno = FIRST_SSE_REG;
6934 }
6935
6936 /* OImode shouldn't be used directly. */
6937 gcc_assert (mode != OImode);
6938
6939 return gen_rtx_REG (orig_mode, regno);
6940 }
6941
6942 static rtx
6943 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6944 const_tree valtype)
6945 {
6946 rtx ret;
6947
6948 /* Handle libcalls, which don't provide a type node. */
6949 if (valtype == NULL)
6950 {
6951 switch (mode)
6952 {
6953 case SFmode:
6954 case SCmode:
6955 case DFmode:
6956 case DCmode:
6957 case TFmode:
6958 case SDmode:
6959 case DDmode:
6960 case TDmode:
6961 return gen_rtx_REG (mode, FIRST_SSE_REG);
6962 case XFmode:
6963 case XCmode:
6964 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6965 case TCmode:
6966 return NULL;
6967 default:
6968 return gen_rtx_REG (mode, AX_REG);
6969 }
6970 }
6971
6972 ret = construct_container (mode, orig_mode, valtype, 1,
6973 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6974 x86_64_int_return_registers, 0);
6975
6976 /* For zero-sized structures, construct_container returns NULL, but we
6977 need to keep the rest of the compiler happy by returning a meaningful value. */
6978 if (!ret)
6979 ret = gen_rtx_REG (orig_mode, AX_REG);
6980
6981 return ret;
6982 }
6983
6984 static rtx
6985 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6986 {
6987 unsigned int regno = AX_REG;
6988
6989 if (TARGET_SSE)
6990 {
6991 switch (GET_MODE_SIZE (mode))
6992 {
6993 case 16:
6994 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6995 && !COMPLEX_MODE_P (mode))
6996 regno = FIRST_SSE_REG;
6997 break;
6998 case 8:
6999 case 4:
7000 if (mode == SFmode || mode == DFmode)
7001 regno = FIRST_SSE_REG;
7002 break;
7003 default:
7004 break;
7005 }
7006 }
7007 return gen_rtx_REG (orig_mode, regno);
7008 }
7009
7010 static rtx
7011 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7012 enum machine_mode orig_mode, enum machine_mode mode)
7013 {
7014 const_tree fn, fntype;
7015
7016 fn = NULL_TREE;
7017 if (fntype_or_decl && DECL_P (fntype_or_decl))
7018 fn = fntype_or_decl;
7019 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7020
7021 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7022 return function_value_ms_64 (orig_mode, mode);
7023 else if (TARGET_64BIT)
7024 return function_value_64 (orig_mode, mode, valtype);
7025 else
7026 return function_value_32 (orig_mode, mode, fntype, fn);
7027 }
7028
7029 static rtx
7030 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7031 bool outgoing ATTRIBUTE_UNUSED)
7032 {
7033 enum machine_mode mode, orig_mode;
7034
7035 orig_mode = TYPE_MODE (valtype);
7036 mode = type_natural_mode (valtype, NULL);
7037 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7038 }
7039
7040 rtx
7041 ix86_libcall_value (enum machine_mode mode)
7042 {
7043 return ix86_function_value_1 (NULL, NULL, mode, mode);
7044 }
7045
7046 /* Return true iff type is returned in memory. */
7047
7048 static bool ATTRIBUTE_UNUSED
7049 return_in_memory_32 (const_tree type, enum machine_mode mode)
7050 {
7051 HOST_WIDE_INT size;
7052
7053 if (mode == BLKmode)
7054 return true;
7055
7056 size = int_size_in_bytes (type);
7057
7058 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7059 return false;
7060
7061 if (VECTOR_MODE_P (mode) || mode == TImode)
7062 {
7063 /* User-created vectors small enough to fit in EAX. */
7064 if (size < 8)
7065 return false;
7066
7067 /* MMX/3dNow values are returned in MM0,
7068 except when it doesn't exist or the ABI prescribes otherwise. */
7069 if (size == 8)
7070 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7071
7072 /* SSE values are returned in XMM0, except when it doesn't exist. */
7073 if (size == 16)
7074 return !TARGET_SSE;
7075
7076 /* AVX values are returned in YMM0, except when it doesn't exist. */
7077 if (size == 32)
7078 return !TARGET_AVX;
7079 }
7080
7081 if (mode == XFmode)
7082 return false;
7083
7084 if (size > 12)
7085 return true;
7086
7087 /* OImode shouldn't be used directly. */
7088 gcc_assert (mode != OImode);
7089
7090 return false;
7091 }
7092
7093 static bool ATTRIBUTE_UNUSED
7094 return_in_memory_64 (const_tree type, enum machine_mode mode)
7095 {
7096 int needed_intregs, needed_sseregs;
7097 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7098 }
7099
7100 static bool ATTRIBUTE_UNUSED
7101 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7102 {
7103 HOST_WIDE_INT size = int_size_in_bytes (type);
7104
7105 /* __m128 is returned in xmm0. */
7106 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7107 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7108 return false;
7109
7110 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7111 return size != 1 && size != 2 && size != 4 && size != 8;
7112 }
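
/* Illustrative examples (not exhaustive): under these rules a 3-byte struct
   is returned in memory, an 8-byte struct comes back in %rax, and a 16-byte
   __m128 value is returned in %xmm0. */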
7113
7114 static bool
7115 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7116 {
7117 #ifdef SUBTARGET_RETURN_IN_MEMORY
7118 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7119 #else
7120 const enum machine_mode mode = type_natural_mode (type, NULL);
7121
7122 if (TARGET_64BIT)
7123 {
7124 if (ix86_function_type_abi (fntype) == MS_ABI)
7125 return return_in_memory_ms_64 (type, mode);
7126 else
7127 return return_in_memory_64 (type, mode);
7128 }
7129 else
7130 return return_in_memory_32 (type, mode);
7131 #endif
7132 }
7133
7134 /* When returning SSE vector types, we have a choice of either
7135 (1) being ABI incompatible with a -march switch, or
7136 (2) generating an error.
7137 Given no good solution, I think the safest thing is one warning.
7138 The user won't be able to use -Werror, but....
7139
7140 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7141 called in response to actually generating a caller or callee that
7142 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7143 via aggregate_value_p for general type probing from tree-ssa. */
7144
7145 static rtx
7146 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7147 {
7148 static bool warnedsse, warnedmmx;
7149
7150 if (!TARGET_64BIT && type)
7151 {
7152 /* Look at the return type of the function, not the function type. */
7153 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7154
7155 if (!TARGET_SSE && !warnedsse)
7156 {
7157 if (mode == TImode
7158 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7159 {
7160 warnedsse = true;
7161 warning (0, "SSE vector return without SSE enabled "
7162 "changes the ABI");
7163 }
7164 }
7165
7166 if (!TARGET_MMX && !warnedmmx)
7167 {
7168 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7169 {
7170 warnedmmx = true;
7171 warning (0, "MMX vector return without MMX enabled "
7172 "changes the ABI");
7173 }
7174 }
7175 }
7176
7177 return NULL;
7178 }
7179
7180 \f
7181 /* Create the va_list data type. */
7182
7183 /* Returns the calling-convention-specific va_list data type.
7184 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7185
7186 static tree
7187 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7188 {
7189 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7190
7191 /* For i386 we use a plain pointer to the argument area. */
7192 if (!TARGET_64BIT || abi == MS_ABI)
7193 return build_pointer_type (char_type_node);
7194
7195 record = lang_hooks.types.make_type (RECORD_TYPE);
7196 type_decl = build_decl (BUILTINS_LOCATION,
7197 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7198
7199 f_gpr = build_decl (BUILTINS_LOCATION,
7200 FIELD_DECL, get_identifier ("gp_offset"),
7201 unsigned_type_node);
7202 f_fpr = build_decl (BUILTINS_LOCATION,
7203 FIELD_DECL, get_identifier ("fp_offset"),
7204 unsigned_type_node);
7205 f_ovf = build_decl (BUILTINS_LOCATION,
7206 FIELD_DECL, get_identifier ("overflow_arg_area"),
7207 ptr_type_node);
7208 f_sav = build_decl (BUILTINS_LOCATION,
7209 FIELD_DECL, get_identifier ("reg_save_area"),
7210 ptr_type_node);
7211
7212 va_list_gpr_counter_field = f_gpr;
7213 va_list_fpr_counter_field = f_fpr;
7214
7215 DECL_FIELD_CONTEXT (f_gpr) = record;
7216 DECL_FIELD_CONTEXT (f_fpr) = record;
7217 DECL_FIELD_CONTEXT (f_ovf) = record;
7218 DECL_FIELD_CONTEXT (f_sav) = record;
7219
7220 TYPE_STUB_DECL (record) = type_decl;
7221 TYPE_NAME (record) = type_decl;
7222 TYPE_FIELDS (record) = f_gpr;
7223 DECL_CHAIN (f_gpr) = f_fpr;
7224 DECL_CHAIN (f_fpr) = f_ovf;
7225 DECL_CHAIN (f_ovf) = f_sav;
7226
7227 layout_type (record);
7228
7229 /* The correct type is an array type of one element. */
7230 return build_array_type (record, build_index_type (size_zero_node));
7231 }
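
/* For reference, a sketch of what the record built above amounts to under
   the SysV x86-64 ABI:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1]; */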
7232
7233 /* Set up the builtin va_list data type and, for 64-bit, the additional
7234 calling-convention-specific va_list data types. */
7235
7236 static tree
7237 ix86_build_builtin_va_list (void)
7238 {
7239 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7240
7241 /* Initialize abi specific va_list builtin types. */
7242 if (TARGET_64BIT)
7243 {
7244 tree t;
7245 if (ix86_abi == MS_ABI)
7246 {
7247 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7248 if (TREE_CODE (t) != RECORD_TYPE)
7249 t = build_variant_type_copy (t);
7250 sysv_va_list_type_node = t;
7251 }
7252 else
7253 {
7254 t = ret;
7255 if (TREE_CODE (t) != RECORD_TYPE)
7256 t = build_variant_type_copy (t);
7257 sysv_va_list_type_node = t;
7258 }
7259 if (ix86_abi != MS_ABI)
7260 {
7261 t = ix86_build_builtin_va_list_abi (MS_ABI);
7262 if (TREE_CODE (t) != RECORD_TYPE)
7263 t = build_variant_type_copy (t);
7264 ms_va_list_type_node = t;
7265 }
7266 else
7267 {
7268 t = ret;
7269 if (TREE_CODE (t) != RECORD_TYPE)
7270 t = build_variant_type_copy (t);
7271 ms_va_list_type_node = t;
7272 }
7273 }
7274
7275 return ret;
7276 }
7277
7278 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7279
7280 static void
7281 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7282 {
7283 rtx save_area, mem;
7284 alias_set_type set;
7285 int i, max;
7286
7287 /* GPR size of varargs save area. */
7288 if (cfun->va_list_gpr_size)
7289 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7290 else
7291 ix86_varargs_gpr_size = 0;
7292
7293 /* FPR size of varargs save area. We don't need it if we don't pass
7294 anything in SSE registers. */
7295 if (TARGET_SSE && cfun->va_list_fpr_size)
7296 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7297 else
7298 ix86_varargs_fpr_size = 0;
7299
7300 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7301 return;
7302
7303 save_area = frame_pointer_rtx;
7304 set = get_varargs_alias_set ();
7305
7306 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7307 if (max > X86_64_REGPARM_MAX)
7308 max = X86_64_REGPARM_MAX;
7309
7310 for (i = cum->regno; i < max; i++)
7311 {
7312 mem = gen_rtx_MEM (Pmode,
7313 plus_constant (save_area, i * UNITS_PER_WORD));
7314 MEM_NOTRAP_P (mem) = 1;
7315 set_mem_alias_set (mem, set);
7316 emit_move_insn (mem, gen_rtx_REG (Pmode,
7317 x86_64_int_parameter_registers[i]));
7318 }
7319
7320 if (ix86_varargs_fpr_size)
7321 {
7322 enum machine_mode smode;
7323 rtx label, test;
7324
7325 /* Now emit code to save SSE registers. The AX parameter contains the
7326 number of SSE parameter registers used to call this function, though
7327 all we actually check here is its zero/non-zero status. */
7328
7329 label = gen_label_rtx ();
7330 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7331 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7332 label));
7333
7334 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7335 we used movdqa (i.e. TImode) instead? Perhaps even better would
7336 be if we could determine the real mode of the data, via a hook
7337 into pass_stdarg. Ignore all that for now. */
7338 smode = V4SFmode;
7339 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7340 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7341
7342 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7343 if (max > X86_64_SSE_REGPARM_MAX)
7344 max = X86_64_SSE_REGPARM_MAX;
7345
7346 for (i = cum->sse_regno; i < max; ++i)
7347 {
7348 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7349 mem = gen_rtx_MEM (smode, mem);
7350 MEM_NOTRAP_P (mem) = 1;
7351 set_mem_alias_set (mem, set);
7352 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7353
7354 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7355 }
7356
7357 emit_label (label);
7358 }
7359 }
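
/* The register save area written above is thus laid out (when both parts are
   needed) as X86_64_REGPARM_MAX word-sized GP slots followed by up to
   X86_64_SSE_REGPARM_MAX 16-byte SSE slots; the gp_offset and fp_offset
   fields set up in ix86_va_start below index into this area. */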
7360
7361 static void
7362 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7363 {
7364 alias_set_type set = get_varargs_alias_set ();
7365 int i;
7366
7367 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7368 {
7369 rtx reg, mem;
7370
7371 mem = gen_rtx_MEM (Pmode,
7372 plus_constant (virtual_incoming_args_rtx,
7373 i * UNITS_PER_WORD));
7374 MEM_NOTRAP_P (mem) = 1;
7375 set_mem_alias_set (mem, set);
7376
7377 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7378 emit_move_insn (mem, reg);
7379 }
7380 }
7381
7382 static void
7383 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7384 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7385 int no_rtl)
7386 {
7387 CUMULATIVE_ARGS next_cum;
7388 tree fntype;
7389
7390 /* This argument doesn't appear to be used anymore, which is good,
7391 because the old code here didn't suppress rtl generation. */
7392 gcc_assert (!no_rtl);
7393
7394 if (!TARGET_64BIT)
7395 return;
7396
7397 fntype = TREE_TYPE (current_function_decl);
7398
7399 /* For varargs, we do not want to skip the dummy va_dcl argument.
7400 For stdargs, we do want to skip the last named argument. */
7401 next_cum = *cum;
7402 if (stdarg_p (fntype))
7403 ix86_function_arg_advance (&next_cum, mode, type, true);
7404
7405 if (cum->call_abi == MS_ABI)
7406 setup_incoming_varargs_ms_64 (&next_cum);
7407 else
7408 setup_incoming_varargs_64 (&next_cum);
7409 }
7410
7411 /* Return true if TYPE is a va_list of the plain char * kind. */
7412
7413 static bool
7414 is_va_list_char_pointer (tree type)
7415 {
7416 tree canonic;
7417
7418 /* For 32-bit it is always true. */
7419 if (!TARGET_64BIT)
7420 return true;
7421 canonic = ix86_canonical_va_list_type (type);
7422 return (canonic == ms_va_list_type_node
7423 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7424 }
7425
7426 /* Implement va_start. */
7427
7428 static void
7429 ix86_va_start (tree valist, rtx nextarg)
7430 {
7431 HOST_WIDE_INT words, n_gpr, n_fpr;
7432 tree f_gpr, f_fpr, f_ovf, f_sav;
7433 tree gpr, fpr, ovf, sav, t;
7434 tree type;
7435 rtx ovf_rtx;
7436
7437 if (flag_split_stack
7438 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7439 {
7440 unsigned int scratch_regno;
7441
7442 /* When we are splitting the stack, we can't refer to the stack
7443 arguments using internal_arg_pointer, because they may be on
7444 the old stack. The split stack prologue will arrange to
7445 leave a pointer to the old stack arguments in a scratch
7446 register, which we here copy to a pseudo-register. The split
7447 stack prologue can't set the pseudo-register directly because
7448 it (the prologue) runs before any registers have been saved. */
7449
7450 scratch_regno = split_stack_prologue_scratch_regno ();
7451 if (scratch_regno != INVALID_REGNUM)
7452 {
7453 rtx reg, seq;
7454
7455 reg = gen_reg_rtx (Pmode);
7456 cfun->machine->split_stack_varargs_pointer = reg;
7457
7458 start_sequence ();
7459 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7460 seq = get_insns ();
7461 end_sequence ();
7462
7463 push_topmost_sequence ();
7464 emit_insn_after (seq, entry_of_function ());
7465 pop_topmost_sequence ();
7466 }
7467 }
7468
7469 /* Only the 64-bit target needs something special. */
7470 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7471 {
7472 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7473 std_expand_builtin_va_start (valist, nextarg);
7474 else
7475 {
7476 rtx va_r, next;
7477
7478 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7479 next = expand_binop (ptr_mode, add_optab,
7480 cfun->machine->split_stack_varargs_pointer,
7481 crtl->args.arg_offset_rtx,
7482 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7483 convert_move (va_r, next, 0);
7484 }
7485 return;
7486 }
7487
7488 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7489 f_fpr = DECL_CHAIN (f_gpr);
7490 f_ovf = DECL_CHAIN (f_fpr);
7491 f_sav = DECL_CHAIN (f_ovf);
7492
7493 valist = build_simple_mem_ref (valist);
7494 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7495 /* The following should be folded into the MEM_REF offset. */
7496 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7497 f_gpr, NULL_TREE);
7498 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7499 f_fpr, NULL_TREE);
7500 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7501 f_ovf, NULL_TREE);
7502 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7503 f_sav, NULL_TREE);
7504
7505 /* Count number of gp and fp argument registers used. */
7506 words = crtl->args.info.words;
7507 n_gpr = crtl->args.info.regno;
7508 n_fpr = crtl->args.info.sse_regno;
7509
7510 if (cfun->va_list_gpr_size)
7511 {
7512 type = TREE_TYPE (gpr);
7513 t = build2 (MODIFY_EXPR, type,
7514 gpr, build_int_cst (type, n_gpr * 8));
7515 TREE_SIDE_EFFECTS (t) = 1;
7516 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7517 }
7518
7519 if (TARGET_SSE && cfun->va_list_fpr_size)
7520 {
7521 type = TREE_TYPE (fpr);
7522 t = build2 (MODIFY_EXPR, type, fpr,
7523 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7524 TREE_SIDE_EFFECTS (t) = 1;
7525 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7526 }
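
/* Worked example (assuming X86_64_REGPARM_MAX == 6): a function with two
   named integer arguments and one named SSE argument starts gp_offset at
   2 * 8 == 16 and fp_offset at 1 * 16 + 8 * 6 == 64. */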
7527
7528 /* Find the overflow area. */
7529 type = TREE_TYPE (ovf);
7530 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7531 ovf_rtx = crtl->args.internal_arg_pointer;
7532 else
7533 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7534 t = make_tree (type, ovf_rtx);
7535 if (words != 0)
7536 t = build2 (POINTER_PLUS_EXPR, type, t,
7537 size_int (words * UNITS_PER_WORD));
7538 t = build2 (MODIFY_EXPR, type, ovf, t);
7539 TREE_SIDE_EFFECTS (t) = 1;
7540 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7541
7542 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7543 {
7544 /* Find the register save area.
7545 The function prologue saves it right above the stack frame. */
7546 type = TREE_TYPE (sav);
7547 t = make_tree (type, frame_pointer_rtx);
7548 if (!ix86_varargs_gpr_size)
7549 t = build2 (POINTER_PLUS_EXPR, type, t,
7550 size_int (-8 * X86_64_REGPARM_MAX));
7551 t = build2 (MODIFY_EXPR, type, sav, t);
7552 TREE_SIDE_EFFECTS (t) = 1;
7553 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7554 }
7555 }
7556
7557 /* Implement va_arg. */
7558
7559 static tree
7560 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7561 gimple_seq *post_p)
7562 {
7563 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7564 tree f_gpr, f_fpr, f_ovf, f_sav;
7565 tree gpr, fpr, ovf, sav, t;
7566 int size, rsize;
7567 tree lab_false, lab_over = NULL_TREE;
7568 tree addr, t2;
7569 rtx container;
7570 int indirect_p = 0;
7571 tree ptrtype;
7572 enum machine_mode nat_mode;
7573 unsigned int arg_boundary;
7574
7575 /* Only the 64-bit target needs something special. */
7576 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7577 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7578
7579 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7580 f_fpr = DECL_CHAIN (f_gpr);
7581 f_ovf = DECL_CHAIN (f_fpr);
7582 f_sav = DECL_CHAIN (f_ovf);
7583
7584 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7585 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7586 valist = build_va_arg_indirect_ref (valist);
7587 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7588 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7589 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7590
7591 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7592 if (indirect_p)
7593 type = build_pointer_type (type);
7594 size = int_size_in_bytes (type);
7595 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7596
7597 nat_mode = type_natural_mode (type, NULL);
7598 switch (nat_mode)
7599 {
7600 case V8SFmode:
7601 case V8SImode:
7602 case V32QImode:
7603 case V16HImode:
7604 case V4DFmode:
7605 case V4DImode:
7606 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7607 if (!TARGET_64BIT_MS_ABI)
7608 {
7609 container = NULL;
7610 break;
7611 }
7612
7613 default:
7614 container = construct_container (nat_mode, TYPE_MODE (type),
7615 type, 0, X86_64_REGPARM_MAX,
7616 X86_64_SSE_REGPARM_MAX, intreg,
7617 0);
7618 break;
7619 }
7620
7621 /* Pull the value out of the saved registers. */
7622
7623 addr = create_tmp_var (ptr_type_node, "addr");
7624
7625 if (container)
7626 {
7627 int needed_intregs, needed_sseregs;
7628 bool need_temp;
7629 tree int_addr, sse_addr;
7630
7631 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7632 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7633
7634 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7635
7636 need_temp = (!REG_P (container)
7637 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7638 || TYPE_ALIGN (type) > 128));
7639
7640 /* In case we are passing a structure, verify that it is a consecutive block
7641 in the register save area. If not, we need to do moves. */
7642 if (!need_temp && !REG_P (container))
7643 {
7644 /* Verify that all registers are strictly consecutive. */
7645 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7646 {
7647 int i;
7648
7649 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7650 {
7651 rtx slot = XVECEXP (container, 0, i);
7652 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7653 || INTVAL (XEXP (slot, 1)) != i * 16)
7654 need_temp = 1;
7655 }
7656 }
7657 else
7658 {
7659 int i;
7660
7661 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7662 {
7663 rtx slot = XVECEXP (container, 0, i);
7664 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7665 || INTVAL (XEXP (slot, 1)) != i * 8)
7666 need_temp = 1;
7667 }
7668 }
7669 }
7670 if (!need_temp)
7671 {
7672 int_addr = addr;
7673 sse_addr = addr;
7674 }
7675 else
7676 {
7677 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7678 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7679 }
7680
7681 /* First ensure that we fit completely in registers. */
7682 if (needed_intregs)
7683 {
7684 t = build_int_cst (TREE_TYPE (gpr),
7685 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7686 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7687 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7688 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7689 gimplify_and_add (t, pre_p);
7690 }
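
/* Worked example (assuming X86_64_REGPARM_MAX == 6): with needed_intregs == 2
   the threshold above is (6 - 2 + 1) * 8 == 40, so we branch to lab_false
   (the overflow area) once gp_offset >= 40, i.e. when fewer than two 8-byte
   GP slots remain in the save area. */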
7691 if (needed_sseregs)
7692 {
7693 t = build_int_cst (TREE_TYPE (fpr),
7694 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7695 + X86_64_REGPARM_MAX * 8);
7696 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7697 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7698 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7699 gimplify_and_add (t, pre_p);
7700 }
7701
7702 /* Compute index to start of area used for integer regs. */
7703 if (needed_intregs)
7704 {
7705 /* int_addr = gpr + sav; */
7706 t = fold_convert (sizetype, gpr);
7707 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7708 gimplify_assign (int_addr, t, pre_p);
7709 }
7710 if (needed_sseregs)
7711 {
7712 /* sse_addr = fpr + sav; */
7713 t = fold_convert (sizetype, fpr);
7714 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7715 gimplify_assign (sse_addr, t, pre_p);
7716 }
7717 if (need_temp)
7718 {
7719 int i, prev_size = 0;
7720 tree temp = create_tmp_var (type, "va_arg_tmp");
7721
7722 /* addr = &temp; */
7723 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7724 gimplify_assign (addr, t, pre_p);
7725
7726 for (i = 0; i < XVECLEN (container, 0); i++)
7727 {
7728 rtx slot = XVECEXP (container, 0, i);
7729 rtx reg = XEXP (slot, 0);
7730 enum machine_mode mode = GET_MODE (reg);
7731 tree piece_type;
7732 tree addr_type;
7733 tree daddr_type;
7734 tree src_addr, src;
7735 int src_offset;
7736 tree dest_addr, dest;
7737 int cur_size = GET_MODE_SIZE (mode);
7738
7739 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7740 prev_size = INTVAL (XEXP (slot, 1));
7741 if (prev_size + cur_size > size)
7742 {
7743 cur_size = size - prev_size;
7744 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7745 if (mode == BLKmode)
7746 mode = QImode;
7747 }
7748 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7749 if (mode == GET_MODE (reg))
7750 addr_type = build_pointer_type (piece_type);
7751 else
7752 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7753 true);
7754 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7755 true);
7756
7757 if (SSE_REGNO_P (REGNO (reg)))
7758 {
7759 src_addr = sse_addr;
7760 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7761 }
7762 else
7763 {
7764 src_addr = int_addr;
7765 src_offset = REGNO (reg) * 8;
7766 }
7767 src_addr = fold_convert (addr_type, src_addr);
7768 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7769 size_int (src_offset));
7770
7771 dest_addr = fold_convert (daddr_type, addr);
7772 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7773 size_int (prev_size));
7774 if (cur_size == GET_MODE_SIZE (mode))
7775 {
7776 src = build_va_arg_indirect_ref (src_addr);
7777 dest = build_va_arg_indirect_ref (dest_addr);
7778
7779 gimplify_assign (dest, src, pre_p);
7780 }
7781 else
7782 {
7783 tree copy
7784 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7785 3, dest_addr, src_addr,
7786 size_int (cur_size));
7787 gimplify_and_add (copy, pre_p);
7788 }
7789 prev_size += cur_size;
7790 }
7791 }
7792
7793 if (needed_intregs)
7794 {
7795 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7796 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7797 gimplify_assign (gpr, t, pre_p);
7798 }
7799
7800 if (needed_sseregs)
7801 {
7802 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7803 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7804 gimplify_assign (fpr, t, pre_p);
7805 }
7806
7807 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7808
7809 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7810 }
7811
7812 /* ... otherwise out of the overflow area. */
7813
7814 /* When the caller aligns a parameter on the stack, an alignment beyond
7815 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7816 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
7817 caller. */
7818 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7819 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7820 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7821
7822 /* Care for on-stack alignment if needed. */
7823 if (arg_boundary <= 64 || size == 0)
7824 t = ovf;
7825 else
7826 {
7827 HOST_WIDE_INT align = arg_boundary / 8;
7828 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7829 size_int (align - 1));
7830 t = fold_convert (sizetype, t);
7831 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7832 size_int (-align));
7833 t = fold_convert (TREE_TYPE (ovf), t);
7834 }
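
/* The else branch above rounds OVF up to the next ALIGN-byte boundary:
   e.g. for a 16-byte boundary an overflow pointer of 40 becomes
   (40 + 15) & -16 == 48. */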
7835
7836 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7837 gimplify_assign (addr, t, pre_p);
7838
7839 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7840 size_int (rsize * UNITS_PER_WORD));
7841 gimplify_assign (unshare_expr (ovf), t, pre_p);
7842
7843 if (container)
7844 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7845
7846 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7847 addr = fold_convert (ptrtype, addr);
7848
7849 if (indirect_p)
7850 addr = build_va_arg_indirect_ref (addr);
7851 return build_va_arg_indirect_ref (addr);
7852 }
7853 \f
7854 /* Return true if OPNUM's MEM should be matched
7855 in movabs* patterns. */
7856
7857 bool
7858 ix86_check_movabs (rtx insn, int opnum)
7859 {
7860 rtx set, mem;
7861
7862 set = PATTERN (insn);
7863 if (GET_CODE (set) == PARALLEL)
7864 set = XVECEXP (set, 0, 0);
7865 gcc_assert (GET_CODE (set) == SET);
7866 mem = XEXP (set, opnum);
7867 while (GET_CODE (mem) == SUBREG)
7868 mem = SUBREG_REG (mem);
7869 gcc_assert (MEM_P (mem));
7870 return volatile_ok || !MEM_VOLATILE_P (mem);
7871 }
7872 \f
7873 /* Initialize the table of extra 80387 mathematical constants. */
7874
7875 static void
7876 init_ext_80387_constants (void)
7877 {
7878 static const char * cst[5] =
7879 {
7880 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7881 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7882 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7883 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7884 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7885 };
7886 int i;
7887
7888 for (i = 0; i < 5; i++)
7889 {
7890 real_from_string (&ext_80387_constants_table[i], cst[i]);
7891 /* Ensure each constant is rounded to XFmode precision. */
7892 real_convert (&ext_80387_constants_table[i],
7893 XFmode, &ext_80387_constants_table[i]);
7894 }
7895
7896 ext_80387_constants_init = 1;
7897 }
7898
7899 /* Return non-zero if the constant is something that
7900 can be loaded with a special instruction. */
7901
7902 int
7903 standard_80387_constant_p (rtx x)
7904 {
7905 enum machine_mode mode = GET_MODE (x);
7906
7907 REAL_VALUE_TYPE r;
7908
7909 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7910 return -1;
7911
7912 if (x == CONST0_RTX (mode))
7913 return 1;
7914 if (x == CONST1_RTX (mode))
7915 return 2;
7916
7917 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7918
7919 /* For XFmode constants, try to find a special 80387 instruction when
7920 optimizing for size or on those CPUs that benefit from them. */
7921 if (mode == XFmode
7922 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7923 {
7924 int i;
7925
7926 if (! ext_80387_constants_init)
7927 init_ext_80387_constants ();
7928
7929 for (i = 0; i < 5; i++)
7930 if (real_identical (&r, &ext_80387_constants_table[i]))
7931 return i + 3;
7932 }
7933
7934 /* A load of the constant -0.0 or -1.0 will be split into an
7935 fldz;fchs or fld1;fchs sequence. */
7936 if (real_isnegzero (&r))
7937 return 8;
7938 if (real_identical (&r, &dconstm1))
7939 return 9;
7940
7941 return 0;
7942 }
7943
7944 /* Return the opcode of the special instruction to be used to load
7945 the constant X. */
7946
7947 const char *
7948 standard_80387_constant_opcode (rtx x)
7949 {
7950 switch (standard_80387_constant_p (x))
7951 {
7952 case 1:
7953 return "fldz";
7954 case 2:
7955 return "fld1";
7956 case 3:
7957 return "fldlg2";
7958 case 4:
7959 return "fldln2";
7960 case 5:
7961 return "fldl2e";
7962 case 6:
7963 return "fldl2t";
7964 case 7:
7965 return "fldpi";
7966 case 8:
7967 case 9:
7968 return "#";
7969 default:
7970 gcc_unreachable ();
7971 }
7972 }
7973
7974 /* Return the CONST_DOUBLE representing the 80387 constant that is
7975 loaded by the specified special instruction. The argument IDX
7976 matches the return value from standard_80387_constant_p. */
7977
7978 rtx
7979 standard_80387_constant_rtx (int idx)
7980 {
7981 int i;
7982
7983 if (! ext_80387_constants_init)
7984 init_ext_80387_constants ();
7985
7986 switch (idx)
7987 {
7988 case 3:
7989 case 4:
7990 case 5:
7991 case 6:
7992 case 7:
7993 i = idx - 3;
7994 break;
7995
7996 default:
7997 gcc_unreachable ();
7998 }
7999
8000 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8001 XFmode);
8002 }
8003
8004 /* Return 1 if X is all 0s and 2 if X is all 1s
8005 in a supported SSE vector mode. */
8006
8007 int
8008 standard_sse_constant_p (rtx x)
8009 {
8010 enum machine_mode mode = GET_MODE (x);
8011
8012 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8013 return 1;
8014 if (vector_all_ones_operand (x, mode))
8015 switch (mode)
8016 {
8017 case V16QImode:
8018 case V8HImode:
8019 case V4SImode:
8020 case V2DImode:
8021 if (TARGET_SSE2)
8022 return 2;
8023 default:
8024 break;
8025 }
8026
8027 return 0;
8028 }
8029
8030 /* Return the opcode of the special instruction to be used to load
8031 the constant X. */
8032
8033 const char *
8034 standard_sse_constant_opcode (rtx insn, rtx x)
8035 {
8036 switch (standard_sse_constant_p (x))
8037 {
8038 case 1:
8039 switch (get_attr_mode (insn))
8040 {
8041 case MODE_TI:
8042 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8043 return "%vpxor\t%0, %d0";
8044 case MODE_V2DF:
8045 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8046 return "%vxorpd\t%0, %d0";
8047 case MODE_V4SF:
8048 return "%vxorps\t%0, %d0";
8049
8050 case MODE_OI:
8051 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8052 return "vpxor\t%x0, %x0, %x0";
8053 case MODE_V4DF:
8054 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8055 return "vxorpd\t%x0, %x0, %x0";
8056 case MODE_V8SF:
8057 return "vxorps\t%x0, %x0, %x0";
8058
8059 default:
8060 break;
8061 }
8062
8063 case 2:
8064 return "%vpcmpeqd\t%0, %d0";
8065 default:
8066 break;
8067 }
8068 gcc_unreachable ();
8069 }
8070
8071 /* Returns true if OP contains a symbol reference. */
8072
8073 bool
8074 symbolic_reference_mentioned_p (rtx op)
8075 {
8076 const char *fmt;
8077 int i;
8078
8079 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8080 return true;
8081
8082 fmt = GET_RTX_FORMAT (GET_CODE (op));
8083 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8084 {
8085 if (fmt[i] == 'E')
8086 {
8087 int j;
8088
8089 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8090 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8091 return true;
8092 }
8093
8094 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8095 return true;
8096 }
8097
8098 return false;
8099 }
8100
8101 /* Return true if it is appropriate to emit `ret' instructions in the
8102 body of a function. Do this only if the epilogue is simple, needing a
8103 couple of insns. Prior to reloading, we can't tell how many registers
8104 must be saved, so return false then. Return false if there is no frame
8105 marker to de-allocate. */
8106
8107 bool
8108 ix86_can_use_return_insn_p (void)
8109 {
8110 struct ix86_frame frame;
8111
8112 if (! reload_completed || frame_pointer_needed)
8113 return 0;
8114
8115 /* Don't allow more than 32k pop, since that's all we can do
8116 with one instruction. */
8117 if (crtl->args.pops_args && crtl->args.size >= 32768)
8118 return 0;
8119
8120 ix86_compute_frame_layout (&frame);
8121 return (frame.stack_pointer_offset == UNITS_PER_WORD
8122 && (frame.nregs + frame.nsseregs) == 0);
8123 }
8124 \f
8125 /* Value should be nonzero if functions must have frame pointers.
8126 Zero means the frame pointer need not be set up (and parms may
8127 be accessed via the stack pointer) in functions that seem suitable. */
8128
8129 static bool
8130 ix86_frame_pointer_required (void)
8131 {
8132 /* If we accessed previous frames, then the generated code expects
8133 to be able to access the saved ebp value in our frame. */
8134 if (cfun->machine->accesses_prev_frame)
8135 return true;
8136
8137 /* Several x86 OSes need a frame pointer for other reasons,
8138 usually pertaining to setjmp. */
8139 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8140 return true;
8141
8142 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8143 turns off the frame pointer by default. Turn it back on now if
8144 the function is not a leaf. */
8145 if (TARGET_OMIT_LEAF_FRAME_POINTER
8146 && (!current_function_is_leaf
8147 || ix86_current_function_calls_tls_descriptor))
8148 return true;
8149
8150 if (crtl->profile && !flag_fentry)
8151 return true;
8152
8153 return false;
8154 }
8155
8156 /* Record that the current function accesses previous call frames. */
8157
8158 void
8159 ix86_setup_frame_addresses (void)
8160 {
8161 cfun->machine->accesses_prev_frame = 1;
8162 }
8163 \f
8164 #ifndef USE_HIDDEN_LINKONCE
8165 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8166 # define USE_HIDDEN_LINKONCE 1
8167 # else
8168 # define USE_HIDDEN_LINKONCE 0
8169 # endif
8170 #endif
8171
8172 static int pic_labels_used;
8173
8174 /* Fills in the label name that should be used for a pc thunk for
8175 the given register. */
8176
8177 static void
8178 get_pc_thunk_name (char name[32], unsigned int regno)
8179 {
8180 gcc_assert (!TARGET_64BIT);
8181
8182 if (USE_HIDDEN_LINKONCE)
8183 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8184 else
8185 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8186 }
8187
8188
8189 /* This function generates the pc-thunk code used for -fpic: each thunk
8190 loads its register with the return address of the caller and then returns. */
8191
8192 static void
8193 ix86_code_end (void)
8194 {
8195 rtx xops[2];
8196 int regno;
8197
8198 #ifdef TARGET_SOLARIS
8199 solaris_code_end ();
8200 #endif
8201
8202 for (regno = AX_REG; regno <= SP_REG; regno++)
8203 {
8204 char name[32];
8205 tree decl;
8206
8207 if (!(pic_labels_used & (1 << regno)))
8208 continue;
8209
8210 get_pc_thunk_name (name, regno);
8211
8212 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8213 get_identifier (name),
8214 build_function_type_list (void_type_node, NULL_TREE));
8215 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8216 NULL_TREE, void_type_node);
8217 TREE_PUBLIC (decl) = 1;
8218 TREE_STATIC (decl) = 1;
8219
8220 #if TARGET_MACHO
8221 if (TARGET_MACHO)
8222 {
8223 switch_to_section (darwin_sections[text_coal_section]);
8224 fputs ("\t.weak_definition\t", asm_out_file);
8225 assemble_name (asm_out_file, name);
8226 fputs ("\n\t.private_extern\t", asm_out_file);
8227 assemble_name (asm_out_file, name);
8228 putc ('\n', asm_out_file);
8229 ASM_OUTPUT_LABEL (asm_out_file, name);
8230 DECL_WEAK (decl) = 1;
8231 }
8232 else
8233 #endif
8234 if (USE_HIDDEN_LINKONCE)
8235 {
8236 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8237
8238 targetm.asm_out.unique_section (decl, 0);
8239 switch_to_section (get_named_section (decl, NULL, 0));
8240
8241 targetm.asm_out.globalize_label (asm_out_file, name);
8242 fputs ("\t.hidden\t", asm_out_file);
8243 assemble_name (asm_out_file, name);
8244 putc ('\n', asm_out_file);
8245 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8246 }
8247 else
8248 {
8249 switch_to_section (text_section);
8250 ASM_OUTPUT_LABEL (asm_out_file, name);
8251 }
8252
8253 DECL_INITIAL (decl) = make_node (BLOCK);
8254 current_function_decl = decl;
8255 init_function_start (decl);
8256 first_function_block_is_cold = false;
8257 /* Make sure unwind info is emitted for the thunk if needed. */
8258 final_start_function (emit_barrier (), asm_out_file, 1);
8259
8260 /* Pad stack IP move with 4 instructions (two NOPs count
8261 as one instruction). */
8262 if (TARGET_PAD_SHORT_FUNCTION)
8263 {
8264 int i = 8;
8265
8266 while (i--)
8267 fputs ("\tnop\n", asm_out_file);
8268 }
8269
8270 xops[0] = gen_rtx_REG (Pmode, regno);
8271 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8272 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8273 fputs ("\tret\n", asm_out_file);
8274 final_end_function ();
8275 init_insn_lengths ();
8276 free_after_compilation (cfun);
8277 set_cfun (NULL);
8278 current_function_decl = NULL;
8279 }
8280
8281 if (flag_split_stack)
8282 file_end_indicate_split_stack ();
8283 }
8284
8285 /* Emit code for the SET_GOT patterns. */
8286
8287 const char *
8288 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8289 {
8290 rtx xops[3];
8291
8292 xops[0] = dest;
8293
8294 if (TARGET_VXWORKS_RTP && flag_pic)
8295 {
8296 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8297 xops[2] = gen_rtx_MEM (Pmode,
8298 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8299 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8300
8301 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8302 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8303 an unadorned address. */
8304 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8305 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8306 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8307 return "";
8308 }
8309
8310 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8311
8312 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8313 {
8314 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8315
8316 if (!flag_pic)
8317 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8318 else
8319 {
8320 output_asm_insn ("call\t%a2", xops);
8321 #ifdef DWARF2_UNWIND_INFO
8322 /* The call to the next label acts as a push. */
8323 if (dwarf2out_do_frame ())
8324 {
8325 rtx insn;
8326 start_sequence ();
8327 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8328 gen_rtx_PLUS (Pmode,
8329 stack_pointer_rtx,
8330 GEN_INT (-4))));
8331 RTX_FRAME_RELATED_P (insn) = 1;
8332 dwarf2out_frame_debug (insn, true);
8333 end_sequence ();
8334 }
8335 #endif
8336 }
8337
8338 #if TARGET_MACHO
8339 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8340 is what will be referenced by the Mach-O PIC subsystem. */
8341 if (!label)
8342 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8343 #endif
8344
8345 targetm.asm_out.internal_label (asm_out_file, "L",
8346 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8347
8348 if (flag_pic)
8349 {
8350 output_asm_insn ("pop%z0\t%0", xops);
8351 #ifdef DWARF2_UNWIND_INFO
8352 /* The pop is a pop and clobbers dest, but doesn't restore it
8353 for unwind info purposes. */
8354 if (dwarf2out_do_frame ())
8355 {
8356 rtx insn;
8357 start_sequence ();
8358 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8359 dwarf2out_frame_debug (insn, true);
8360 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8361 gen_rtx_PLUS (Pmode,
8362 stack_pointer_rtx,
8363 GEN_INT (4))));
8364 RTX_FRAME_RELATED_P (insn) = 1;
8365 dwarf2out_frame_debug (insn, true);
8366 end_sequence ();
8367 }
8368 #endif
8369 }
8370 }
8371 else
8372 {
8373 char name[32];
8374 get_pc_thunk_name (name, REGNO (dest));
8375 pic_labels_used |= 1 << REGNO (dest);
8376
8377 #ifdef DWARF2_UNWIND_INFO
8378 /* Ensure all queued register saves are flushed before the
8379 call. */
8380 if (dwarf2out_do_frame ())
8381 dwarf2out_flush_queued_reg_saves ();
8382 #endif
8383 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8384 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8385 output_asm_insn ("call\t%X2", xops);
8386 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8387 is what will be referenced by the Mach-O PIC subsystem. */
8388 #if TARGET_MACHO
8389 if (!label)
8390 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8391 else
8392 targetm.asm_out.internal_label (asm_out_file, "L",
8393 CODE_LABEL_NUMBER (label));
8394 #endif
8395 }
8396
8397 if (TARGET_MACHO)
8398 return "";
8399
8400 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8401 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8402 else
8403 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8404
8405 return "";
8406 }
8407
8408 /* Generate an "push" pattern for input ARG. */
8409
8410 static rtx
8411 gen_push (rtx arg)
8412 {
8413 struct machine_function *m = cfun->machine;
8414
8415 if (m->fs.cfa_reg == stack_pointer_rtx)
8416 m->fs.cfa_offset += UNITS_PER_WORD;
8417 m->fs.sp_offset += UNITS_PER_WORD;
8418
8419 return gen_rtx_SET (VOIDmode,
8420 gen_rtx_MEM (Pmode,
8421 gen_rtx_PRE_DEC (Pmode,
8422 stack_pointer_rtx)),
8423 arg);
8424 }
8425
8426 /* Generate an "pop" pattern for input ARG. */
8427
8428 static rtx
8429 gen_pop (rtx arg)
8430 {
8431 return gen_rtx_SET (VOIDmode,
8432 arg,
8433 gen_rtx_MEM (Pmode,
8434 gen_rtx_POST_INC (Pmode,
8435 stack_pointer_rtx)));
8436 }
8437
8438 /* Return the number of an unused call-clobbered register that is available
8439 for the entire function, or INVALID_REGNUM if there is none. */
8440
8441 static unsigned int
8442 ix86_select_alt_pic_regnum (void)
8443 {
8444 if (current_function_is_leaf
8445 && !crtl->profile
8446 && !ix86_current_function_calls_tls_descriptor)
8447 {
8448 int i, drap;
8449 /* Can't use the same register for both PIC and DRAP. */
8450 if (crtl->drap_reg)
8451 drap = REGNO (crtl->drap_reg);
8452 else
8453 drap = -1;
8454 for (i = 2; i >= 0; --i)
8455 if (i != drap && !df_regs_ever_live_p (i))
8456 return i;
8457 }
8458
8459 return INVALID_REGNUM;
8460 }
8461
8462 /* Return TRUE if we need to save REGNO. */
8463
8464 static bool
8465 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8466 {
8467 if (pic_offset_table_rtx
8468 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8469 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8470 || crtl->profile
8471 || crtl->calls_eh_return
8472 || crtl->uses_const_pool))
8473 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8474
8475 if (crtl->calls_eh_return && maybe_eh_return)
8476 {
8477 unsigned i;
8478 for (i = 0; ; i++)
8479 {
8480 unsigned test = EH_RETURN_DATA_REGNO (i);
8481 if (test == INVALID_REGNUM)
8482 break;
8483 if (test == regno)
8484 return true;
8485 }
8486 }
8487
8488 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8489 return true;
8490
8491 return (df_regs_ever_live_p (regno)
8492 && !call_used_regs[regno]
8493 && !fixed_regs[regno]
8494 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8495 }
8496
8497 /* Return the number of saved general purpose registers. */
8498
8499 static int
8500 ix86_nsaved_regs (void)
8501 {
8502 int nregs = 0;
8503 int regno;
8504
8505 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8506 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8507 nregs ++;
8508 return nregs;
8509 }
8510
8511 /* Return the number of saved SSE registers. */
8512
8513 static int
8514 ix86_nsaved_sseregs (void)
8515 {
8516 int nregs = 0;
8517 int regno;
8518
8519 if (!TARGET_64BIT_MS_ABI)
8520 return 0;
8521 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8522 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8523 nregs ++;
8524 return nregs;
8525 }
8526
8527 /* Given FROM and TO register numbers, say whether this elimination is
8528 allowed. If stack alignment is needed, we can only replace argument
8529 pointer with hard frame pointer, or replace frame pointer with stack
8530 pointer. Otherwise, frame pointer elimination is automatically
8531 handled and all other eliminations are valid. */
8532
8533 static bool
8534 ix86_can_eliminate (const int from, const int to)
8535 {
8536 if (stack_realign_fp)
8537 return ((from == ARG_POINTER_REGNUM
8538 && to == HARD_FRAME_POINTER_REGNUM)
8539 || (from == FRAME_POINTER_REGNUM
8540 && to == STACK_POINTER_REGNUM));
8541 else
8542 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8543 }
8544
8545 /* Return the offset between two registers, one to be eliminated, and the other
8546 its replacement, at the start of a routine. */
8547
8548 HOST_WIDE_INT
8549 ix86_initial_elimination_offset (int from, int to)
8550 {
8551 struct ix86_frame frame;
8552 ix86_compute_frame_layout (&frame);
8553
8554 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8555 return frame.hard_frame_pointer_offset;
8556 else if (from == FRAME_POINTER_REGNUM
8557 && to == HARD_FRAME_POINTER_REGNUM)
8558 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8559 else
8560 {
8561 gcc_assert (to == STACK_POINTER_REGNUM);
8562
8563 if (from == ARG_POINTER_REGNUM)
8564 return frame.stack_pointer_offset;
8565
8566 gcc_assert (from == FRAME_POINTER_REGNUM);
8567 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8568 }
8569 }
8570
8571 /* In a dynamically-aligned function, we can't know the offset from
8572 stack pointer to frame pointer, so we must ensure that setjmp
8573 eliminates fp against the hard fp (%ebp) rather than trying to
8574 index from %esp up to the top of the frame across a gap that is
8575 of unknown (at compile-time) size. */
8576 static rtx
8577 ix86_builtin_setjmp_frame_value (void)
8578 {
8579 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8580 }
8581
8582 /* When using -fsplit-stack, the allocation routines set a field in
8583 the TCB to the bottom of the stack plus this much space, measured
8584 in bytes. */
8585
8586 #define SPLIT_STACK_AVAILABLE 256
8587
8588 /* Fill the ix86_frame structure with information about the frame of the
currently compiled function. */
8589
8590 static void
8591 ix86_compute_frame_layout (struct ix86_frame *frame)
8592 {
8593 unsigned int stack_alignment_needed;
8594 HOST_WIDE_INT offset;
8595 unsigned int preferred_alignment;
8596 HOST_WIDE_INT size = get_frame_size ();
8597 HOST_WIDE_INT to_allocate;
8598
8599 frame->nregs = ix86_nsaved_regs ();
8600 frame->nsseregs = ix86_nsaved_sseregs ();
8601
8602 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8603 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8604
8605 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8606 except in function prologues and leaf functions. */
8607 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8608 && (!current_function_is_leaf || cfun->calls_alloca != 0
8609 || ix86_current_function_calls_tls_descriptor))
8610 {
8611 preferred_alignment = 16;
8612 stack_alignment_needed = 16;
8613 crtl->preferred_stack_boundary = 128;
8614 crtl->stack_alignment_needed = 128;
8615 }
8616
8617 gcc_assert (!size || stack_alignment_needed);
8618 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8619 gcc_assert (preferred_alignment <= stack_alignment_needed);
8620
8621 /* For SEH we have to limit the amount of code movement into the prologue.
8622 At present we do this via a BLOCKAGE, at which point there's very little
8623 scheduling that can be done, which means that there's very little point
8624 in doing anything except PUSHs. */
8625 if (TARGET_SEH)
8626 cfun->machine->use_fast_prologue_epilogue = false;
8627
8628 /* During reload iteration the number of registers saved can change.
8629 Recompute the value as needed. Do not recompute when the number of registers
8630 didn't change, as reload calls this function multiple times and does not
8631 expect the decision to change within a single iteration. */
8632 else if (!optimize_function_for_size_p (cfun)
8633 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8634 {
8635 int count = frame->nregs;
8636 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8637
8638 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8639
8640 /* The fast prologue uses move instead of push to save registers. This
8641 is significantly longer, but also executes faster as modern hardware
8642 can execute the moves in parallel, but can't do that for push/pop.
8643
8644 Be careful about choosing which prologue to emit: when the function takes
8645 many instructions to execute we may as well use the slow version, likewise
8646 when the function is known to be outside a hot spot (this is known with
8647 feedback only). Weight the size of the function by the number of registers
8648 to save, as it is cheap to use one or two push instructions but very
8649 slow to use many of them. */
8650 if (count)
8651 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8652 if (node->frequency < NODE_FREQUENCY_NORMAL
8653 || (flag_branch_probabilities
8654 && node->frequency < NODE_FREQUENCY_HOT))
8655 cfun->machine->use_fast_prologue_epilogue = false;
8656 else
8657 cfun->machine->use_fast_prologue_epilogue
8658 = !expensive_function_p (count);
8659 }
8660 if (TARGET_PROLOGUE_USING_MOVE
8661 && cfun->machine->use_fast_prologue_epilogue)
8662 frame->save_regs_using_mov = true;
8663 else
8664 frame->save_regs_using_mov = false;
8665
8666 /* If static stack checking is enabled and done with probes, the registers
8667 need to be saved before allocating the frame. */
8668 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8669 frame->save_regs_using_mov = false;
8670
8671 /* Skip return address. */
8672 offset = UNITS_PER_WORD;
8673
8674 /* Skip pushed static chain. */
8675 if (ix86_static_chain_on_stack)
8676 offset += UNITS_PER_WORD;
8677
8678 /* Skip saved base pointer. */
8679 if (frame_pointer_needed)
8680 offset += UNITS_PER_WORD;
8681 frame->hfp_save_offset = offset;
8682
8683 /* The traditional frame pointer location is at the top of the frame. */
8684 frame->hard_frame_pointer_offset = offset;
8685
8686 /* Register save area */
8687 offset += frame->nregs * UNITS_PER_WORD;
8688 frame->reg_save_offset = offset;
8689
8690 /* Align and set SSE register save area. */
8691 if (frame->nsseregs)
8692 {
8693 /* The only ABI that has saved SSE registers (Win64) also has a
8694 16-byte aligned default stack, and thus we don't need to be
8695 within the re-aligned local stack frame to save them. */
8696 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8697 offset = (offset + 16 - 1) & -16;
8698 offset += frame->nsseregs * 16;
8699 }
8700 frame->sse_reg_save_offset = offset;
8701
8702 /* The re-aligned stack starts here. Values before this point are not
8703 directly comparable with values below this point. In order to make
8704 sure that no value happens to be the same before and after, force
8705 the alignment computation below to add a non-zero value. */
8706 if (stack_realign_fp)
8707 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8708
8709 /* Va-arg area */
8710 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8711 offset += frame->va_arg_size;
8712
8713 /* Align start of frame for local function. */
8714 if (stack_realign_fp
8715 || offset != frame->sse_reg_save_offset
8716 || size != 0
8717 || !current_function_is_leaf
8718 || cfun->calls_alloca
8719 || ix86_current_function_calls_tls_descriptor)
8720 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8721
8722 /* Frame pointer points here. */
8723 frame->frame_pointer_offset = offset;
8724
8725 offset += size;
8726
8727 /* Add the outgoing arguments area. It can be skipped if we eliminated
8728 all the function calls as dead code.
8729 Skipping is however impossible when the function calls alloca: the alloca
8730 expander assumes that the last crtl->outgoing_args_size bytes
8731 of the stack frame are unused. */
8732 if (ACCUMULATE_OUTGOING_ARGS
8733 && (!current_function_is_leaf || cfun->calls_alloca
8734 || ix86_current_function_calls_tls_descriptor))
8735 {
8736 offset += crtl->outgoing_args_size;
8737 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8738 }
8739 else
8740 frame->outgoing_arguments_size = 0;
8741
8742 /* Align stack boundary. Only needed if we're calling another function
8743 or using alloca. */
8744 if (!current_function_is_leaf || cfun->calls_alloca
8745 || ix86_current_function_calls_tls_descriptor)
8746 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8747
8748 /* We've reached end of stack frame. */
8749 frame->stack_pointer_offset = offset;
8750
8751 /* Size prologue needs to allocate. */
8752 to_allocate = offset - frame->sse_reg_save_offset;
8753
8754 if ((!to_allocate && frame->nregs <= 1)
8755 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8756 frame->save_regs_using_mov = false;
8757
8758 if (ix86_using_red_zone ()
8759 && current_function_sp_is_unchanging
8760 && current_function_is_leaf
8761 && !ix86_current_function_calls_tls_descriptor)
8762 {
8763 frame->red_zone_size = to_allocate;
8764 if (frame->save_regs_using_mov)
8765 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8766 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8767 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8768 }
8769 else
8770 frame->red_zone_size = 0;
8771 frame->stack_pointer_offset -= frame->red_zone_size;
8772
8773 /* The SEH frame pointer location is near the bottom of the frame.
8774 This is enforced by the fact that the difference between the
8775 stack pointer and the frame pointer is limited to 240 bytes in
8776 the unwind data structure. */
8777 if (TARGET_SEH)
8778 {
8779 HOST_WIDE_INT diff;
8780
8781 /* If we can leave the frame pointer where it is, do so. */
8782 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8783 if (diff > 240 || (diff & 15) != 0)
8784 {
8785 /* Ideally we'd determine what portion of the local stack frame
8786 (within the constraint of the lowest 240) is most heavily used.
8787 But without that complication, simply bias the frame pointer
8788 by 128 bytes so as to maximize the amount of the local stack
8789 frame that is addressable with 8-bit offsets. */
8790 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8791 }
8792 }
8793 }
8794
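/* Illustrative sketch, not part of GCC: the two rounding idioms used in
   ix86_compute_frame_layout above, applied to made-up numbers.  Both assume
   ALIGN is a power of two; the helper and variable names are hypothetical.  */
#if 0
#include <stdio.h>

static long
align_up (long offset, long align)
{
  /* Round OFFSET up to the next multiple of ALIGN (no-op if aligned).  */
  return (offset + align - 1) & -align;
}

static long
align_up_nonzero (long offset, long align)
{
  /* Variant used above for the stack_realign_fp case: always moves OFFSET,
     even when it is already aligned, so that offsets on the two sides of
     the realignment point can never coincide.  */
  return (offset + align) & -align;
}

int
main (void)
{
  printf ("%ld %ld\n", align_up (40, 16), align_up (48, 16));                  /* 48 48 */
  printf ("%ld %ld\n", align_up_nonzero (40, 16), align_up_nonzero (48, 16));  /* 48 64 */
  return 0;
}
#endif
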
8795 /* This is semi-inlined memory_address_length, but simplified
8796 since we know that we're always dealing with reg+offset, and
8797 to avoid having to create and discard all that rtl. */
8798
8799 static inline int
8800 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8801 {
8802 int len = 4;
8803
8804 if (offset == 0)
8805 {
8806 /* EBP and R13 cannot be encoded without an offset. */
8807 len = (regno == BP_REG || regno == R13_REG);
8808 }
8809 else if (IN_RANGE (offset, -128, 127))
8810 len = 1;
8811
8812 /* ESP and R12 must be encoded with a SIB byte. */
8813 if (regno == SP_REG || regno == R12_REG)
8814 len++;
8815
8816 return len;
8817 }
8818
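/* Illustrative sketch, not part of GCC: a standalone replica of
   choose_baseaddr_len above, using made-up register identifiers so it
   compiles on its own.  It mirrors the x86 encoding rules: a disp8 costs
   1 byte and a disp32 costs 4, EBP/R13 cannot be encoded without a
   displacement, and ESP/R12 always need a SIB byte.  */
#if 0
enum reg { REG_AX, REG_SP, REG_BP, REG_R12, REG_R13, REG_OTHER };

static int
baseaddr_len (enum reg r, long offset)
{
  int len = 4;                               /* disp32 by default */
  if (offset == 0)
    len = (r == REG_BP || r == REG_R13);     /* a disp8 of 0 is still needed */
  else if (offset >= -128 && offset <= 127)
    len = 1;                                 /* disp8 */
  if (r == REG_SP || r == REG_R12)
    len++;                                   /* SIB byte */
  return len;
}
/* baseaddr_len (REG_BP, 0) == 1, baseaddr_len (REG_SP, 8) == 2,
   baseaddr_len (REG_AX, 512) == 4.  */
#endif
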
8819 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8820 The valid base registers are taken from CFUN->MACHINE->FS. */
8821
8822 static rtx
8823 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8824 {
8825 const struct machine_function *m = cfun->machine;
8826 rtx base_reg = NULL;
8827 HOST_WIDE_INT base_offset = 0;
8828
8829 if (m->use_fast_prologue_epilogue)
8830 {
8831 /* Choose the base register most likely to allow the most scheduling
8832          opportunities.  Generally FP is valid throughout the function,
8833 while DRAP must be reloaded within the epilogue. But choose either
8834 over the SP due to increased encoding size. */
8835
8836 if (m->fs.fp_valid)
8837 {
8838 base_reg = hard_frame_pointer_rtx;
8839 base_offset = m->fs.fp_offset - cfa_offset;
8840 }
8841 else if (m->fs.drap_valid)
8842 {
8843 base_reg = crtl->drap_reg;
8844 base_offset = 0 - cfa_offset;
8845 }
8846 else if (m->fs.sp_valid)
8847 {
8848 base_reg = stack_pointer_rtx;
8849 base_offset = m->fs.sp_offset - cfa_offset;
8850 }
8851 }
8852 else
8853 {
8854 HOST_WIDE_INT toffset;
8855 int len = 16, tlen;
8856
8857 /* Choose the base register with the smallest address encoding.
8858 With a tie, choose FP > DRAP > SP. */
8859 if (m->fs.sp_valid)
8860 {
8861 base_reg = stack_pointer_rtx;
8862 base_offset = m->fs.sp_offset - cfa_offset;
8863 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8864 }
8865 if (m->fs.drap_valid)
8866 {
8867 toffset = 0 - cfa_offset;
8868 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8869 if (tlen <= len)
8870 {
8871 base_reg = crtl->drap_reg;
8872 base_offset = toffset;
8873 len = tlen;
8874 }
8875 }
8876 if (m->fs.fp_valid)
8877 {
8878 toffset = m->fs.fp_offset - cfa_offset;
8879 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8880 if (tlen <= len)
8881 {
8882 base_reg = hard_frame_pointer_rtx;
8883 base_offset = toffset;
8884 len = tlen;
8885 }
8886 }
8887 }
8888 gcc_assert (base_reg != NULL);
8889
8890 return plus_constant (base_reg, base_offset);
8891 }
8892
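/* Illustrative sketch, not part of GCC: how the "<=" comparisons in
   choose_baseaddr above implement the FP > DRAP > SP tie-break.  The
   candidates are tried in the order SP, DRAP, FP, and a later candidate
   replaces an earlier one when its encoding is no longer, so on a tie the
   last one examined (FP) wins.  The struct and lengths are hypothetical.  */
#if 0
struct cand { const char *name; int len; };

static const char *
pick_base (struct cand sp, struct cand drap, struct cand fp)
{
  const char *best = sp.name;
  int len = sp.len;
  if (drap.len <= len) { best = drap.name; len = drap.len; }
  if (fp.len <= len)   { best = fp.name;   len = fp.len; }
  return best;
}
/* pick_base ((struct cand){"sp", 2}, (struct cand){"drap", 1},
              (struct cand){"fp", 1}) returns "fp".  */
#endif
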
8893 /* Emit code to save registers in the prologue. */
8894
8895 static void
8896 ix86_emit_save_regs (void)
8897 {
8898 unsigned int regno;
8899 rtx insn;
8900
8901 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8902 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8903 {
8904 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8905 RTX_FRAME_RELATED_P (insn) = 1;
8906 }
8907 }
8908
8909 /* Emit a single register save at CFA - CFA_OFFSET. */
8910
8911 static void
8912 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8913 HOST_WIDE_INT cfa_offset)
8914 {
8915 struct machine_function *m = cfun->machine;
8916 rtx reg = gen_rtx_REG (mode, regno);
8917 rtx mem, addr, base, insn;
8918
8919 addr = choose_baseaddr (cfa_offset);
8920 mem = gen_frame_mem (mode, addr);
8921
8922 /* For SSE saves, we need to indicate the 128-bit alignment. */
8923 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8924
8925 insn = emit_move_insn (mem, reg);
8926 RTX_FRAME_RELATED_P (insn) = 1;
8927
8928 base = addr;
8929 if (GET_CODE (base) == PLUS)
8930 base = XEXP (base, 0);
8931 gcc_checking_assert (REG_P (base));
8932
8933 /* When saving registers into a re-aligned local stack frame, avoid
8934 any tricky guessing by dwarf2out. */
8935 if (m->fs.realigned)
8936 {
8937 gcc_checking_assert (stack_realign_drap);
8938
8939 if (regno == REGNO (crtl->drap_reg))
8940 {
8941 /* A bit of a hack. We force the DRAP register to be saved in
8942 the re-aligned stack frame, which provides us with a copy
8943 of the CFA that will last past the prologue. Install it. */
8944 gcc_checking_assert (cfun->machine->fs.fp_valid);
8945 addr = plus_constant (hard_frame_pointer_rtx,
8946 cfun->machine->fs.fp_offset - cfa_offset);
8947 mem = gen_rtx_MEM (mode, addr);
8948 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8949 }
8950 else
8951 {
8952 /* The frame pointer is a stable reference within the
8953 aligned frame. Use it. */
8954 gcc_checking_assert (cfun->machine->fs.fp_valid);
8955 addr = plus_constant (hard_frame_pointer_rtx,
8956 cfun->machine->fs.fp_offset - cfa_offset);
8957 mem = gen_rtx_MEM (mode, addr);
8958 add_reg_note (insn, REG_CFA_EXPRESSION,
8959 gen_rtx_SET (VOIDmode, mem, reg));
8960 }
8961 }
8962
8963 /* The memory may not be relative to the current CFA register,
8964 which means that we may need to generate a new pattern for
8965 use by the unwind info. */
8966 else if (base != m->fs.cfa_reg)
8967 {
8968 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8969 mem = gen_rtx_MEM (mode, addr);
8970 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8971 }
8972 }
8973
8974 /* Emit code to save registers using MOV insns.
8975 First register is stored at CFA - CFA_OFFSET. */
8976 static void
8977 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8978 {
8979 unsigned int regno;
8980
8981 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8982 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8983 {
8984 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8985 cfa_offset -= UNITS_PER_WORD;
8986 }
8987 }
8988
8989 /* Emit code to save SSE registers using MOV insns.
8990 First register is stored at CFA - CFA_OFFSET. */
8991 static void
8992 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8993 {
8994 unsigned int regno;
8995
8996 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8997 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8998 {
8999 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9000 cfa_offset -= 16;
9001 }
9002 }
9003
9004 static GTY(()) rtx queued_cfa_restores;
9005
9006 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9007 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9008 Don't add the note if the previously saved value will be left untouched
9009 within stack red-zone till return, as unwinders can find the same value
9010 in the register and on the stack. */
9011
9012 static void
9013 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9014 {
9015 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9016 return;
9017
9018 if (insn)
9019 {
9020 add_reg_note (insn, REG_CFA_RESTORE, reg);
9021 RTX_FRAME_RELATED_P (insn) = 1;
9022 }
9023 else
9024 queued_cfa_restores
9025 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9026 }
9027
9028 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9029
9030 static void
9031 ix86_add_queued_cfa_restore_notes (rtx insn)
9032 {
9033 rtx last;
9034 if (!queued_cfa_restores)
9035 return;
9036 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9037 ;
9038 XEXP (last, 1) = REG_NOTES (insn);
9039 REG_NOTES (insn) = queued_cfa_restores;
9040 queued_cfa_restores = NULL_RTX;
9041 RTX_FRAME_RELATED_P (insn) = 1;
9042 }
9043
9044 /* Expand prologue or epilogue stack adjustment.
9045    The pattern exists to put a dependency on all ebp-based memory accesses.
9046 STYLE should be negative if instructions should be marked as frame related,
9047 zero if %r11 register is live and cannot be freely used and positive
9048 otherwise. */
9049
9050 static void
9051 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9052 int style, bool set_cfa)
9053 {
9054 struct machine_function *m = cfun->machine;
9055 rtx insn;
9056 bool add_frame_related_expr = false;
9057
9058 if (! TARGET_64BIT)
9059 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9060 else if (x86_64_immediate_operand (offset, DImode))
9061 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9062 else
9063 {
9064 rtx tmp;
9065 /* r11 is used by indirect sibcall return as well, set before the
9066 epilogue and used after the epilogue. */
9067 if (style)
9068 tmp = gen_rtx_REG (DImode, R11_REG);
9069 else
9070 {
9071 gcc_assert (src != hard_frame_pointer_rtx
9072 && dest != hard_frame_pointer_rtx);
9073 tmp = hard_frame_pointer_rtx;
9074 }
9075 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9076 if (style < 0)
9077 add_frame_related_expr = true;
9078
9079 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9080 }
9081
9082 insn = emit_insn (insn);
9083 if (style >= 0)
9084 ix86_add_queued_cfa_restore_notes (insn);
9085
9086 if (set_cfa)
9087 {
9088 rtx r;
9089
9090 gcc_assert (m->fs.cfa_reg == src);
9091 m->fs.cfa_offset += INTVAL (offset);
9092 m->fs.cfa_reg = dest;
9093
9094 r = gen_rtx_PLUS (Pmode, src, offset);
9095 r = gen_rtx_SET (VOIDmode, dest, r);
9096 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9097 RTX_FRAME_RELATED_P (insn) = 1;
9098 }
9099 else if (style < 0)
9100 {
9101 RTX_FRAME_RELATED_P (insn) = 1;
9102 if (add_frame_related_expr)
9103 {
9104 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9105 r = gen_rtx_SET (VOIDmode, dest, r);
9106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9107 }
9108 }
9109
9110 if (dest == stack_pointer_rtx)
9111 {
9112 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9113 bool valid = m->fs.sp_valid;
9114
9115 if (src == hard_frame_pointer_rtx)
9116 {
9117 valid = m->fs.fp_valid;
9118 ooffset = m->fs.fp_offset;
9119 }
9120 else if (src == crtl->drap_reg)
9121 {
9122 valid = m->fs.drap_valid;
9123 ooffset = 0;
9124 }
9125 else
9126 {
9127 /* Else there are two possibilities: SP itself, which we set
9128 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9129             taken care of by hand along the eh_return path.  */
9130 gcc_checking_assert (src == stack_pointer_rtx
9131 || offset == const0_rtx);
9132 }
9133
9134 m->fs.sp_offset = ooffset - INTVAL (offset);
9135 m->fs.sp_valid = valid;
9136 }
9137 }
9138
9139 /* Find an available register to be used as dynamic realign argument
9140    pointer register.  Such a register will be written in the prologue and
9141    used at the beginning of the body, so it must not be
9142         1. a parameter passing register.
9143         2. the GOT pointer.
9144    We reuse the static-chain register if it is available.  Otherwise, we
9145    use DI for i386 and R13 for x86-64.  We chose R13 since it has a
9146    shorter encoding.
9147
9148    Return: the regno of the chosen register.  */
9149
9150 static unsigned int
9151 find_drap_reg (void)
9152 {
9153 tree decl = cfun->decl;
9154
9155 if (TARGET_64BIT)
9156 {
9157       /* Use R13 for a nested function or a function that needs a static
9158          chain.  Since a function with a tail call may use any caller-saved
9159          register in the epilogue, DRAP must not use a caller-saved
9160          register in that case.  */
9161 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9162 return R13_REG;
9163
9164 return R10_REG;
9165 }
9166 else
9167 {
9168       /* Use DI for a nested function or a function that needs a static
9169          chain.  Since a function with a tail call may use any caller-saved
9170          register in the epilogue, DRAP must not use a caller-saved
9171          register in that case.  */
9172 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9173 return DI_REG;
9174
9175 /* Reuse static chain register if it isn't used for parameter
9176 passing. */
9177 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9178 {
9179 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9180 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9181 return CX_REG;
9182 }
9183 return DI_REG;
9184 }
9185 }
9186
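/* Illustrative sketch, not part of GCC: the register choice made by
   find_drap_reg above, reduced to plain booleans.  The FASTCALL/THISCALL
   check is folded into `ecx_is_taken'; all inputs and the helper name are
   hypothetical.  */
#if 0
static const char *
drap_reg_choice (int is_64bit, int static_chain_or_tail_call,
                 int regparm_le_2, int ecx_is_taken)
{
  if (is_64bit)
    return static_chain_or_tail_call ? "r13" : "r10";
  if (static_chain_or_tail_call)
    return "edi";
  if (regparm_le_2 && !ecx_is_taken)
    return "ecx";
  return "edi";
}
#endif
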
9187 /* Return minimum incoming stack alignment. */
9188
9189 static unsigned int
9190 ix86_minimum_incoming_stack_boundary (bool sibcall)
9191 {
9192 unsigned int incoming_stack_boundary;
9193
9194 /* Prefer the one specified at command line. */
9195 if (ix86_user_incoming_stack_boundary)
9196 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9197   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
9198      boundary when this isn't a sibcall check, -mstackrealign is used,
9199      and the estimated stack alignment is 128 bits.  */
9200 else if (!sibcall
9201 && !TARGET_64BIT
9202 && ix86_force_align_arg_pointer
9203 && crtl->stack_alignment_estimated == 128)
9204 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9205 else
9206 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9207
9208 /* Incoming stack alignment can be changed on individual functions
9209 via force_align_arg_pointer attribute. We use the smallest
9210 incoming stack boundary. */
9211 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9212 && lookup_attribute (ix86_force_align_arg_pointer_string,
9213 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9214 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9215
9216 /* The incoming stack frame has to be aligned at least at
9217 parm_stack_boundary. */
9218 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9219 incoming_stack_boundary = crtl->parm_stack_boundary;
9220
9221   /* The stack at the entry of main is aligned by the runtime.  We use the
9222 smallest incoming stack boundary. */
9223 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9224 && DECL_NAME (current_function_decl)
9225 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9226 && DECL_FILE_SCOPE_P (current_function_decl))
9227 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9228
9229 return incoming_stack_boundary;
9230 }
9231
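/* Illustrative sketch, not part of GCC: the clamping order applied by
   ix86_minimum_incoming_stack_boundary above.  Boundaries are in bits
   (e.g. 32, 64, 128); the helper name and arguments are hypothetical.  */
#if 0
static unsigned int
clamp_incoming_boundary (unsigned int chosen, int has_force_align_attr,
                         unsigned int min_boundary,
                         unsigned int parm_boundary,
                         int is_main, unsigned int main_boundary)
{
  /* The force_align_arg_pointer attribute can only lower the boundary.  */
  if (chosen > min_boundary && has_force_align_attr)
    chosen = min_boundary;
  /* Parameter passing imposes a lower bound.  */
  if (chosen < parm_boundary)
    chosen = parm_boundary;
  /* main () is aligned by the runtime; don't assume more than that.  */
  if (chosen > main_boundary && is_main)
    chosen = main_boundary;
  return chosen;
}
#endif
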
9232 /* Update incoming stack boundary and estimated stack alignment. */
9233
9234 static void
9235 ix86_update_stack_boundary (void)
9236 {
9237 ix86_incoming_stack_boundary
9238 = ix86_minimum_incoming_stack_boundary (false);
9239
9240   /* x86_64 varargs need 16-byte stack alignment for the register save
9241 area. */
9242 if (TARGET_64BIT
9243 && cfun->stdarg
9244 && crtl->stack_alignment_estimated < 128)
9245 crtl->stack_alignment_estimated = 128;
9246 }
9247
9248 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9249 needed or an rtx for DRAP otherwise. */
9250
9251 static rtx
9252 ix86_get_drap_rtx (void)
9253 {
9254 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9255 crtl->need_drap = true;
9256
9257 if (stack_realign_drap)
9258 {
9259       /* Assign DRAP to vDRAP and return vDRAP.  */
9260 unsigned int regno = find_drap_reg ();
9261 rtx drap_vreg;
9262 rtx arg_ptr;
9263 rtx seq, insn;
9264
9265 arg_ptr = gen_rtx_REG (Pmode, regno);
9266 crtl->drap_reg = arg_ptr;
9267
9268 start_sequence ();
9269 drap_vreg = copy_to_reg (arg_ptr);
9270 seq = get_insns ();
9271 end_sequence ();
9272
9273 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9274 if (!optimize)
9275 {
9276 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9277 RTX_FRAME_RELATED_P (insn) = 1;
9278 }
9279 return drap_vreg;
9280 }
9281 else
9282 return NULL;
9283 }
9284
9285 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9286
9287 static rtx
9288 ix86_internal_arg_pointer (void)
9289 {
9290 return virtual_incoming_args_rtx;
9291 }
9292
9293 struct scratch_reg {
9294 rtx reg;
9295 bool saved;
9296 };
9297
9298 /* Return a short-lived scratch register for use on function entry.
9299 In 32-bit mode, it is valid only after the registers are saved
9300 in the prologue. This register must be released by means of
9301 release_scratch_register_on_entry once it is dead. */
9302
9303 static void
9304 get_scratch_register_on_entry (struct scratch_reg *sr)
9305 {
9306 int regno;
9307
9308 sr->saved = false;
9309
9310 if (TARGET_64BIT)
9311 {
9312 /* We always use R11 in 64-bit mode. */
9313 regno = R11_REG;
9314 }
9315 else
9316 {
9317 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9318 bool fastcall_p
9319 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9320 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9321 int regparm = ix86_function_regparm (fntype, decl);
9322 int drap_regno
9323 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9324
9325 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9326 for the static chain register. */
9327 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9328 && drap_regno != AX_REG)
9329 regno = AX_REG;
9330 else if (regparm < 2 && drap_regno != DX_REG)
9331 regno = DX_REG;
9332 /* ecx is the static chain register. */
9333 else if (regparm < 3 && !fastcall_p && !static_chain_p
9334 && drap_regno != CX_REG)
9335 regno = CX_REG;
9336 else if (ix86_save_reg (BX_REG, true))
9337 regno = BX_REG;
9338 /* esi is the static chain register. */
9339 else if (!(regparm == 3 && static_chain_p)
9340 && ix86_save_reg (SI_REG, true))
9341 regno = SI_REG;
9342 else if (ix86_save_reg (DI_REG, true))
9343 regno = DI_REG;
9344 else
9345 {
9346 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9347 sr->saved = true;
9348 }
9349 }
9350
9351 sr->reg = gen_rtx_REG (Pmode, regno);
9352 if (sr->saved)
9353 {
9354 rtx insn = emit_insn (gen_push (sr->reg));
9355 RTX_FRAME_RELATED_P (insn) = 1;
9356 }
9357 }
9358
9359 /* Release a scratch register obtained from the preceding function. */
9360
9361 static void
9362 release_scratch_register_on_entry (struct scratch_reg *sr)
9363 {
9364 if (sr->saved)
9365 {
9366 rtx x, insn = emit_insn (gen_pop (sr->reg));
9367
9368 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9369 RTX_FRAME_RELATED_P (insn) = 1;
9370 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9371 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9372 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9373 }
9374 }
9375
9376 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9377
9378 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9379
9380 static void
9381 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9382 {
9383 /* We skip the probe for the first interval + a small dope of 4 words and
9384 probe that many bytes past the specified size to maintain a protection
9385      area at the bottom of the stack.  */
9386 const int dope = 4 * UNITS_PER_WORD;
9387 rtx size_rtx = GEN_INT (size), last;
9388
9389 /* See if we have a constant small number of probes to generate. If so,
9390 that's the easy case. The run-time loop is made up of 11 insns in the
9391 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9392 for n # of intervals. */
9393 if (size <= 5 * PROBE_INTERVAL)
9394 {
9395 HOST_WIDE_INT i, adjust;
9396 bool first_probe = true;
9397
9398 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9399 values of N from 1 until it exceeds SIZE. If only one probe is
9400 needed, this will not generate any code. Then adjust and probe
9401 to PROBE_INTERVAL + SIZE. */
9402 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9403 {
9404 if (first_probe)
9405 {
9406 adjust = 2 * PROBE_INTERVAL + dope;
9407 first_probe = false;
9408 }
9409 else
9410 adjust = PROBE_INTERVAL;
9411
9412 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9413 plus_constant (stack_pointer_rtx, -adjust)));
9414 emit_stack_probe (stack_pointer_rtx);
9415 }
9416
9417 if (first_probe)
9418 adjust = size + PROBE_INTERVAL + dope;
9419 else
9420 adjust = size + PROBE_INTERVAL - i;
9421
9422 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9423 plus_constant (stack_pointer_rtx, -adjust)));
9424 emit_stack_probe (stack_pointer_rtx);
9425
9426 /* Adjust back to account for the additional first interval. */
9427 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9428 plus_constant (stack_pointer_rtx,
9429 PROBE_INTERVAL + dope)));
9430 }
9431
9432 /* Otherwise, do the same as above, but in a loop. Note that we must be
9433 extra careful with variables wrapping around because we might be at
9434 the very top (or the very bottom) of the address space and we have
9435 to be able to handle this case properly; in particular, we use an
9436 equality test for the loop condition. */
9437 else
9438 {
9439 HOST_WIDE_INT rounded_size;
9440 struct scratch_reg sr;
9441
9442 get_scratch_register_on_entry (&sr);
9443
9444
9445 /* Step 1: round SIZE to the previous multiple of the interval. */
9446
9447 rounded_size = size & -PROBE_INTERVAL;
9448
9449
9450 /* Step 2: compute initial and final value of the loop counter. */
9451
9452 /* SP = SP_0 + PROBE_INTERVAL. */
9453 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9454 plus_constant (stack_pointer_rtx,
9455 - (PROBE_INTERVAL + dope))));
9456
9457 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9458 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9459 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9460 gen_rtx_PLUS (Pmode, sr.reg,
9461 stack_pointer_rtx)));
9462
9463
9464 /* Step 3: the loop
9465
9466 while (SP != LAST_ADDR)
9467 {
9468 SP = SP + PROBE_INTERVAL
9469 probe at SP
9470 }
9471
9472 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9473 values of N from 1 until it is equal to ROUNDED_SIZE. */
9474
9475 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9476
9477
9478 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9479 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9480
9481 if (size != rounded_size)
9482 {
9483 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9484 plus_constant (stack_pointer_rtx,
9485 rounded_size - size)));
9486 emit_stack_probe (stack_pointer_rtx);
9487 }
9488
9489 /* Adjust back to account for the additional first interval. */
9490 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9491 plus_constant (stack_pointer_rtx,
9492 PROBE_INTERVAL + dope)));
9493
9494 release_scratch_register_on_entry (&sr);
9495 }
9496
9497 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9498
9499 /* Even if the stack pointer isn't the CFA register, we need to correctly
9500 describe the adjustments made to it, in particular differentiate the
9501 frame-related ones from the frame-unrelated ones. */
9502 if (size > 0)
9503 {
9504 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9505 XVECEXP (expr, 0, 0)
9506 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9507 plus_constant (stack_pointer_rtx, -size));
9508 XVECEXP (expr, 0, 1)
9509 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9510 plus_constant (stack_pointer_rtx,
9511 PROBE_INTERVAL + dope + size));
9512 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9513 RTX_FRAME_RELATED_P (last) = 1;
9514
9515 cfun->machine->fs.sp_offset += size;
9516 }
9517
9518 /* Make sure nothing is scheduled before we are done. */
9519 emit_insn (gen_blockage ());
9520 }
9521
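/* Illustrative sketch, not part of GCC: the SP adjustments emitted by
   ix86_adjust_stack_and_probe above on its unrolled (small constant SIZE)
   path, assuming PROBE_INTERVAL == 4096 and 64-bit words (dope == 32).
   The SIZE value is made up; the net downward movement always works out
   to exactly SIZE.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long P = 4096, dope = 32, size = 10000;   /* hypothetical SIZE */
  long i, adjust, moved = 0;
  int first_probe = 1;

  for (i = P; i < size; i += P)
    {
      adjust = first_probe ? 2 * P + dope : P;
      first_probe = 0;
      moved += adjust;
      printf ("sub $%ld, %%rsp; probe\n", adjust);
    }
  adjust = first_probe ? size + P + dope : size + P - i;
  moved += adjust;
  printf ("sub $%ld, %%rsp; probe\n", adjust);
  moved -= P + dope;                              /* the final adjust-back */
  printf ("add $%ld, %%rsp   -> net %ld bytes allocated\n", P + dope, moved);
  return 0;
}
#endif
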
9522 /* Adjust the stack pointer up to REG while probing it. */
9523
9524 const char *
9525 output_adjust_stack_and_probe (rtx reg)
9526 {
9527 static int labelno = 0;
9528 char loop_lab[32], end_lab[32];
9529 rtx xops[2];
9530
9531 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9532 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9533
9534 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9535
9536 /* Jump to END_LAB if SP == LAST_ADDR. */
9537 xops[0] = stack_pointer_rtx;
9538 xops[1] = reg;
9539 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9540 fputs ("\tje\t", asm_out_file);
9541 assemble_name_raw (asm_out_file, end_lab);
9542 fputc ('\n', asm_out_file);
9543
9544 /* SP = SP + PROBE_INTERVAL. */
9545 xops[1] = GEN_INT (PROBE_INTERVAL);
9546 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9547
9548 /* Probe at SP. */
9549 xops[1] = const0_rtx;
9550 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9551
9552 fprintf (asm_out_file, "\tjmp\t");
9553 assemble_name_raw (asm_out_file, loop_lab);
9554 fputc ('\n', asm_out_file);
9555
9556 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9557
9558 return "";
9559 }
9560
9561 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9562 inclusive. These are offsets from the current stack pointer. */
9563
9564 static void
9565 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9566 {
9567 /* See if we have a constant small number of probes to generate. If so,
9568 that's the easy case. The run-time loop is made up of 7 insns in the
9569 generic case while the compile-time loop is made up of n insns for n #
9570 of intervals. */
9571 if (size <= 7 * PROBE_INTERVAL)
9572 {
9573 HOST_WIDE_INT i;
9574
9575 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9576 it exceeds SIZE. If only one probe is needed, this will not
9577 generate any code. Then probe at FIRST + SIZE. */
9578 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9579 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9580
9581 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9582 }
9583
9584 /* Otherwise, do the same as above, but in a loop. Note that we must be
9585 extra careful with variables wrapping around because we might be at
9586 the very top (or the very bottom) of the address space and we have
9587 to be able to handle this case properly; in particular, we use an
9588 equality test for the loop condition. */
9589 else
9590 {
9591 HOST_WIDE_INT rounded_size, last;
9592 struct scratch_reg sr;
9593
9594 get_scratch_register_on_entry (&sr);
9595
9596
9597 /* Step 1: round SIZE to the previous multiple of the interval. */
9598
9599 rounded_size = size & -PROBE_INTERVAL;
9600
9601
9602 /* Step 2: compute initial and final value of the loop counter. */
9603
9604 /* TEST_OFFSET = FIRST. */
9605 emit_move_insn (sr.reg, GEN_INT (-first));
9606
9607 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9608 last = first + rounded_size;
9609
9610
9611 /* Step 3: the loop
9612
9613 while (TEST_ADDR != LAST_ADDR)
9614 {
9615 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9616 probe at TEST_ADDR
9617 }
9618
9619 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9620 until it is equal to ROUNDED_SIZE. */
9621
9622 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9623
9624
9625 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9626 that SIZE is equal to ROUNDED_SIZE. */
9627
9628 if (size != rounded_size)
9629 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9630 stack_pointer_rtx,
9631 sr.reg),
9632 rounded_size - size));
9633
9634 release_scratch_register_on_entry (&sr);
9635 }
9636
9637 /* Make sure nothing is scheduled before we are done. */
9638 emit_insn (gen_blockage ());
9639 }
9640
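/* Illustrative sketch, not part of GCC: the probe offsets generated by
   ix86_emit_probe_stack_range above on its unrolled path, for hypothetical
   FIRST/SIZE values and PROBE_INTERVAL == 4096.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long P = 4096, first = 4 * 4096, size = 12000;  /* made-up inputs */
  long i;

  /* Probe at FIRST + N * PROBE_INTERVAL until that exceeds SIZE, then
     probe once more at FIRST + SIZE, exactly as the loop above does.  */
  for (i = P; i < size; i += P)
    printf ("probe at sp - %ld\n", first + i);
  printf ("probe at sp - %ld\n", first + size);
  return 0;
}
#endif
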
9641 /* Probe a range of stack addresses from REG to END, inclusive. These are
9642 offsets from the current stack pointer. */
9643
9644 const char *
9645 output_probe_stack_range (rtx reg, rtx end)
9646 {
9647 static int labelno = 0;
9648 char loop_lab[32], end_lab[32];
9649 rtx xops[3];
9650
9651 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9652 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9653
9654 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9655
9656 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9657 xops[0] = reg;
9658 xops[1] = end;
9659 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9660 fputs ("\tje\t", asm_out_file);
9661 assemble_name_raw (asm_out_file, end_lab);
9662 fputc ('\n', asm_out_file);
9663
9664 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9665 xops[1] = GEN_INT (PROBE_INTERVAL);
9666 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9667
9668 /* Probe at TEST_ADDR. */
9669 xops[0] = stack_pointer_rtx;
9670 xops[1] = reg;
9671 xops[2] = const0_rtx;
9672 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9673
9674 fprintf (asm_out_file, "\tjmp\t");
9675 assemble_name_raw (asm_out_file, loop_lab);
9676 fputc ('\n', asm_out_file);
9677
9678 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9679
9680 return "";
9681 }
9682
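/* For reference, the loop emitted by output_probe_stack_range above looks
   roughly like this in AT&T syntax.  The label names, operand sizes and
   immediates are illustrative only; the real output depends on the mode,
   the chosen scratch register, the label counter and the probe interval.

       .LPSRL0:
               cmp     $-20480, %rax       # TEST_ADDR == LAST_ADDR?
               je      .LPSRE0
               sub     $4096, %rax         # advance TEST_ADDR by PROBE_INTERVAL
                                           # (the offsets are negative, hence sub)
               or      $0, (%rsp,%rax)     # probe at SP + TEST_ADDR
               jmp     .LPSRL0
       .LPSRE0:                                                              */
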
9683 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9684 to be generated in correct form. */
9685 static void
9686 ix86_finalize_stack_realign_flags (void)
9687 {
9688   /* Check if stack realignment is really needed after reload, and
9689      store the result in cfun.  */
9690 unsigned int incoming_stack_boundary
9691 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9692 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9693 unsigned int stack_realign = (incoming_stack_boundary
9694 < (current_function_is_leaf
9695 ? crtl->max_used_stack_slot_alignment
9696 : crtl->stack_alignment_needed));
9697
9698 if (crtl->stack_realign_finalized)
9699 {
9700       /* After stack_realign_needed is finalized, we can no longer
9701 change it. */
9702 gcc_assert (crtl->stack_realign_needed == stack_realign);
9703 }
9704 else
9705 {
9706 crtl->stack_realign_needed = stack_realign;
9707 crtl->stack_realign_finalized = true;
9708 }
9709 }
9710
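/* Illustrative sketch, not part of GCC: the comparison made above, with
   hypothetical bit values.  Realignment is wanted whenever the alignment
   the function's frame needs exceeds what the incoming stack already
   guarantees.  */
#if 0
static int
needs_stack_realign (unsigned int incoming_bits, unsigned int needed_bits)
{
  return incoming_bits < needed_bits;
}
/* needs_stack_realign (32, 128) == 1, e.g. a 32-bit ABI stack with
   SSE locals; needs_stack_realign (128, 64) == 0.  */
#endif
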
9711 /* Expand the prologue into a bunch of separate insns. */
9712
9713 void
9714 ix86_expand_prologue (void)
9715 {
9716 struct machine_function *m = cfun->machine;
9717 rtx insn, t;
9718 bool pic_reg_used;
9719 struct ix86_frame frame;
9720 HOST_WIDE_INT allocate;
9721 bool int_registers_saved;
9722
9723 ix86_finalize_stack_realign_flags ();
9724
9725 /* DRAP should not coexist with stack_realign_fp */
9726 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9727
9728 memset (&m->fs, 0, sizeof (m->fs));
9729
9730 /* Initialize CFA state for before the prologue. */
9731 m->fs.cfa_reg = stack_pointer_rtx;
9732 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9733
9734 /* Track SP offset to the CFA. We continue tracking this after we've
9735 swapped the CFA register away from SP. In the case of re-alignment
9736      this is fudged; we're interested in offsets within the local frame.  */
9737 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9738 m->fs.sp_valid = true;
9739
9740 ix86_compute_frame_layout (&frame);
9741
9742 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9743 {
9744 /* We should have already generated an error for any use of
9745 ms_hook on a nested function. */
9746 gcc_checking_assert (!ix86_static_chain_on_stack);
9747
9748 /* Check if profiling is active and we shall use profiling before
9749 prologue variant. If so sorry. */
9750 if (crtl->profile && flag_fentry != 0)
9751 sorry ("ms_hook_prologue attribute isn%'t compatible "
9752 "with -mfentry for 32-bit");
9753
9754 /* In ix86_asm_output_function_label we emitted:
9755 8b ff movl.s %edi,%edi
9756 55 push %ebp
9757 8b ec movl.s %esp,%ebp
9758
9759 This matches the hookable function prologue in Win32 API
9760 functions in Microsoft Windows XP Service Pack 2 and newer.
9761 Wine uses this to enable Windows apps to hook the Win32 API
9762 functions provided by Wine.
9763
9764 What that means is that we've already set up the frame pointer. */
9765
9766 if (frame_pointer_needed
9767 && !(crtl->drap_reg && crtl->stack_realign_needed))
9768 {
9769 rtx push, mov;
9770
9771 /* We've decided to use the frame pointer already set up.
9772 Describe this to the unwinder by pretending that both
9773 push and mov insns happen right here.
9774
9775 Putting the unwind info here at the end of the ms_hook
9776 is done so that we can make absolutely certain we get
9777 the required byte sequence at the start of the function,
9778 rather than relying on an assembler that can produce
9779 the exact encoding required.
9780
9781 However it does mean (in the unpatched case) that we have
9782 a 1 insn window where the asynchronous unwind info is
9783 incorrect. However, if we placed the unwind info at
9784 its correct location we would have incorrect unwind info
9785 in the patched case. Which is probably all moot since
9786 I don't expect Wine generates dwarf2 unwind info for the
9787 system libraries that use this feature. */
9788
9789 insn = emit_insn (gen_blockage ());
9790
9791 push = gen_push (hard_frame_pointer_rtx);
9792 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9793 stack_pointer_rtx);
9794 RTX_FRAME_RELATED_P (push) = 1;
9795 RTX_FRAME_RELATED_P (mov) = 1;
9796
9797 RTX_FRAME_RELATED_P (insn) = 1;
9798 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9799 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9800
9801 /* Note that gen_push incremented m->fs.cfa_offset, even
9802 though we didn't emit the push insn here. */
9803 m->fs.cfa_reg = hard_frame_pointer_rtx;
9804 m->fs.fp_offset = m->fs.cfa_offset;
9805 m->fs.fp_valid = true;
9806 }
9807 else
9808 {
9809 /* The frame pointer is not needed so pop %ebp again.
9810 This leaves us with a pristine state. */
9811 emit_insn (gen_pop (hard_frame_pointer_rtx));
9812 }
9813 }
9814
9815 /* The first insn of a function that accepts its static chain on the
9816 stack is to push the register that would be filled in by a direct
9817 call. This insn will be skipped by the trampoline. */
9818 else if (ix86_static_chain_on_stack)
9819 {
9820 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9821 emit_insn (gen_blockage ());
9822
9823 /* We don't want to interpret this push insn as a register save,
9824 only as a stack adjustment. The real copy of the register as
9825 a save will be done later, if needed. */
9826 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9827 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9828 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9829 RTX_FRAME_RELATED_P (insn) = 1;
9830 }
9831
9832   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9833      DRAP is needed and stack realignment is really needed after reload.  */
9834 if (stack_realign_drap)
9835 {
9836 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9837
9838 /* Only need to push parameter pointer reg if it is caller saved. */
9839 if (!call_used_regs[REGNO (crtl->drap_reg)])
9840 {
9841 /* Push arg pointer reg */
9842 insn = emit_insn (gen_push (crtl->drap_reg));
9843 RTX_FRAME_RELATED_P (insn) = 1;
9844 }
9845
9846 /* Grab the argument pointer. */
9847 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9848 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9849 RTX_FRAME_RELATED_P (insn) = 1;
9850 m->fs.cfa_reg = crtl->drap_reg;
9851 m->fs.cfa_offset = 0;
9852
9853 /* Align the stack. */
9854 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9855 stack_pointer_rtx,
9856 GEN_INT (-align_bytes)));
9857 RTX_FRAME_RELATED_P (insn) = 1;
9858
9859 /* Replicate the return address on the stack so that return
9860 address can be reached via (argp - 1) slot. This is needed
9861 to implement macro RETURN_ADDR_RTX and intrinsic function
9862 expand_builtin_return_addr etc. */
9863 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9864 t = gen_frame_mem (Pmode, t);
9865 insn = emit_insn (gen_push (t));
9866 RTX_FRAME_RELATED_P (insn) = 1;
9867
9868 /* For the purposes of frame and register save area addressing,
9869 we've started over with a new frame. */
9870 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9871 m->fs.realigned = true;
9872 }
9873
9874 if (frame_pointer_needed && !m->fs.fp_valid)
9875 {
9876 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9877 slower on all targets. Also sdb doesn't like it. */
9878 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9879 RTX_FRAME_RELATED_P (insn) = 1;
9880
9881 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9882 {
9883 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9884 RTX_FRAME_RELATED_P (insn) = 1;
9885
9886 if (m->fs.cfa_reg == stack_pointer_rtx)
9887 m->fs.cfa_reg = hard_frame_pointer_rtx;
9888 m->fs.fp_offset = m->fs.sp_offset;
9889 m->fs.fp_valid = true;
9890 }
9891 }
9892
9893 int_registers_saved = (frame.nregs == 0);
9894
9895 if (!int_registers_saved)
9896 {
9897 /* If saving registers via PUSH, do so now. */
9898 if (!frame.save_regs_using_mov)
9899 {
9900 ix86_emit_save_regs ();
9901 int_registers_saved = true;
9902 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9903 }
9904
9905       /* When using the red zone we may start register saving before allocating
9906          the stack frame, saving one cycle of the prologue.  However, avoid
9907 doing this if we have to probe the stack; at least on x86_64 the
9908 stack probe can turn into a call that clobbers a red zone location. */
9909 else if (ix86_using_red_zone ()
9910 && (! TARGET_STACK_PROBE
9911 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9912 {
9913 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9914 int_registers_saved = true;
9915 }
9916 }
9917
9918 if (stack_realign_fp)
9919 {
9920 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9921 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9922
9923 /* The computation of the size of the re-aligned stack frame means
9924 that we must allocate the size of the register save area before
9925 performing the actual alignment. Otherwise we cannot guarantee
9926 that there's enough storage above the realignment point. */
9927 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9928 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9929 GEN_INT (m->fs.sp_offset
9930 - frame.sse_reg_save_offset),
9931 -1, false);
9932
9933 /* Align the stack. */
9934 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9935 stack_pointer_rtx,
9936 GEN_INT (-align_bytes)));
9937
9938 /* For the purposes of register save area addressing, the stack
9939 pointer is no longer valid. As for the value of sp_offset,
9940 see ix86_compute_frame_layout, which we need to match in order
9941 to pass verification of stack_pointer_offset at the end. */
9942 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9943 m->fs.sp_valid = false;
9944 }
9945
9946 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9947
9948 if (flag_stack_usage_info)
9949 {
9950 /* We start to count from ARG_POINTER. */
9951 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9952
9953 /* If it was realigned, take into account the fake frame. */
9954 if (stack_realign_drap)
9955 {
9956 if (ix86_static_chain_on_stack)
9957 stack_size += UNITS_PER_WORD;
9958
9959 if (!call_used_regs[REGNO (crtl->drap_reg)])
9960 stack_size += UNITS_PER_WORD;
9961
9962 /* This over-estimates by 1 minimal-stack-alignment-unit but
9963 mitigates that by counting in the new return address slot. */
9964 current_function_dynamic_stack_size
9965 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9966 }
9967
9968 current_function_static_stack_size = stack_size;
9969 }
9970
9971 /* The stack has already been decremented by the instruction calling us
9972 so probe if the size is non-negative to preserve the protection area. */
9973 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9974 {
9975 /* We expect the registers to be saved when probes are used. */
9976 gcc_assert (int_registers_saved);
9977
9978 if (STACK_CHECK_MOVING_SP)
9979 {
9980 ix86_adjust_stack_and_probe (allocate);
9981 allocate = 0;
9982 }
9983 else
9984 {
9985 HOST_WIDE_INT size = allocate;
9986
9987 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9988 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9989
9990 if (TARGET_STACK_PROBE)
9991 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9992 else
9993 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9994 }
9995 }
9996
9997 if (allocate == 0)
9998 ;
9999 else if (!ix86_target_stack_probe ()
10000 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10001 {
10002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10003 GEN_INT (-allocate), -1,
10004 m->fs.cfa_reg == stack_pointer_rtx);
10005 }
10006 else
10007 {
10008 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10009 rtx r10 = NULL;
10010 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10011
10012 bool eax_live = false;
10013 bool r10_live = false;
10014
10015 if (TARGET_64BIT)
10016 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10017 if (!TARGET_64BIT_MS_ABI)
10018 eax_live = ix86_eax_live_at_start_p ();
10019
10020 if (eax_live)
10021 {
10022 emit_insn (gen_push (eax));
10023 allocate -= UNITS_PER_WORD;
10024 }
10025 if (r10_live)
10026 {
10027 r10 = gen_rtx_REG (Pmode, R10_REG);
10028 emit_insn (gen_push (r10));
10029 allocate -= UNITS_PER_WORD;
10030 }
10031
10032 emit_move_insn (eax, GEN_INT (allocate));
10033 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10034
10035 /* Use the fact that AX still contains ALLOCATE. */
10036 adjust_stack_insn = (TARGET_64BIT
10037 ? gen_pro_epilogue_adjust_stack_di_sub
10038 : gen_pro_epilogue_adjust_stack_si_sub);
10039
10040 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10041 stack_pointer_rtx, eax));
10042
10043 /* Note that SEH directives need to continue tracking the stack
10044 pointer even after the frame pointer has been set up. */
10045 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10046 {
10047 if (m->fs.cfa_reg == stack_pointer_rtx)
10048 m->fs.cfa_offset += allocate;
10049
10050 RTX_FRAME_RELATED_P (insn) = 1;
10051 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10052 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10053 plus_constant (stack_pointer_rtx,
10054 -allocate)));
10055 }
10056 m->fs.sp_offset += allocate;
10057
10058 if (r10_live && eax_live)
10059 {
10060 t = choose_baseaddr (m->fs.sp_offset - allocate);
10061 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10062 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10063 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10064 }
10065 else if (eax_live || r10_live)
10066 {
10067 t = choose_baseaddr (m->fs.sp_offset - allocate);
10068 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10069 }
10070 }
10071 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10072
10073   /* If we haven't already set up the frame pointer, do so now.  */
10074 if (frame_pointer_needed && !m->fs.fp_valid)
10075 {
10076 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10077 GEN_INT (frame.stack_pointer_offset
10078 - frame.hard_frame_pointer_offset));
10079 insn = emit_insn (insn);
10080 RTX_FRAME_RELATED_P (insn) = 1;
10081 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10082
10083 if (m->fs.cfa_reg == stack_pointer_rtx)
10084 m->fs.cfa_reg = hard_frame_pointer_rtx;
10085 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10086 m->fs.fp_valid = true;
10087 }
10088
10089 if (!int_registers_saved)
10090 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10091 if (frame.nsseregs)
10092 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10093
10094 pic_reg_used = false;
10095 if (pic_offset_table_rtx
10096 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10097 || crtl->profile))
10098 {
10099 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10100
10101 if (alt_pic_reg_used != INVALID_REGNUM)
10102 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10103
10104 pic_reg_used = true;
10105 }
10106
10107 if (pic_reg_used)
10108 {
10109 if (TARGET_64BIT)
10110 {
10111 if (ix86_cmodel == CM_LARGE_PIC)
10112 {
10113 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10114 rtx label = gen_label_rtx ();
10115 emit_label (label);
10116 LABEL_PRESERVE_P (label) = 1;
10117 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10118 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10119 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10120 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10121 pic_offset_table_rtx, tmp_reg));
10122 }
10123 else
10124 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10125 }
10126 else
10127 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10128 }
10129
10130 /* In the pic_reg_used case, make sure that the got load isn't deleted
10131 when mcount needs it. Blockage to avoid call movement across mcount
10132 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10133 note. */
10134 if (crtl->profile && !flag_fentry && pic_reg_used)
10135 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10136
10137 if (crtl->drap_reg && !crtl->stack_realign_needed)
10138 {
10139       /* vDRAP is set up, but after reload it turns out stack realignment
10140          isn't necessary; here we emit the prologue to set up DRAP
10141          without the stack realignment adjustment.  */
10142 t = choose_baseaddr (0);
10143 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10144 }
10145
10146 /* Prevent instructions from being scheduled into register save push
10147 sequence when access to the redzone area is done through frame pointer.
10148 The offset between the frame pointer and the stack pointer is calculated
10149 relative to the value of the stack pointer at the end of the function
10150 prologue, and moving instructions that access redzone area via frame
10151 pointer inside push sequence violates this assumption. */
10152 if (frame_pointer_needed && frame.red_zone_size)
10153 emit_insn (gen_memory_blockage ());
10154
10155 /* Emit cld instruction if stringops are used in the function. */
10156 if (TARGET_CLD && ix86_current_function_needs_cld)
10157 emit_insn (gen_cld ());
10158
10159 /* SEH requires that the prologue end within 256 bytes of the start of
10160 the function. Prevent instruction schedules that would extend that. */
10161 if (TARGET_SEH)
10162 emit_insn (gen_blockage ());
10163 }
10164
10165 /* Emit code to restore REG using a POP insn. */
10166
10167 static void
10168 ix86_emit_restore_reg_using_pop (rtx reg)
10169 {
10170 struct machine_function *m = cfun->machine;
10171 rtx insn = emit_insn (gen_pop (reg));
10172
10173 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10174 m->fs.sp_offset -= UNITS_PER_WORD;
10175
10176 if (m->fs.cfa_reg == crtl->drap_reg
10177 && REGNO (reg) == REGNO (crtl->drap_reg))
10178 {
10179 /* Previously we'd represented the CFA as an expression
10180 like *(%ebp - 8). We've just popped that value from
10181 the stack, which means we need to reset the CFA to
10182 the drap register. This will remain until we restore
10183 the stack pointer. */
10184 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10185 RTX_FRAME_RELATED_P (insn) = 1;
10186
10187 /* This means that the DRAP register is valid for addressing too. */
10188 m->fs.drap_valid = true;
10189 return;
10190 }
10191
10192 if (m->fs.cfa_reg == stack_pointer_rtx)
10193 {
10194 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10195 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10196 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10197 RTX_FRAME_RELATED_P (insn) = 1;
10198
10199 m->fs.cfa_offset -= UNITS_PER_WORD;
10200 }
10201
10202 /* When the frame pointer is the CFA, and we pop it, we are
10203 swapping back to the stack pointer as the CFA. This happens
10204 for stack frames that don't allocate other data, so we assume
10205 the stack pointer is now pointing at the return address, i.e.
10206 the function entry state, which makes the offset be 1 word. */
10207 if (reg == hard_frame_pointer_rtx)
10208 {
10209 m->fs.fp_valid = false;
10210 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10211 {
10212 m->fs.cfa_reg = stack_pointer_rtx;
10213 m->fs.cfa_offset -= UNITS_PER_WORD;
10214
10215 add_reg_note (insn, REG_CFA_DEF_CFA,
10216 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10217 GEN_INT (m->fs.cfa_offset)));
10218 RTX_FRAME_RELATED_P (insn) = 1;
10219 }
10220 }
10221 }
10222
10223 /* Emit code to restore saved registers using POP insns. */
10224
10225 static void
10226 ix86_emit_restore_regs_using_pop (void)
10227 {
10228 unsigned int regno;
10229
10230 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10231 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10232 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10233 }
10234
10235 /* Emit code and notes for the LEAVE instruction. */
10236
10237 static void
10238 ix86_emit_leave (void)
10239 {
10240 struct machine_function *m = cfun->machine;
10241 rtx insn = emit_insn (ix86_gen_leave ());
10242
10243 ix86_add_queued_cfa_restore_notes (insn);
10244
10245 gcc_assert (m->fs.fp_valid);
10246 m->fs.sp_valid = true;
10247 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10248 m->fs.fp_valid = false;
10249
10250 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10251 {
10252 m->fs.cfa_reg = stack_pointer_rtx;
10253 m->fs.cfa_offset = m->fs.sp_offset;
10254
10255 add_reg_note (insn, REG_CFA_DEF_CFA,
10256 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10257 RTX_FRAME_RELATED_P (insn) = 1;
10258 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10259 m->fs.fp_offset);
10260 }
10261 }
10262
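/* Illustrative sketch, not part of GCC: what `leave' does and how the
   frame-state bookkeeping in ix86_emit_leave above mirrors it.  The struct
   and field names are hypothetical stand-ins for machine_frame_state.  */
#if 0
struct fs { long sp_offset, fp_offset; int sp_valid, fp_valid; };

static void
model_leave (struct fs *s, long word)
{
  /* leave is equivalent to:  mov %ebp, %esp ; pop %ebp.
     After the pop, SP sits one word closer to the CFA than FP did,
     and the frame pointer no longer addresses the frame.  */
  s->sp_offset = s->fp_offset - word;
  s->sp_valid = 1;
  s->fp_valid = 0;
}
#endif
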
10263 /* Emit code to restore saved registers using MOV insns.
10264 First register is restored from CFA - CFA_OFFSET. */
10265 static void
10266 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10267 bool maybe_eh_return)
10268 {
10269 struct machine_function *m = cfun->machine;
10270 unsigned int regno;
10271
10272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10273 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10274 {
10275 rtx reg = gen_rtx_REG (Pmode, regno);
10276 rtx insn, mem;
10277
10278 mem = choose_baseaddr (cfa_offset);
10279 mem = gen_frame_mem (Pmode, mem);
10280 insn = emit_move_insn (reg, mem);
10281
10282 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10283 {
10284 /* Previously we'd represented the CFA as an expression
10285 like *(%ebp - 8). We've just popped that value from
10286 the stack, which means we need to reset the CFA to
10287 the drap register. This will remain until we restore
10288 the stack pointer. */
10289 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10290 RTX_FRAME_RELATED_P (insn) = 1;
10291
10292 /* This means that the DRAP register is valid for addressing. */
10293 m->fs.drap_valid = true;
10294 }
10295 else
10296 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10297
10298 cfa_offset -= UNITS_PER_WORD;
10299 }
10300 }
10301
10302 /* Emit code to restore saved registers using MOV insns.
10303 First register is restored from CFA - CFA_OFFSET. */
10304 static void
10305 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10306 bool maybe_eh_return)
10307 {
10308 unsigned int regno;
10309
10310 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10311 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10312 {
10313 rtx reg = gen_rtx_REG (V4SFmode, regno);
10314 rtx mem;
10315
10316 mem = choose_baseaddr (cfa_offset);
10317 mem = gen_rtx_MEM (V4SFmode, mem);
10318 set_mem_align (mem, 128);
10319 emit_move_insn (reg, mem);
10320
10321 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10322
10323 cfa_offset -= 16;
10324 }
10325 }
10326
10327 /* Restore function stack, frame, and registers. */
10328
10329 void
10330 ix86_expand_epilogue (int style)
10331 {
10332 struct machine_function *m = cfun->machine;
10333 struct machine_frame_state frame_state_save = m->fs;
10334 struct ix86_frame frame;
10335 bool restore_regs_via_mov;
10336 bool using_drap;
10337
10338 ix86_finalize_stack_realign_flags ();
10339 ix86_compute_frame_layout (&frame);
10340
10341 m->fs.sp_valid = (!frame_pointer_needed
10342 || (current_function_sp_is_unchanging
10343 && !stack_realign_fp));
10344 gcc_assert (!m->fs.sp_valid
10345 || m->fs.sp_offset == frame.stack_pointer_offset);
10346
10347 /* The FP must be valid if the frame pointer is present. */
10348 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10349 gcc_assert (!m->fs.fp_valid
10350 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10351
10352 /* We must have *some* valid pointer to the stack frame. */
10353 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10354
10355 /* The DRAP is never valid at this point. */
10356 gcc_assert (!m->fs.drap_valid);
10357
10358 /* See the comment about red zone and frame
10359 pointer usage in ix86_expand_prologue. */
10360 if (frame_pointer_needed && frame.red_zone_size)
10361 emit_insn (gen_memory_blockage ());
10362
10363 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10364 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10365
10366 /* Determine the CFA offset of the end of the red-zone. */
10367 m->fs.red_zone_offset = 0;
10368 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10369 {
10370 /* The red-zone begins below the return address. */
10371 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10372
10373 /* When the register save area is in the aligned portion of
10374 the stack, determine the maximum runtime displacement that
10375 matches up with the aligned frame. */
10376 if (stack_realign_drap)
10377 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10378 + UNITS_PER_WORD);
10379 }
10380
10381 /* Special care must be taken for the normal return case of a function
10382 using eh_return: the eax and edx registers are marked as saved, but
10383 not restored along this path. Adjust the save location to match. */
10384 if (crtl->calls_eh_return && style != 2)
10385 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10386
10387 /* EH_RETURN requires the use of moves to function properly. */
10388 if (crtl->calls_eh_return)
10389 restore_regs_via_mov = true;
10390 /* SEH requires the use of pops to identify the epilogue. */
10391 else if (TARGET_SEH)
10392 restore_regs_via_mov = false;
10393 /* If we're only restoring one register and sp is not valid then
10394      use a move instruction to restore the register, since it's
10395 less work than reloading sp and popping the register. */
10396 else if (!m->fs.sp_valid && frame.nregs <= 1)
10397 restore_regs_via_mov = true;
10398 else if (TARGET_EPILOGUE_USING_MOVE
10399 && cfun->machine->use_fast_prologue_epilogue
10400 && (frame.nregs > 1
10401 || m->fs.sp_offset != frame.reg_save_offset))
10402 restore_regs_via_mov = true;
10403 else if (frame_pointer_needed
10404 && !frame.nregs
10405 && m->fs.sp_offset != frame.reg_save_offset)
10406 restore_regs_via_mov = true;
10407 else if (frame_pointer_needed
10408 && TARGET_USE_LEAVE
10409 && cfun->machine->use_fast_prologue_epilogue
10410 && frame.nregs == 1)
10411 restore_regs_via_mov = true;
10412 else
10413 restore_regs_via_mov = false;
10414
10415 if (restore_regs_via_mov || frame.nsseregs)
10416 {
10417 /* Ensure that the entire register save area is addressable via
10418 the stack pointer, if we will restore via sp. */
10419 if (TARGET_64BIT
10420 && m->fs.sp_offset > 0x7fffffff
10421 && !(m->fs.fp_valid || m->fs.drap_valid)
10422 && (frame.nsseregs + frame.nregs) != 0)
10423 {
10424 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10425 GEN_INT (m->fs.sp_offset
10426 - frame.sse_reg_save_offset),
10427 style,
10428 m->fs.cfa_reg == stack_pointer_rtx);
10429 }
10430 }
10431
10432 /* If there are any SSE registers to restore, then we have to do it
10433 via moves, since there's obviously no pop for SSE regs. */
10434 if (frame.nsseregs)
10435 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10436 style == 2);
10437
10438 if (restore_regs_via_mov)
10439 {
10440 rtx t;
10441
10442 if (frame.nregs)
10443 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10444
10445 /* eh_return epilogues need %ecx added to the stack pointer. */
10446 if (style == 2)
10447 {
10448 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10449
10450 /* Stack align doesn't work with eh_return. */
10451 gcc_assert (!stack_realign_drap);
10452 /* Neither do regparm nested functions. */
10453 gcc_assert (!ix86_static_chain_on_stack);
10454
10455 if (frame_pointer_needed)
10456 {
10457 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10458 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10459 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10460
10461 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10462 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10463
10464 /* Note that we use SA as a temporary CFA, as the return
10465 address is at the proper place relative to it. We
10466 pretend this happens at the FP restore insn because
10467 prior to this insn the FP would be stored at the wrong
10468 offset relative to SA, and after this insn we have no
10469 other reasonable register to use for the CFA. We don't
10470 bother resetting the CFA to the SP for the duration of
10471 the return insn. */
10472 add_reg_note (insn, REG_CFA_DEF_CFA,
10473 plus_constant (sa, UNITS_PER_WORD));
10474 ix86_add_queued_cfa_restore_notes (insn);
10475 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10476 RTX_FRAME_RELATED_P (insn) = 1;
10477
10478 m->fs.cfa_reg = sa;
10479 m->fs.cfa_offset = UNITS_PER_WORD;
10480 m->fs.fp_valid = false;
10481
10482 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10483 const0_rtx, style, false);
10484 }
10485 else
10486 {
10487 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10488 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10489 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10490 ix86_add_queued_cfa_restore_notes (insn);
10491
10492 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10493 if (m->fs.cfa_offset != UNITS_PER_WORD)
10494 {
10495 m->fs.cfa_offset = UNITS_PER_WORD;
10496 add_reg_note (insn, REG_CFA_DEF_CFA,
10497 plus_constant (stack_pointer_rtx,
10498 UNITS_PER_WORD));
10499 RTX_FRAME_RELATED_P (insn) = 1;
10500 }
10501 }
10502 m->fs.sp_offset = UNITS_PER_WORD;
10503 m->fs.sp_valid = true;
10504 }
10505 }
10506 else
10507 {
10508 /* SEH requires that the function end with (1) a stack adjustment
10509 if necessary, (2) a sequence of pops, and (3) a return or
10510 jump instruction. Prevent insns from the function body from
10511 being scheduled into this sequence. */
10512 if (TARGET_SEH)
10513 {
10514 /* Prevent a catch region from being adjacent to the standard
10515 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10516 several other flags that would be interesting to test are
10517 yet set up. */
10518 if (flag_non_call_exceptions)
10519 emit_insn (gen_nops (const1_rtx));
10520 else
10521 emit_insn (gen_blockage ());
10522 }
10523
10524 /* First step is to deallocate the stack frame so that we can
10525 pop the registers. */
10526 if (!m->fs.sp_valid)
10527 {
10528 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10529 GEN_INT (m->fs.fp_offset
10530 - frame.reg_save_offset),
10531 style, false);
10532 }
10533 else if (m->fs.sp_offset != frame.reg_save_offset)
10534 {
10535 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10536 GEN_INT (m->fs.sp_offset
10537 - frame.reg_save_offset),
10538 style,
10539 m->fs.cfa_reg == stack_pointer_rtx);
10540 }
10541
10542 ix86_emit_restore_regs_using_pop ();
10543 }
10544
10545 /* If we used a frame pointer and haven't already got rid of it,
10546 then do so now. */
10547 if (m->fs.fp_valid)
10548 {
10549 /* If the stack pointer is valid and pointing at the frame
10550 pointer store address, then we only need a pop. */
10551 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10552 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10553 /* The `leave' instruction results in shorter dependency chains on
10554 CPUs that are able to execute it quickly. */
10555 else if (TARGET_USE_LEAVE
10556 || optimize_function_for_size_p (cfun)
10557 || !cfun->machine->use_fast_prologue_epilogue)
10558 ix86_emit_leave ();
10559 else
10560 {
10561 pro_epilogue_adjust_stack (stack_pointer_rtx,
10562 hard_frame_pointer_rtx,
10563 const0_rtx, style, !using_drap);
10564 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10565 }
10566 }
10567
10568 if (using_drap)
10569 {
10570 int param_ptr_offset = UNITS_PER_WORD;
10571 rtx insn;
10572
10573 gcc_assert (stack_realign_drap);
10574
10575 if (ix86_static_chain_on_stack)
10576 param_ptr_offset += UNITS_PER_WORD;
10577 if (!call_used_regs[REGNO (crtl->drap_reg)])
10578 param_ptr_offset += UNITS_PER_WORD;
10579
10580 insn = emit_insn (gen_rtx_SET
10581 (VOIDmode, stack_pointer_rtx,
10582 gen_rtx_PLUS (Pmode,
10583 crtl->drap_reg,
10584 GEN_INT (-param_ptr_offset))));
10585 m->fs.cfa_reg = stack_pointer_rtx;
10586 m->fs.cfa_offset = param_ptr_offset;
10587 m->fs.sp_offset = param_ptr_offset;
10588 m->fs.realigned = false;
10589
10590 add_reg_note (insn, REG_CFA_DEF_CFA,
10591 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10592 GEN_INT (param_ptr_offset)));
10593 RTX_FRAME_RELATED_P (insn) = 1;
10594
10595 if (!call_used_regs[REGNO (crtl->drap_reg)])
10596 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10597 }
10598
10599 /* At this point the stack pointer must be valid, and we must have
10600 restored all of the registers. We may not have deallocated the
10601 entire stack frame. We've delayed this until now because it may
10602 be possible to merge the local stack deallocation with the
10603 deallocation forced by ix86_static_chain_on_stack. */
10604 gcc_assert (m->fs.sp_valid);
10605 gcc_assert (!m->fs.fp_valid);
10606 gcc_assert (!m->fs.realigned);
10607 if (m->fs.sp_offset != UNITS_PER_WORD)
10608 {
10609 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10610 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10611 style, true);
10612 }
10613
10614 /* Sibcall epilogues don't want a return instruction. */
10615 if (style == 0)
10616 {
10617 m->fs = frame_state_save;
10618 return;
10619 }
10620
10621 /* Emit vzeroupper if needed. */
10622 if (TARGET_VZEROUPPER
10623 && !TREE_THIS_VOLATILE (cfun->decl)
10624 && !cfun->machine->caller_return_avx256_p)
10625 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10626
10627 if (crtl->args.pops_args && crtl->args.size)
10628 {
10629 rtx popc = GEN_INT (crtl->args.pops_args);
10630
10631 /* A return instruction can only pop up to 64K-1 bytes on i386. If asked to pop more,
10632 pop the return address, do an explicit add, and jump indirectly to the caller. */
10633
10634 if (crtl->args.pops_args >= 65536)
10635 {
10636 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10637 rtx insn;
10638
10639 /* There is no "pascal" calling convention in any 64bit ABI. */
10640 gcc_assert (!TARGET_64BIT);
10641
10642 insn = emit_insn (gen_pop (ecx));
10643 m->fs.cfa_offset -= UNITS_PER_WORD;
10644 m->fs.sp_offset -= UNITS_PER_WORD;
10645
10646 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10647 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10648 add_reg_note (insn, REG_CFA_REGISTER,
10649 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10650 RTX_FRAME_RELATED_P (insn) = 1;
10651
10652 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10653 popc, -1, true);
10654 emit_jump_insn (gen_return_indirect_internal (ecx));
10655 }
10656 else
10657 emit_jump_insn (gen_return_pop_internal (popc));
10658 }
10659 else
10660 emit_jump_insn (gen_return_internal ());
10661
10662 /* Restore the state back to the state from the prologue,
10663 so that it's correct for the next epilogue. */
10664 m->fs = frame_state_save;
10665 }
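/* A minimal standalone illustration of the 64K callee-pops limit handled at
   the end of ix86_expand_epilogue above; this is only a sketch, the struct
   and function names are made up, and it applies to ia32 only.  "ret $imm16"
   can pop at most 65535 bytes of arguments, so a callee-pops function whose
   argument area is larger gets the pop-%ecx / add / indirect-jump epilogue
   instead of a plain popping return.  */

struct huge_args_sketch
{
  char bytes[70000];		/* More than 65535 bytes of stack arguments.  */
};

#if defined (__i386__)
int __attribute__ ((stdcall))
consume_huge_args_sketch (struct huge_args_sketch a)
{
  return a.bytes[0];		/* crtl->args.pops_args >= 65536 here.  */
}
#endif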
10666
10667 /* Reset the PIC register from the function's potential modifications. */
10668
10669 static void
10670 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10671 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10672 {
10673 if (pic_offset_table_rtx)
10674 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10675 #if TARGET_MACHO
10676 /* Mach-O doesn't support labels at the end of objects, so if
10677 it looks like we might want one, insert a NOP. */
10678 {
10679 rtx insn = get_last_insn ();
10680 while (insn
10681 && NOTE_P (insn)
10682 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10683 insn = PREV_INSN (insn);
10684 if (insn
10685 && (LABEL_P (insn)
10686 || (NOTE_P (insn)
10687 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10688 fputs ("\tnop\n", file);
10689 }
10690 #endif
10691
10692 }
10693
10694 /* Return a scratch register to use in the split stack prologue. The
10695 split stack prologue is used for -fsplit-stack. It is the first
10696 instructions in the function, even before the regular prologue.
10697 The scratch register can be any caller-saved register which is not
10698 used for parameters or for the static chain. */
10699
10700 static unsigned int
10701 split_stack_prologue_scratch_regno (void)
10702 {
10703 if (TARGET_64BIT)
10704 return R11_REG;
10705 else
10706 {
10707 bool is_fastcall;
10708 int regparm;
10709
10710 is_fastcall = (lookup_attribute ("fastcall",
10711 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10712 != NULL);
10713 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10714
10715 if (is_fastcall)
10716 {
10717 if (DECL_STATIC_CHAIN (cfun->decl))
10718 {
10719 sorry ("-fsplit-stack does not support fastcall with "
10720 "nested function");
10721 return INVALID_REGNUM;
10722 }
10723 return AX_REG;
10724 }
10725 else if (regparm < 3)
10726 {
10727 if (!DECL_STATIC_CHAIN (cfun->decl))
10728 return CX_REG;
10729 else
10730 {
10731 if (regparm >= 2)
10732 {
10733 sorry ("-fsplit-stack does not support 2 register "
10734 " parameters for a nested function");
10735 return INVALID_REGNUM;
10736 }
10737 return DX_REG;
10738 }
10739 }
10740 else
10741 {
10742 /* FIXME: We could make this work by pushing a register
10743 around the addition and comparison. */
10744 sorry ("-fsplit-stack does not support 3 register parameters");
10745 return INVALID_REGNUM;
10746 }
10747 }
10748 }
10749
10750 /* A SYMBOL_REF for the function which allocates new stack space for
10751 -fsplit-stack. */
10752
10753 static GTY(()) rtx split_stack_fn;
10754
10755 /* A SYMBOL_REF for the function that allocates more stack space when using the large
10756 model. */
10757
10758 static GTY(()) rtx split_stack_fn_large;
10759
10760 /* Handle -fsplit-stack. These are the first instructions in the
10761 function, even before the regular prologue. */
10762
10763 void
10764 ix86_expand_split_stack_prologue (void)
10765 {
10766 struct ix86_frame frame;
10767 HOST_WIDE_INT allocate;
10768 unsigned HOST_WIDE_INT args_size;
10769 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10770 rtx scratch_reg = NULL_RTX;
10771 rtx varargs_label = NULL_RTX;
10772 rtx fn;
10773
10774 gcc_assert (flag_split_stack && reload_completed);
10775
10776 ix86_finalize_stack_realign_flags ();
10777 ix86_compute_frame_layout (&frame);
10778 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10779
10780 /* This is the label we will branch to if we have enough stack
10781 space. We expect the basic block reordering pass to reverse this
10782 branch if optimizing, so that we branch in the unlikely case. */
10783 label = gen_label_rtx ();
10784
10785 /* We need to compare the stack pointer minus the frame size with
10786 the stack boundary in the TCB. The stack boundary always gives
10787 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10788 can compare directly. Otherwise we need to do an addition. */
10789
10790 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10791 UNSPEC_STACK_CHECK);
10792 limit = gen_rtx_CONST (Pmode, limit);
10793 limit = gen_rtx_MEM (Pmode, limit);
10794 if (allocate < SPLIT_STACK_AVAILABLE)
10795 current = stack_pointer_rtx;
10796 else
10797 {
10798 unsigned int scratch_regno;
10799 rtx offset;
10800
10801 /* We need a scratch register to hold the stack pointer minus
10802 the required frame size. Since this is the very start of the
10803 function, the scratch register can be any caller-saved
10804 register which is not used for parameters. */
10805 offset = GEN_INT (- allocate);
10806 scratch_regno = split_stack_prologue_scratch_regno ();
10807 if (scratch_regno == INVALID_REGNUM)
10808 return;
10809 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10810 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10811 {
10812 /* We don't use ix86_gen_add3 in this case because it will
10813 want to split to lea, but when not optimizing the insn
10814 will not be split after this point. */
10815 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10816 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10817 offset)));
10818 }
10819 else
10820 {
10821 emit_move_insn (scratch_reg, offset);
10822 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10823 stack_pointer_rtx));
10824 }
10825 current = scratch_reg;
10826 }
10827
10828 ix86_expand_branch (GEU, current, limit, label);
10829 jump_insn = get_last_insn ();
10830 JUMP_LABEL (jump_insn) = label;
10831
10832 /* Mark the jump as very likely to be taken. */
10833 add_reg_note (jump_insn, REG_BR_PROB,
10834 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10835
10836 if (split_stack_fn == NULL_RTX)
10837 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10838 fn = split_stack_fn;
10839
10840 /* Get more stack space. We pass in the desired stack space and the
10841 size of the arguments to copy to the new stack. In 32-bit mode
10842 we push the parameters; __morestack will return on a new stack
10843 anyhow. In 64-bit mode we pass the parameters in r10 and
10844 r11. */
10845 allocate_rtx = GEN_INT (allocate);
10846 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10847 call_fusage = NULL_RTX;
10848 if (TARGET_64BIT)
10849 {
10850 rtx reg10, reg11;
10851
10852 reg10 = gen_rtx_REG (Pmode, R10_REG);
10853 reg11 = gen_rtx_REG (Pmode, R11_REG);
10854
10855 /* If this function uses a static chain, it will be in %r10.
10856 Preserve it across the call to __morestack. */
10857 if (DECL_STATIC_CHAIN (cfun->decl))
10858 {
10859 rtx rax;
10860
10861 rax = gen_rtx_REG (Pmode, AX_REG);
10862 emit_move_insn (rax, reg10);
10863 use_reg (&call_fusage, rax);
10864 }
10865
10866 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10867 {
10868 HOST_WIDE_INT argval;
10869
10870 /* When using the large model we need to load the address
10871 into a register, and we've run out of registers. So we
10872 switch to a different calling convention, and we call a
10873 different function: __morestack_large. We pass the
10874 argument size in the upper 32 bits of r10 and pass the
10875 frame size in the lower 32 bits. */
10876 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10877 gcc_assert ((args_size & 0xffffffff) == args_size);
10878
10879 if (split_stack_fn_large == NULL_RTX)
10880 split_stack_fn_large =
10881 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10882
10883 if (ix86_cmodel == CM_LARGE_PIC)
10884 {
10885 rtx label, x;
10886
10887 label = gen_label_rtx ();
10888 emit_label (label);
10889 LABEL_PRESERVE_P (label) = 1;
10890 emit_insn (gen_set_rip_rex64 (reg10, label));
10891 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10892 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10893 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10894 UNSPEC_GOT);
10895 x = gen_rtx_CONST (Pmode, x);
10896 emit_move_insn (reg11, x);
10897 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10898 x = gen_const_mem (Pmode, x);
10899 emit_move_insn (reg11, x);
10900 }
10901 else
10902 emit_move_insn (reg11, split_stack_fn_large);
10903
10904 fn = reg11;
10905
10906 argval = ((args_size << 16) << 16) + allocate;
10907 emit_move_insn (reg10, GEN_INT (argval));
10908 }
10909 else
10910 {
10911 emit_move_insn (reg10, allocate_rtx);
10912 emit_move_insn (reg11, GEN_INT (args_size));
10913 use_reg (&call_fusage, reg11);
10914 }
10915
10916 use_reg (&call_fusage, reg10);
10917 }
10918 else
10919 {
10920 emit_insn (gen_push (GEN_INT (args_size)));
10921 emit_insn (gen_push (allocate_rtx));
10922 }
10923 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10924 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10925 NULL_RTX, false);
10926 add_function_usage_to (call_insn, call_fusage);
10927
10928 /* In order to make call/return prediction work right, we now need
10929 to execute a return instruction. See
10930 libgcc/config/i386/morestack.S for the details on how this works.
10931
10932 For flow purposes gcc must not see this as a return
10933 instruction--we need control flow to continue at the subsequent
10934 label. Therefore, we use an unspec. */
10935 gcc_assert (crtl->args.pops_args < 65536);
10936 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10937
10938 /* If we are in 64-bit mode and this function uses a static chain,
10939 we saved %r10 in %rax before calling __morestack. */
10940 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10941 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10942 gen_rtx_REG (Pmode, AX_REG));
10943
10944 /* If this function calls va_start, we need to store a pointer to
10945 the arguments on the old stack, because they may not have been
10946 all copied to the new stack. At this point the old stack can be
10947 found at the frame pointer value used by __morestack, because
10948 __morestack has set that up before calling back to us. Here we
10949 store that pointer in a scratch register, and in
10950 ix86_expand_prologue we store the scratch register in a stack
10951 slot. */
10952 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10953 {
10954 unsigned int scratch_regno;
10955 rtx frame_reg;
10956 int words;
10957
10958 scratch_regno = split_stack_prologue_scratch_regno ();
10959 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10960 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10961
10962 /* 64-bit:
10963 fp -> old fp value
10964 return address within this function
10965 return address of caller of this function
10966 stack arguments
10967 So we add three words to get to the stack arguments.
10968
10969 32-bit:
10970 fp -> old fp value
10971 return address within this function
10972 first argument to __morestack
10973 second argument to __morestack
10974 return address of caller of this function
10975 stack arguments
10976 So we add five words to get to the stack arguments.
10977 */
10978 words = TARGET_64BIT ? 3 : 5;
10979 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10980 gen_rtx_PLUS (Pmode, frame_reg,
10981 GEN_INT (words * UNITS_PER_WORD))));
10982
10983 varargs_label = gen_label_rtx ();
10984 emit_jump_insn (gen_jump (varargs_label));
10985 JUMP_LABEL (get_last_insn ()) = varargs_label;
10986
10987 emit_barrier ();
10988 }
10989
10990 emit_label (label);
10991 LABEL_NUSES (label) = 1;
10992
10993 /* If this function calls va_start, we now have to set the scratch
10994 register for the case where we do not call __morestack. In this
10995 case we need to set it based on the stack pointer. */
10996 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10997 {
10998 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10999 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11000 GEN_INT (UNITS_PER_WORD))));
11001
11002 emit_label (varargs_label);
11003 LABEL_NUSES (varargs_label) = 1;
11004 }
11005 }
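/* A minimal standalone sketch of the large-model argument passing used in
   ix86_expand_split_stack_prologue above: __morestack_large_model receives a
   single 64-bit value in %r10 with the argument size in the upper 32 bits and
   the frame size in the lower 32 bits.  The helper names below are
   illustrative only; the two 16-bit shifts mirror the
   ((args_size << 16) << 16) expression used above.  */

static unsigned long long
pack_morestack_large_args_sketch (unsigned long long args_size,
				  unsigned long long allocate)
{
  /* Both values are asserted above to fit in 32 bits.  */
  return ((args_size << 16) << 16) + allocate;
}

static void
unpack_morestack_large_args_sketch (unsigned long long argval,
				    unsigned long long *args_size,
				    unsigned long long *allocate)
{
  *args_size = (argval >> 16) >> 16;
  *allocate = argval & 0xffffffffULL;
}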
11006
11007 /* We may have to tell the dataflow pass that the split stack prologue
11008 is initializing a scratch register. */
11009
11010 static void
11011 ix86_live_on_entry (bitmap regs)
11012 {
11013 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11014 {
11015 gcc_assert (flag_split_stack);
11016 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11017 }
11018 }
11019 \f
11020 /* Extract the parts of an RTL expression that is a valid memory address
11021 for an instruction. Return 0 if the structure of the address is
11022 grossly off. Return -1 if the address contains ASHIFT, so it is not
11023 strictly valid, but is still used for computing the length of a lea instruction. */
11024
11025 int
11026 ix86_decompose_address (rtx addr, struct ix86_address *out)
11027 {
11028 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11029 rtx base_reg, index_reg;
11030 HOST_WIDE_INT scale = 1;
11031 rtx scale_rtx = NULL_RTX;
11032 rtx tmp;
11033 int retval = 1;
11034 enum ix86_address_seg seg = SEG_DEFAULT;
11035
11036 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11037 base = addr;
11038 else if (GET_CODE (addr) == PLUS)
11039 {
11040 rtx addends[4], op;
11041 int n = 0, i;
11042
11043 op = addr;
11044 do
11045 {
11046 if (n >= 4)
11047 return 0;
11048 addends[n++] = XEXP (op, 1);
11049 op = XEXP (op, 0);
11050 }
11051 while (GET_CODE (op) == PLUS);
11052 if (n >= 4)
11053 return 0;
11054 addends[n] = op;
11055
11056 for (i = n; i >= 0; --i)
11057 {
11058 op = addends[i];
11059 switch (GET_CODE (op))
11060 {
11061 case MULT:
11062 if (index)
11063 return 0;
11064 index = XEXP (op, 0);
11065 scale_rtx = XEXP (op, 1);
11066 break;
11067
11068 case ASHIFT:
11069 if (index)
11070 return 0;
11071 index = XEXP (op, 0);
11072 tmp = XEXP (op, 1);
11073 if (!CONST_INT_P (tmp))
11074 return 0;
11075 scale = INTVAL (tmp);
11076 if ((unsigned HOST_WIDE_INT) scale > 3)
11077 return 0;
11078 scale = 1 << scale;
11079 break;
11080
11081 case UNSPEC:
11082 if (XINT (op, 1) == UNSPEC_TP
11083 && TARGET_TLS_DIRECT_SEG_REFS
11084 && seg == SEG_DEFAULT)
11085 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11086 else
11087 return 0;
11088 break;
11089
11090 case REG:
11091 case SUBREG:
11092 if (!base)
11093 base = op;
11094 else if (!index)
11095 index = op;
11096 else
11097 return 0;
11098 break;
11099
11100 case CONST:
11101 case CONST_INT:
11102 case SYMBOL_REF:
11103 case LABEL_REF:
11104 if (disp)
11105 return 0;
11106 disp = op;
11107 break;
11108
11109 default:
11110 return 0;
11111 }
11112 }
11113 }
11114 else if (GET_CODE (addr) == MULT)
11115 {
11116 index = XEXP (addr, 0); /* index*scale */
11117 scale_rtx = XEXP (addr, 1);
11118 }
11119 else if (GET_CODE (addr) == ASHIFT)
11120 {
11121 /* We're called for lea too, which implements ashift on occasion. */
11122 index = XEXP (addr, 0);
11123 tmp = XEXP (addr, 1);
11124 if (!CONST_INT_P (tmp))
11125 return 0;
11126 scale = INTVAL (tmp);
11127 if ((unsigned HOST_WIDE_INT) scale > 3)
11128 return 0;
11129 scale = 1 << scale;
11130 retval = -1;
11131 }
11132 else
11133 disp = addr; /* displacement */
11134
11135 /* Extract the integral value of scale. */
11136 if (scale_rtx)
11137 {
11138 if (!CONST_INT_P (scale_rtx))
11139 return 0;
11140 scale = INTVAL (scale_rtx);
11141 }
11142
11143 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11144 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11145
11146 /* Avoid useless 0 displacement. */
11147 if (disp == const0_rtx && (base || index))
11148 disp = NULL_RTX;
11149
11150 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11151 if (base_reg && index_reg && scale == 1
11152 && (index_reg == arg_pointer_rtx
11153 || index_reg == frame_pointer_rtx
11154 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11155 {
11156 rtx tmp;
11157 tmp = base, base = index, index = tmp;
11158 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11159 }
11160
11161 /* Special case: %ebp cannot be encoded as a base without a displacement.
11162 Similarly %r13. */
11163 if (!disp
11164 && base_reg
11165 && (base_reg == hard_frame_pointer_rtx
11166 || base_reg == frame_pointer_rtx
11167 || base_reg == arg_pointer_rtx
11168 || (REG_P (base_reg)
11169 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11170 || REGNO (base_reg) == R13_REG))))
11171 disp = const0_rtx;
11172
11173 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
11174 Avoid this by transforming it to [%esi+0].
11175 Reload calls address legitimization without cfun defined, so we need
11176 to test cfun for being non-NULL. */
11177 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11178 && base_reg && !index_reg && !disp
11179 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11180 disp = const0_rtx;
11181
11182 /* Special case: encode reg+reg instead of reg*2. */
11183 if (!base && index && scale == 2)
11184 base = index, base_reg = index_reg, scale = 1;
11185
11186 /* Special case: scaling cannot be encoded without base or displacement. */
11187 if (!base && !disp && index && scale != 1)
11188 disp = const0_rtx;
11189
11190 out->base = base;
11191 out->index = index;
11192 out->disp = disp;
11193 out->scale = scale;
11194 out->seg = seg;
11195
11196 return retval;
11197 }
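/* A simplified standalone picture of what ix86_decompose_address produces: an
   x86 effective address of the form base + index*scale + disp, plus an
   optional segment.  The struct and example below are illustrative only and
   do not use the real ix86_address type.  */

struct decomposed_address_sketch
{
  const char *base;		/* base register, e.g. "%ebx" */
  const char *index;		/* index register, e.g. "%esi" */
  int scale;			/* 1, 2, 4 or 8 */
  int disp;			/* constant displacement */
};

/* The address operand of "movl 12(%ebx,%esi,4), %eax", i.e. the RTL
   (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into:  */
static const struct decomposed_address_sketch addr_sketch_example
  = { "%ebx", "%esi", 4, 12 };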
11198 \f
11199 /* Return the cost of the memory address x.
11200 For i386, it is better to use a complex address than let gcc copy
11201 the address into a reg and make a new pseudo. But not if the address
11202 requires two regs - that would mean more pseudos with longer
11203 lifetimes. */
11204 static int
11205 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11206 {
11207 struct ix86_address parts;
11208 int cost = 1;
11209 int ok = ix86_decompose_address (x, &parts);
11210
11211 gcc_assert (ok);
11212
11213 if (parts.base && GET_CODE (parts.base) == SUBREG)
11214 parts.base = SUBREG_REG (parts.base);
11215 if (parts.index && GET_CODE (parts.index) == SUBREG)
11216 parts.index = SUBREG_REG (parts.index);
11217
11218 /* Attempt to minimize number of registers in the address. */
11219 if ((parts.base
11220 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11221 || (parts.index
11222 && (!REG_P (parts.index)
11223 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11224 cost++;
11225
11226 if (parts.base
11227 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11228 && parts.index
11229 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11230 && parts.base != parts.index)
11231 cost++;
11232
11233 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11234 since its predecode logic can't detect the length of instructions
11235 and they degenerate to vector decoded. Increase the cost of such
11236 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11237 to split such addresses or even refuse them altogether.
11238
11239 The following addressing modes are affected:
11240 [base+scale*index]
11241 [scale*index+disp]
11242 [base+index]
11243
11244 The first and last cases may be avoidable by explicitly coding the zero into
11245 the memory address, but I don't have an AMD-K6 machine handy to check this
11246 theory. */
11247
11248 if (TARGET_K6
11249 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11250 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11251 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11252 cost += 10;
11253
11254 return cost;
11255 }
11256 \f
11257 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11258 this is used to form addresses to local data when -fPIC is in
11259 use. */
11260
11261 static bool
11262 darwin_local_data_pic (rtx disp)
11263 {
11264 return (GET_CODE (disp) == UNSPEC
11265 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11266 }
11267
11268 /* Determine if a given RTX is a valid constant. We already know this
11269 satisfies CONSTANT_P. */
11270
11271 static bool
11272 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11273 {
11274 switch (GET_CODE (x))
11275 {
11276 case CONST:
11277 x = XEXP (x, 0);
11278
11279 if (GET_CODE (x) == PLUS)
11280 {
11281 if (!CONST_INT_P (XEXP (x, 1)))
11282 return false;
11283 x = XEXP (x, 0);
11284 }
11285
11286 if (TARGET_MACHO && darwin_local_data_pic (x))
11287 return true;
11288
11289 /* Only some unspecs are valid as "constants". */
11290 if (GET_CODE (x) == UNSPEC)
11291 switch (XINT (x, 1))
11292 {
11293 case UNSPEC_GOT:
11294 case UNSPEC_GOTOFF:
11295 case UNSPEC_PLTOFF:
11296 return TARGET_64BIT;
11297 case UNSPEC_TPOFF:
11298 case UNSPEC_NTPOFF:
11299 x = XVECEXP (x, 0, 0);
11300 return (GET_CODE (x) == SYMBOL_REF
11301 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11302 case UNSPEC_DTPOFF:
11303 x = XVECEXP (x, 0, 0);
11304 return (GET_CODE (x) == SYMBOL_REF
11305 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11306 default:
11307 return false;
11308 }
11309
11310 /* We must have drilled down to a symbol. */
11311 if (GET_CODE (x) == LABEL_REF)
11312 return true;
11313 if (GET_CODE (x) != SYMBOL_REF)
11314 return false;
11315 /* FALLTHRU */
11316
11317 case SYMBOL_REF:
11318 /* TLS symbols are never valid. */
11319 if (SYMBOL_REF_TLS_MODEL (x))
11320 return false;
11321
11322 /* DLLIMPORT symbols are never valid. */
11323 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11324 && SYMBOL_REF_DLLIMPORT_P (x))
11325 return false;
11326
11327 #if TARGET_MACHO
11328 /* mdynamic-no-pic */
11329 if (MACHO_DYNAMIC_NO_PIC_P)
11330 return machopic_symbol_defined_p (x);
11331 #endif
11332 break;
11333
11334 case CONST_DOUBLE:
11335 if (GET_MODE (x) == TImode
11336 && x != CONST0_RTX (TImode)
11337 && !TARGET_64BIT)
11338 return false;
11339 break;
11340
11341 case CONST_VECTOR:
11342 if (!standard_sse_constant_p (x))
11343 return false;
11344
11345 default:
11346 break;
11347 }
11348
11349 /* Otherwise we handle everything else in the move patterns. */
11350 return true;
11351 }
11352
11353 /* Determine if it's legal to put X into the constant pool. This
11354 is not possible for the address of thread-local symbols, which
11355 is checked above. */
11356
11357 static bool
11358 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11359 {
11360 /* We can always put integral constants and vectors in memory. */
11361 switch (GET_CODE (x))
11362 {
11363 case CONST_INT:
11364 case CONST_DOUBLE:
11365 case CONST_VECTOR:
11366 return false;
11367
11368 default:
11369 break;
11370 }
11371 return !ix86_legitimate_constant_p (mode, x);
11372 }
11373
11374
11375 /* Nonzero if the constant value X is a legitimate general operand
11376 when generating PIC code. It is given that flag_pic is on and
11377 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11378
11379 bool
11380 legitimate_pic_operand_p (rtx x)
11381 {
11382 rtx inner;
11383
11384 switch (GET_CODE (x))
11385 {
11386 case CONST:
11387 inner = XEXP (x, 0);
11388 if (GET_CODE (inner) == PLUS
11389 && CONST_INT_P (XEXP (inner, 1)))
11390 inner = XEXP (inner, 0);
11391
11392 /* Only some unspecs are valid as "constants". */
11393 if (GET_CODE (inner) == UNSPEC)
11394 switch (XINT (inner, 1))
11395 {
11396 case UNSPEC_GOT:
11397 case UNSPEC_GOTOFF:
11398 case UNSPEC_PLTOFF:
11399 return TARGET_64BIT;
11400 case UNSPEC_TPOFF:
11401 x = XVECEXP (inner, 0, 0);
11402 return (GET_CODE (x) == SYMBOL_REF
11403 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11404 case UNSPEC_MACHOPIC_OFFSET:
11405 return legitimate_pic_address_disp_p (x);
11406 default:
11407 return false;
11408 }
11409 /* FALLTHRU */
11410
11411 case SYMBOL_REF:
11412 case LABEL_REF:
11413 return legitimate_pic_address_disp_p (x);
11414
11415 default:
11416 return true;
11417 }
11418 }
11419
11420 /* Determine if a given CONST RTX is a valid memory displacement
11421 in PIC mode. */
11422
11423 bool
11424 legitimate_pic_address_disp_p (rtx disp)
11425 {
11426 bool saw_plus;
11427
11428 /* In 64bit mode we can allow direct addresses of symbols and labels
11429 when they are not dynamic symbols. */
11430 if (TARGET_64BIT)
11431 {
11432 rtx op0 = disp, op1;
11433
11434 switch (GET_CODE (disp))
11435 {
11436 case LABEL_REF:
11437 return true;
11438
11439 case CONST:
11440 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11441 break;
11442 op0 = XEXP (XEXP (disp, 0), 0);
11443 op1 = XEXP (XEXP (disp, 0), 1);
11444 if (!CONST_INT_P (op1)
11445 || INTVAL (op1) >= 16*1024*1024
11446 || INTVAL (op1) < -16*1024*1024)
11447 break;
11448 if (GET_CODE (op0) == LABEL_REF)
11449 return true;
11450 if (GET_CODE (op0) != SYMBOL_REF)
11451 break;
11452 /* FALLTHRU */
11453
11454 case SYMBOL_REF:
11455 /* TLS references should always be enclosed in UNSPEC. */
11456 if (SYMBOL_REF_TLS_MODEL (op0))
11457 return false;
11458 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11459 && ix86_cmodel != CM_LARGE_PIC)
11460 return true;
11461 break;
11462
11463 default:
11464 break;
11465 }
11466 }
11467 if (GET_CODE (disp) != CONST)
11468 return false;
11469 disp = XEXP (disp, 0);
11470
11471 if (TARGET_64BIT)
11472 {
11473 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
11474 of GOT tables. We should not need these anyway. */
11475 if (GET_CODE (disp) != UNSPEC
11476 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11477 && XINT (disp, 1) != UNSPEC_GOTOFF
11478 && XINT (disp, 1) != UNSPEC_PCREL
11479 && XINT (disp, 1) != UNSPEC_PLTOFF))
11480 return false;
11481
11482 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11483 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11484 return false;
11485 return true;
11486 }
11487
11488 saw_plus = false;
11489 if (GET_CODE (disp) == PLUS)
11490 {
11491 if (!CONST_INT_P (XEXP (disp, 1)))
11492 return false;
11493 disp = XEXP (disp, 0);
11494 saw_plus = true;
11495 }
11496
11497 if (TARGET_MACHO && darwin_local_data_pic (disp))
11498 return true;
11499
11500 if (GET_CODE (disp) != UNSPEC)
11501 return false;
11502
11503 switch (XINT (disp, 1))
11504 {
11505 case UNSPEC_GOT:
11506 if (saw_plus)
11507 return false;
11508 /* We need to check for both symbols and labels because VxWorks loads
11509 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11510 details. */
11511 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11512 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11513 case UNSPEC_GOTOFF:
11514 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11515 While the ABI also specifies a 32bit relocation, we don't produce it in
11516 the small PIC model at all. */
11517 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11518 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11519 && !TARGET_64BIT)
11520 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11521 return false;
11522 case UNSPEC_GOTTPOFF:
11523 case UNSPEC_GOTNTPOFF:
11524 case UNSPEC_INDNTPOFF:
11525 if (saw_plus)
11526 return false;
11527 disp = XVECEXP (disp, 0, 0);
11528 return (GET_CODE (disp) == SYMBOL_REF
11529 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11530 case UNSPEC_NTPOFF:
11531 disp = XVECEXP (disp, 0, 0);
11532 return (GET_CODE (disp) == SYMBOL_REF
11533 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11534 case UNSPEC_DTPOFF:
11535 disp = XVECEXP (disp, 0, 0);
11536 return (GET_CODE (disp) == SYMBOL_REF
11537 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11538 }
11539
11540 return false;
11541 }
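/* A standalone restatement of the displacement window checked above for
   64bit symbol+offset addresses: the constant part of the address must stay
   within +/- 16 MB of the symbol.  The helper name is illustrative only.  */

static int
disp_within_pic_window_sketch (long long offset)
{
  return offset >= -16 * 1024 * 1024 && offset < 16 * 1024 * 1024;
}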
11542
11543 /* Recognizes RTL expressions that are valid memory addresses for an
11544 instruction. The MODE argument is the machine mode for the MEM
11545 expression that wants to use this address.
11546
11547 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11548 convert common non-canonical forms to canonical form so that they will
11549 be recognized. */
11550
11551 static bool
11552 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11553 rtx addr, bool strict)
11554 {
11555 struct ix86_address parts;
11556 rtx base, index, disp;
11557 HOST_WIDE_INT scale;
11558
11559 if (ix86_decompose_address (addr, &parts) <= 0)
11560 /* Decomposition failed. */
11561 return false;
11562
11563 base = parts.base;
11564 index = parts.index;
11565 disp = parts.disp;
11566 scale = parts.scale;
11567
11568 /* Validate base register.
11569
11570 Don't allow SUBREG's that span more than a word here. It can lead to spill
11571 failures when the base is one word out of a two word structure, which is
11572 represented internally as a DImode int. */
11573
11574 if (base)
11575 {
11576 rtx reg;
11577
11578 if (REG_P (base))
11579 reg = base;
11580 else if (GET_CODE (base) == SUBREG
11581 && REG_P (SUBREG_REG (base))
11582 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11583 <= UNITS_PER_WORD)
11584 reg = SUBREG_REG (base);
11585 else
11586 /* Base is not a register. */
11587 return false;
11588
11589 if (GET_MODE (base) != Pmode)
11590 /* Base is not in Pmode. */
11591 return false;
11592
11593 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11594 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11595 /* Base is not valid. */
11596 return false;
11597 }
11598
11599 /* Validate index register.
11600
11601 Don't allow SUBREG's that span more than a word here -- same as above. */
11602
11603 if (index)
11604 {
11605 rtx reg;
11606
11607 if (REG_P (index))
11608 reg = index;
11609 else if (GET_CODE (index) == SUBREG
11610 && REG_P (SUBREG_REG (index))
11611 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11612 <= UNITS_PER_WORD)
11613 reg = SUBREG_REG (index);
11614 else
11615 /* Index is not a register. */
11616 return false;
11617
11618 if (GET_MODE (index) != Pmode)
11619 /* Index is not in Pmode. */
11620 return false;
11621
11622 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11623 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11624 /* Index is not valid. */
11625 return false;
11626 }
11627
11628 /* Validate scale factor. */
11629 if (scale != 1)
11630 {
11631 if (!index)
11632 /* Scale without index. */
11633 return false;
11634
11635 if (scale != 2 && scale != 4 && scale != 8)
11636 /* Scale is not a valid multiplier. */
11637 return false;
11638 }
11639
11640 /* Validate displacement. */
11641 if (disp)
11642 {
11643 if (GET_CODE (disp) == CONST
11644 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11645 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11646 switch (XINT (XEXP (disp, 0), 1))
11647 {
11648 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11649 used. While the ABI also specifies 32bit relocations, we don't produce
11650 them at all and use IP relative addressing instead. */
11651 case UNSPEC_GOT:
11652 case UNSPEC_GOTOFF:
11653 gcc_assert (flag_pic);
11654 if (!TARGET_64BIT)
11655 goto is_legitimate_pic;
11656
11657 /* 64bit address unspec. */
11658 return false;
11659
11660 case UNSPEC_GOTPCREL:
11661 case UNSPEC_PCREL:
11662 gcc_assert (flag_pic);
11663 goto is_legitimate_pic;
11664
11665 case UNSPEC_GOTTPOFF:
11666 case UNSPEC_GOTNTPOFF:
11667 case UNSPEC_INDNTPOFF:
11668 case UNSPEC_NTPOFF:
11669 case UNSPEC_DTPOFF:
11670 break;
11671
11672 case UNSPEC_STACK_CHECK:
11673 gcc_assert (flag_split_stack);
11674 break;
11675
11676 default:
11677 /* Invalid address unspec. */
11678 return false;
11679 }
11680
11681 else if (SYMBOLIC_CONST (disp)
11682 && (flag_pic
11683 || (TARGET_MACHO
11684 #if TARGET_MACHO
11685 && MACHOPIC_INDIRECT
11686 && !machopic_operand_p (disp)
11687 #endif
11688 )))
11689 {
11690
11691 is_legitimate_pic:
11692 if (TARGET_64BIT && (index || base))
11693 {
11694 /* foo@dtpoff(%rX) is ok. */
11695 if (GET_CODE (disp) != CONST
11696 || GET_CODE (XEXP (disp, 0)) != PLUS
11697 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11698 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11699 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11700 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11701 /* Non-constant pic memory reference. */
11702 return false;
11703 }
11704 else if ((!TARGET_MACHO || flag_pic)
11705 && ! legitimate_pic_address_disp_p (disp))
11706 /* Displacement is an invalid pic construct. */
11707 return false;
11708 #if TARGET_MACHO
11709 else if (MACHO_DYNAMIC_NO_PIC_P
11710 && !ix86_legitimate_constant_p (Pmode, disp))
11711 /* displacement must be referenced via non_lazy_pointer */
11712 return false;
11713 #endif
11714
11715 /* This code used to verify that a symbolic pic displacement
11716 includes the pic_offset_table_rtx register.
11717
11718 While this is a good idea, unfortunately these constructs may
11719 be created by the "adds using lea" optimization for incorrect
11720 code like:
11721
11722 int a;
11723 int foo(int i)
11724 {
11725 return *(&a+i);
11726 }
11727
11728 This code is nonsensical, but results in addressing the
11729 GOT table with a pic_offset_table_rtx base. We can't
11730 just refuse it easily, since it gets matched by the
11731 "addsi3" pattern, which later gets split to lea when the
11732 output register differs from the input. While this
11733 could be handled by a separate addsi pattern for this case
11734 that never results in lea, disabling this test seems to be
11735 the easier and correct fix for the crash. */
11736 }
11737 else if (GET_CODE (disp) != LABEL_REF
11738 && !CONST_INT_P (disp)
11739 && (GET_CODE (disp) != CONST
11740 || !ix86_legitimate_constant_p (Pmode, disp))
11741 && (GET_CODE (disp) != SYMBOL_REF
11742 || !ix86_legitimate_constant_p (Pmode, disp)))
11743 /* Displacement is not constant. */
11744 return false;
11745 else if (TARGET_64BIT
11746 && !x86_64_immediate_operand (disp, VOIDmode))
11747 /* Displacement is out of range. */
11748 return false;
11749 }
11750
11751 /* Everything looks valid. */
11752 return true;
11753 }
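/* A standalone restatement of the scale rule validated above: x86 addressing
   encodes only scale factors 1, 2, 4 and 8, and a scale other than 1 is
   meaningless without an index register.  Purely illustrative.  */

static int
scale_is_encodable_sketch (int scale, int have_index)
{
  if (scale == 1)
    return 1;
  if (!have_index)
    return 0;
  return scale == 2 || scale == 4 || scale == 8;
}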
11754
11755 /* Determine if a given RTX is a valid constant address. */
11756
11757 bool
11758 constant_address_p (rtx x)
11759 {
11760 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11761 }
11762 \f
11763 /* Return a unique alias set for the GOT. */
11764
11765 static alias_set_type
11766 ix86_GOT_alias_set (void)
11767 {
11768 static alias_set_type set = -1;
11769 if (set == -1)
11770 set = new_alias_set ();
11771 return set;
11772 }
11773
11774 /* Return a legitimate reference for ORIG (an address) using the
11775 register REG. If REG is 0, a new pseudo is generated.
11776
11777 There are two types of references that must be handled:
11778
11779 1. Global data references must load the address from the GOT, via
11780 the PIC reg. An insn is emitted to do this load, and the reg is
11781 returned.
11782
11783 2. Static data references, constant pool addresses, and code labels
11784 compute the address as an offset from the GOT, whose base is in
11785 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11786 differentiate them from global data objects. The returned
11787 address is the PIC reg + an unspec constant.
11788
11789 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11790 reg also appears in the address. */
11791
11792 static rtx
11793 legitimize_pic_address (rtx orig, rtx reg)
11794 {
11795 rtx addr = orig;
11796 rtx new_rtx = orig;
11797 rtx base;
11798
11799 #if TARGET_MACHO
11800 if (TARGET_MACHO && !TARGET_64BIT)
11801 {
11802 if (reg == 0)
11803 reg = gen_reg_rtx (Pmode);
11804 /* Use the generic Mach-O PIC machinery. */
11805 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11806 }
11807 #endif
11808
11809 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11810 new_rtx = addr;
11811 else if (TARGET_64BIT
11812 && ix86_cmodel != CM_SMALL_PIC
11813 && gotoff_operand (addr, Pmode))
11814 {
11815 rtx tmpreg;
11816 /* This symbol may be referenced via a displacement from the PIC
11817 base address (@GOTOFF). */
11818
11819 if (reload_in_progress)
11820 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11821 if (GET_CODE (addr) == CONST)
11822 addr = XEXP (addr, 0);
11823 if (GET_CODE (addr) == PLUS)
11824 {
11825 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11826 UNSPEC_GOTOFF);
11827 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11828 }
11829 else
11830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11831 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11832 if (!reg)
11833 tmpreg = gen_reg_rtx (Pmode);
11834 else
11835 tmpreg = reg;
11836 emit_move_insn (tmpreg, new_rtx);
11837
11838 if (reg != 0)
11839 {
11840 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11841 tmpreg, 1, OPTAB_DIRECT);
11842 new_rtx = reg;
11843 }
11844 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11845 }
11846 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11847 {
11848 /* This symbol may be referenced via a displacement from the PIC
11849 base address (@GOTOFF). */
11850
11851 if (reload_in_progress)
11852 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11853 if (GET_CODE (addr) == CONST)
11854 addr = XEXP (addr, 0);
11855 if (GET_CODE (addr) == PLUS)
11856 {
11857 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11858 UNSPEC_GOTOFF);
11859 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11860 }
11861 else
11862 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11863 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11864 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11865
11866 if (reg != 0)
11867 {
11868 emit_move_insn (reg, new_rtx);
11869 new_rtx = reg;
11870 }
11871 }
11872 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11873 /* We can't use @GOTOFF for text labels on VxWorks;
11874 see gotoff_operand. */
11875 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11876 {
11877 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11878 {
11879 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11880 return legitimize_dllimport_symbol (addr, true);
11881 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11882 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11883 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11884 {
11885 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11886 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11887 }
11888 }
11889
11890 /* For x64 PE-COFF there is no GOT table, so we use the address
11891 directly. */
11892 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11893 {
11894 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11895 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11896
11897 if (reg == 0)
11898 reg = gen_reg_rtx (Pmode);
11899 emit_move_insn (reg, new_rtx);
11900 new_rtx = reg;
11901 }
11902 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11903 {
11904 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11905 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11906 new_rtx = gen_const_mem (Pmode, new_rtx);
11907 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11908
11909 if (reg == 0)
11910 reg = gen_reg_rtx (Pmode);
11911 /* Use gen_movsi directly, otherwise the address is loaded
11912 into a register for CSE. We don't want to CSE these addresses;
11913 instead we CSE addresses from the GOT table, so skip this. */
11914 emit_insn (gen_movsi (reg, new_rtx));
11915 new_rtx = reg;
11916 }
11917 else
11918 {
11919 /* This symbol must be referenced via a load from the
11920 Global Offset Table (@GOT). */
11921
11922 if (reload_in_progress)
11923 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11924 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11925 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11926 if (TARGET_64BIT)
11927 new_rtx = force_reg (Pmode, new_rtx);
11928 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11929 new_rtx = gen_const_mem (Pmode, new_rtx);
11930 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11931
11932 if (reg == 0)
11933 reg = gen_reg_rtx (Pmode);
11934 emit_move_insn (reg, new_rtx);
11935 new_rtx = reg;
11936 }
11937 }
11938 else
11939 {
11940 if (CONST_INT_P (addr)
11941 && !x86_64_immediate_operand (addr, VOIDmode))
11942 {
11943 if (reg)
11944 {
11945 emit_move_insn (reg, addr);
11946 new_rtx = reg;
11947 }
11948 else
11949 new_rtx = force_reg (Pmode, addr);
11950 }
11951 else if (GET_CODE (addr) == CONST)
11952 {
11953 addr = XEXP (addr, 0);
11954
11955 /* We must match stuff we generate before. Assume the only
11956 unspecs that can get here are ours. Not that we could do
11957 anything with them anyway.... */
11958 if (GET_CODE (addr) == UNSPEC
11959 || (GET_CODE (addr) == PLUS
11960 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11961 return orig;
11962 gcc_assert (GET_CODE (addr) == PLUS);
11963 }
11964 if (GET_CODE (addr) == PLUS)
11965 {
11966 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11967
11968 /* Check first to see if this is a constant offset from a @GOTOFF
11969 symbol reference. */
11970 if (gotoff_operand (op0, Pmode)
11971 && CONST_INT_P (op1))
11972 {
11973 if (!TARGET_64BIT)
11974 {
11975 if (reload_in_progress)
11976 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11977 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11978 UNSPEC_GOTOFF);
11979 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11980 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11981 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11982
11983 if (reg != 0)
11984 {
11985 emit_move_insn (reg, new_rtx);
11986 new_rtx = reg;
11987 }
11988 }
11989 else
11990 {
11991 if (INTVAL (op1) < -16*1024*1024
11992 || INTVAL (op1) >= 16*1024*1024)
11993 {
11994 if (!x86_64_immediate_operand (op1, Pmode))
11995 op1 = force_reg (Pmode, op1);
11996 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11997 }
11998 }
11999 }
12000 else
12001 {
12002 base = legitimize_pic_address (XEXP (addr, 0), reg);
12003 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12004 base == reg ? NULL_RTX : reg);
12005
12006 if (CONST_INT_P (new_rtx))
12007 new_rtx = plus_constant (base, INTVAL (new_rtx));
12008 else
12009 {
12010 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12011 {
12012 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12013 new_rtx = XEXP (new_rtx, 1);
12014 }
12015 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12016 }
12017 }
12018 }
12019 }
12020 return new_rtx;
12021 }
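/* A minimal source-level example of the two reference kinds described in the
   comment before legitimize_pic_address (compile with -fPIC on ia32; the
   variable and function names are made up for this sketch).  'extern_counter'
   is a global data reference that is loaded from the GOT through the PIC
   register, while 'local_counter' is static data addressed as the PIC
   register plus an @GOTOFF offset.  */

extern int extern_counter;
static int local_counter;

int
pic_reference_sketch (void)
{
  return extern_counter + local_counter;
}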
12022 \f
12023 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12024
12025 static rtx
12026 get_thread_pointer (bool to_reg)
12027 {
12028 rtx tp, reg, insn;
12029
12030 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12031 if (!to_reg)
12032 return tp;
12033
12034 reg = gen_reg_rtx (Pmode);
12035 insn = gen_rtx_SET (VOIDmode, reg, tp);
12036 insn = emit_insn (insn);
12037
12038 return reg;
12039 }
12040
12041 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12042
12043 static GTY(()) rtx ix86_tls_symbol;
12044
12045 static rtx
12046 ix86_tls_get_addr (void)
12047 {
12048 if (!ix86_tls_symbol)
12049 {
12050 const char *sym
12051 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12052 ? "___tls_get_addr" : "__tls_get_addr");
12053
12054 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12055 }
12056
12057 return ix86_tls_symbol;
12058 }
12059
12060 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12061
12062 static GTY(()) rtx ix86_tls_module_base_symbol;
12063
12064 rtx
12065 ix86_tls_module_base (void)
12066 {
12067 if (!ix86_tls_module_base_symbol)
12068 {
12069 ix86_tls_module_base_symbol
12070 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12071
12072 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12073 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12074 }
12075
12076 return ix86_tls_module_base_symbol;
12077 }
12078
12079 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12080 false if we expect this to be used for a memory address and true if
12081 we expect to load the address into a register. */
12082
12083 static rtx
12084 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12085 {
12086 rtx dest, base, off;
12087 rtx pic = NULL_RTX, tp = NULL_RTX;
12088 int type;
12089
12090 switch (model)
12091 {
12092 case TLS_MODEL_GLOBAL_DYNAMIC:
12093 dest = gen_reg_rtx (Pmode);
12094
12095 if (!TARGET_64BIT)
12096 {
12097 if (flag_pic)
12098 pic = pic_offset_table_rtx;
12099 else
12100 {
12101 pic = gen_reg_rtx (Pmode);
12102 emit_insn (gen_set_got (pic));
12103 }
12104 }
12105
12106 if (TARGET_GNU2_TLS)
12107 {
12108 if (TARGET_64BIT)
12109 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12110 else
12111 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12112
12113 tp = get_thread_pointer (true);
12114 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12115
12116 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12117 }
12118 else
12119 {
12120 rtx caddr = ix86_tls_get_addr ();
12121
12122 if (TARGET_64BIT)
12123 {
12124 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12125
12126 start_sequence ();
12127 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12128 insns = get_insns ();
12129 end_sequence ();
12130
12131 RTL_CONST_CALL_P (insns) = 1;
12132 emit_libcall_block (insns, dest, rax, x);
12133 }
12134 else
12135 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12136 }
12137 break;
12138
12139 case TLS_MODEL_LOCAL_DYNAMIC:
12140 base = gen_reg_rtx (Pmode);
12141
12142 if (!TARGET_64BIT)
12143 {
12144 if (flag_pic)
12145 pic = pic_offset_table_rtx;
12146 else
12147 {
12148 pic = gen_reg_rtx (Pmode);
12149 emit_insn (gen_set_got (pic));
12150 }
12151 }
12152
12153 if (TARGET_GNU2_TLS)
12154 {
12155 rtx tmp = ix86_tls_module_base ();
12156
12157 if (TARGET_64BIT)
12158 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12159 else
12160 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12161
12162 tp = get_thread_pointer (true);
12163 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12164 gen_rtx_MINUS (Pmode, tmp, tp));
12165 }
12166 else
12167 {
12168 rtx caddr = ix86_tls_get_addr ();
12169
12170 if (TARGET_64BIT)
12171 {
12172 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12173
12174 start_sequence ();
12175 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12176 insns = get_insns ();
12177 end_sequence ();
12178
12179 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12180 share the LD_BASE result with other LD model accesses. */
12181 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12182 UNSPEC_TLS_LD_BASE);
12183
12184 RTL_CONST_CALL_P (insns) = 1;
12185 emit_libcall_block (insns, base, rax, eqv);
12186 }
12187 else
12188 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12189 }
12190
12191 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12192 off = gen_rtx_CONST (Pmode, off);
12193
12194 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12195
12196 if (TARGET_GNU2_TLS)
12197 {
12198 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12199
12200 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12201 }
12202 break;
12203
12204 case TLS_MODEL_INITIAL_EXEC:
12205 if (TARGET_64BIT)
12206 {
12207 if (TARGET_SUN_TLS)
12208 {
12209 /* The Sun linker took the AMD64 TLS spec literally
12210 and can only handle %rax as destination of the
12211 initial executable code sequence. */
12212
12213 dest = gen_reg_rtx (Pmode);
12214 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12215 return dest;
12216 }
12217
12218 pic = NULL;
12219 type = UNSPEC_GOTNTPOFF;
12220 }
12221 else if (flag_pic)
12222 {
12223 if (reload_in_progress)
12224 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12225 pic = pic_offset_table_rtx;
12226 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12227 }
12228 else if (!TARGET_ANY_GNU_TLS)
12229 {
12230 pic = gen_reg_rtx (Pmode);
12231 emit_insn (gen_set_got (pic));
12232 type = UNSPEC_GOTTPOFF;
12233 }
12234 else
12235 {
12236 pic = NULL;
12237 type = UNSPEC_INDNTPOFF;
12238 }
12239
12240 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12241 off = gen_rtx_CONST (Pmode, off);
12242 if (pic)
12243 off = gen_rtx_PLUS (Pmode, pic, off);
12244 off = gen_const_mem (Pmode, off);
12245 set_mem_alias_set (off, ix86_GOT_alias_set ());
12246
12247 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12248 {
12249 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12250 off = force_reg (Pmode, off);
12251 return gen_rtx_PLUS (Pmode, base, off);
12252 }
12253 else
12254 {
12255 base = get_thread_pointer (true);
12256 dest = gen_reg_rtx (Pmode);
12257 emit_insn (gen_subsi3 (dest, base, off));
12258 }
12259 break;
12260
12261 case TLS_MODEL_LOCAL_EXEC:
12262 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12263 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12264 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12265 off = gen_rtx_CONST (Pmode, off);
12266
12267 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12268 {
12269 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12270 return gen_rtx_PLUS (Pmode, base, off);
12271 }
12272 else
12273 {
12274 base = get_thread_pointer (true);
12275 dest = gen_reg_rtx (Pmode);
12276 emit_insn (gen_subsi3 (dest, base, off));
12277 }
12278 break;
12279
12280 default:
12281 gcc_unreachable ();
12282 }
12283
12284 return dest;
12285 }
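/* A minimal example of the accesses legitimize_tls_address handles: a
   reference to a thread-local variable.  Which of the four TLS models above
   (global-dynamic, local-dynamic, initial-exec, local-exec) gets used depends
   on flag_pic, symbol visibility and -ftls-model; the variable and function
   names here are illustrative only.  */

__thread int tls_counter_sketch;

int
bump_tls_counter_sketch (void)
{
  return ++tls_counter_sketch;
}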
12286
12287 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12288 to symbol DECL. */
12289
12290 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12291 htab_t dllimport_map;
12292
12293 static tree
12294 get_dllimport_decl (tree decl)
12295 {
12296 struct tree_map *h, in;
12297 void **loc;
12298 const char *name;
12299 const char *prefix;
12300 size_t namelen, prefixlen;
12301 char *imp_name;
12302 tree to;
12303 rtx rtl;
12304
12305 if (!dllimport_map)
12306 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12307
12308 in.hash = htab_hash_pointer (decl);
12309 in.base.from = decl;
12310 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12311 h = (struct tree_map *) *loc;
12312 if (h)
12313 return h->to;
12314
12315 *loc = h = ggc_alloc_tree_map ();
12316 h->hash = in.hash;
12317 h->base.from = decl;
12318 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12319 VAR_DECL, NULL, ptr_type_node);
12320 DECL_ARTIFICIAL (to) = 1;
12321 DECL_IGNORED_P (to) = 1;
12322 DECL_EXTERNAL (to) = 1;
12323 TREE_READONLY (to) = 1;
12324
12325 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12326 name = targetm.strip_name_encoding (name);
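  /* Build the "*__imp_" import name.  Use a single underscore when the
     symbol is fastcall-mangled or the target adds no user label prefix,
     and "*__imp__" otherwise, so the extra underscore matches the
     prefixed symbol name.  */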
12327 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12328 ? "*__imp_" : "*__imp__";
12329 namelen = strlen (name);
12330 prefixlen = strlen (prefix);
12331 imp_name = (char *) alloca (namelen + prefixlen + 1);
12332 memcpy (imp_name, prefix, prefixlen);
12333 memcpy (imp_name + prefixlen, name, namelen + 1);
12334
12335 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12336 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12337 SET_SYMBOL_REF_DECL (rtl, to);
12338 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12339
12340 rtl = gen_const_mem (Pmode, rtl);
12341 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12342
12343 SET_DECL_RTL (to, rtl);
12344 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12345
12346 return to;
12347 }
12348
12349 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12350    true if we require the result to be in a register.  */
12351
12352 static rtx
12353 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12354 {
12355 tree imp_decl;
12356 rtx x;
12357
12358 gcc_assert (SYMBOL_REF_DECL (symbol));
12359 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12360
12361 x = DECL_RTL (imp_decl);
12362 if (want_reg)
12363 x = force_reg (Pmode, x);
12364 return x;
12365 }
12366
12367 /* Try machine-dependent ways of modifying an illegitimate address
12368 to be legitimate. If we find one, return the new, valid address.
12369 This macro is used in only one place: `memory_address' in explow.c.
12370
12371 OLDX is the address as it was before break_out_memory_refs was called.
12372 In some cases it is useful to look at this to decide what needs to be done.
12373
12374 It is always safe for this macro to do nothing. It exists to recognize
12375 opportunities to optimize the output.
12376
12377 For the 80386, we handle X+REG by loading X into a register R and
12378 using R+REG. R will go in a general reg and indexing will be used.
12379 However, if REG is a broken-out memory address or multiplication,
12380 nothing needs to be done because REG can certainly go in a general reg.
12381
12382 When -fpic is used, special handling is needed for symbolic references.
12383 See comments by legitimize_pic_address in i386.c for details. */
12384
12385 static rtx
12386 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12387 enum machine_mode mode)
12388 {
12389 int changed = 0;
12390 unsigned log;
12391
12392 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12393 if (log)
12394 return legitimize_tls_address (x, (enum tls_model) log, false);
12395 if (GET_CODE (x) == CONST
12396 && GET_CODE (XEXP (x, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12398 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12399 {
12400 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12401 (enum tls_model) log, false);
12402 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12403 }
12404
12405 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12406 {
12407 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12408 return legitimize_dllimport_symbol (x, true);
12409 if (GET_CODE (x) == CONST
12410 && GET_CODE (XEXP (x, 0)) == PLUS
12411 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12412 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12413 {
12414 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12415 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12416 }
12417 }
12418
12419 if (flag_pic && SYMBOLIC_CONST (x))
12420 return legitimize_pic_address (x, 0);
12421
12422 #if TARGET_MACHO
12423 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12424 return machopic_indirect_data_reference (x, 0);
12425 #endif
12426
12427 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12428 if (GET_CODE (x) == ASHIFT
12429 && CONST_INT_P (XEXP (x, 1))
12430 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12431 {
12432 changed = 1;
12433 log = INTVAL (XEXP (x, 1));
12434 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12435 GEN_INT (1 << log));
12436 }
12437
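  /* The remaining transformations apply only to PLUS addresses: fold
     small shifts into multiplies, reassociate the operands, and force
     troublesome sub-expressions into registers until the address
     becomes legitimate.  */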
12438 if (GET_CODE (x) == PLUS)
12439 {
12440 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12441
12442 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12443 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12444 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12445 {
12446 changed = 1;
12447 log = INTVAL (XEXP (XEXP (x, 0), 1));
12448 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12449 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12450 GEN_INT (1 << log));
12451 }
12452
12453 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12454 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12455 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12456 {
12457 changed = 1;
12458 log = INTVAL (XEXP (XEXP (x, 1), 1));
12459 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12460 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12461 GEN_INT (1 << log));
12462 }
12463
12464 /* Put multiply first if it isn't already. */
12465 if (GET_CODE (XEXP (x, 1)) == MULT)
12466 {
12467 rtx tmp = XEXP (x, 0);
12468 XEXP (x, 0) = XEXP (x, 1);
12469 XEXP (x, 1) = tmp;
12470 changed = 1;
12471 }
12472
12473 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12474 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12475 created by virtual register instantiation, register elimination, and
12476 similar optimizations. */
12477 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12478 {
12479 changed = 1;
12480 x = gen_rtx_PLUS (Pmode,
12481 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12482 XEXP (XEXP (x, 1), 0)),
12483 XEXP (XEXP (x, 1), 1));
12484 }
12485
12486 /* Canonicalize
12487 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12488 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12489 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12490 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12491 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12492 && CONSTANT_P (XEXP (x, 1)))
12493 {
12494 rtx constant;
12495 rtx other = NULL_RTX;
12496
12497 if (CONST_INT_P (XEXP (x, 1)))
12498 {
12499 constant = XEXP (x, 1);
12500 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12501 }
12502 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12503 {
12504 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12505 other = XEXP (x, 1);
12506 }
12507 else
12508 constant = 0;
12509
12510 if (constant)
12511 {
12512 changed = 1;
12513 x = gen_rtx_PLUS (Pmode,
12514 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12515 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12516 plus_constant (other, INTVAL (constant)));
12517 }
12518 }
12519
12520 if (changed && ix86_legitimate_address_p (mode, x, false))
12521 return x;
12522
12523 if (GET_CODE (XEXP (x, 0)) == MULT)
12524 {
12525 changed = 1;
12526 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12527 }
12528
12529 if (GET_CODE (XEXP (x, 1)) == MULT)
12530 {
12531 changed = 1;
12532 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12533 }
12534
12535 if (changed
12536 && REG_P (XEXP (x, 1))
12537 && REG_P (XEXP (x, 0)))
12538 return x;
12539
12540 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12541 {
12542 changed = 1;
12543 x = legitimize_pic_address (x, 0);
12544 }
12545
12546 if (changed && ix86_legitimate_address_p (mode, x, false))
12547 return x;
12548
12549 if (REG_P (XEXP (x, 0)))
12550 {
12551 rtx temp = gen_reg_rtx (Pmode);
12552 rtx val = force_operand (XEXP (x, 1), temp);
12553 if (val != temp)
12554 emit_move_insn (temp, val);
12555
12556 XEXP (x, 1) = temp;
12557 return x;
12558 }
12559
12560 else if (REG_P (XEXP (x, 1)))
12561 {
12562 rtx temp = gen_reg_rtx (Pmode);
12563 rtx val = force_operand (XEXP (x, 0), temp);
12564 if (val != temp)
12565 emit_move_insn (temp, val);
12566
12567 XEXP (x, 0) = temp;
12568 return x;
12569 }
12570 }
12571
12572 return x;
12573 }
12574 \f
12575 /* Print an integer constant expression in assembler syntax. Addition
12576 and subtraction are the only arithmetic that may appear in these
12577 expressions. FILE is the stdio stream to write to, X is the rtx, and
12578 CODE is the operand print code from the output string. */
12579
12580 static void
12581 output_pic_addr_const (FILE *file, rtx x, int code)
12582 {
12583 char buf[256];
12584
12585 switch (GET_CODE (x))
12586 {
12587 case PC:
12588 gcc_assert (flag_pic);
12589 putc ('.', file);
12590 break;
12591
12592 case SYMBOL_REF:
12593 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12594 output_addr_const (file, x);
12595 else
12596 {
12597 const char *name = XSTR (x, 0);
12598
12599 /* Mark the decl as referenced so that cgraph will
12600 output the function. */
12601 if (SYMBOL_REF_DECL (x))
12602 mark_decl_referenced (SYMBOL_REF_DECL (x));
12603
12604 #if TARGET_MACHO
12605 if (MACHOPIC_INDIRECT
12606 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12607 name = machopic_indirection_name (x, /*stub_p=*/true);
12608 #endif
12609 assemble_name (file, name);
12610 }
12611 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12612 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12613 fputs ("@PLT", file);
12614 break;
12615
12616 case LABEL_REF:
12617 x = XEXP (x, 0);
12618 /* FALLTHRU */
12619 case CODE_LABEL:
12620 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12621 assemble_name (asm_out_file, buf);
12622 break;
12623
12624 case CONST_INT:
12625 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12626 break;
12627
12628 case CONST:
12629 /* This used to output parentheses around the expression,
12630 but that does not work on the 386 (either ATT or BSD assembler). */
12631 output_pic_addr_const (file, XEXP (x, 0), code);
12632 break;
12633
12634 case CONST_DOUBLE:
12635 if (GET_MODE (x) == VOIDmode)
12636 {
12637 /* We can use %d if the number is <32 bits and positive. */
12638 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12639 fprintf (file, "0x%lx%08lx",
12640 (unsigned long) CONST_DOUBLE_HIGH (x),
12641 (unsigned long) CONST_DOUBLE_LOW (x));
12642 else
12643 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12644 }
12645 else
12646 /* We can't handle floating point constants;
12647 TARGET_PRINT_OPERAND must handle them. */
12648 output_operand_lossage ("floating constant misused");
12649 break;
12650
12651 case PLUS:
12652 /* Some assemblers need integer constants to appear first. */
12653 if (CONST_INT_P (XEXP (x, 0)))
12654 {
12655 output_pic_addr_const (file, XEXP (x, 0), code);
12656 putc ('+', file);
12657 output_pic_addr_const (file, XEXP (x, 1), code);
12658 }
12659 else
12660 {
12661 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12662 output_pic_addr_const (file, XEXP (x, 1), code);
12663 putc ('+', file);
12664 output_pic_addr_const (file, XEXP (x, 0), code);
12665 }
12666 break;
12667
12668 case MINUS:
12669 if (!TARGET_MACHO)
12670 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12671 output_pic_addr_const (file, XEXP (x, 0), code);
12672 putc ('-', file);
12673 output_pic_addr_const (file, XEXP (x, 1), code);
12674 if (!TARGET_MACHO)
12675 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12676 break;
12677
12678 case UNSPEC:
12679 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12680 {
12681 bool f = i386_asm_output_addr_const_extra (file, x);
12682 gcc_assert (f);
12683 break;
12684 }
12685
12686 gcc_assert (XVECLEN (x, 0) == 1);
12687 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12688 switch (XINT (x, 1))
12689 {
12690 case UNSPEC_GOT:
12691 fputs ("@GOT", file);
12692 break;
12693 case UNSPEC_GOTOFF:
12694 fputs ("@GOTOFF", file);
12695 break;
12696 case UNSPEC_PLTOFF:
12697 fputs ("@PLTOFF", file);
12698 break;
12699 case UNSPEC_PCREL:
12700 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12701 "(%rip)" : "[rip]", file);
12702 break;
12703 case UNSPEC_GOTPCREL:
12704 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12705 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12706 break;
12707 case UNSPEC_GOTTPOFF:
12708 /* FIXME: This might be @TPOFF in Sun ld too. */
12709 fputs ("@gottpoff", file);
12710 break;
12711 case UNSPEC_TPOFF:
12712 fputs ("@tpoff", file);
12713 break;
12714 case UNSPEC_NTPOFF:
12715 if (TARGET_64BIT)
12716 fputs ("@tpoff", file);
12717 else
12718 fputs ("@ntpoff", file);
12719 break;
12720 case UNSPEC_DTPOFF:
12721 fputs ("@dtpoff", file);
12722 break;
12723 case UNSPEC_GOTNTPOFF:
12724 if (TARGET_64BIT)
12725 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12726 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12727 else
12728 fputs ("@gotntpoff", file);
12729 break;
12730 case UNSPEC_INDNTPOFF:
12731 fputs ("@indntpoff", file);
12732 break;
12733 #if TARGET_MACHO
12734 case UNSPEC_MACHOPIC_OFFSET:
12735 putc ('-', file);
12736 machopic_output_function_base_name (file);
12737 break;
12738 #endif
12739 default:
12740 output_operand_lossage ("invalid UNSPEC as operand");
12741 break;
12742 }
12743 break;
12744
12745 default:
12746 output_operand_lossage ("invalid expression as operand");
12747 }
12748 }
12749
12750 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12751 We need to emit DTP-relative relocations. */
12752
12753 static void ATTRIBUTE_UNUSED
12754 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12755 {
12756 fputs (ASM_LONG, file);
12757 output_addr_const (file, x);
12758 fputs ("@dtpoff", file);
12759 switch (size)
12760 {
12761 case 4:
12762 break;
12763 case 8:
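      /* ASM_LONG emits only a 4-byte value, so pad the 8-byte case with
	 a zero upper word.  */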
12764 fputs (", 0", file);
12765 break;
12766 default:
12767 gcc_unreachable ();
12768 }
12769 }
12770
12771 /* Return true if X is a representation of the PIC register. This copes
12772 with calls from ix86_find_base_term, where the register might have
12773 been replaced by a cselib value. */
12774
12775 static bool
12776 ix86_pic_register_p (rtx x)
12777 {
12778 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12779 return (pic_offset_table_rtx
12780 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12781 else
12782 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12783 }
12784
12785 /* Helper function for ix86_delegitimize_address.
12786 Attempt to delegitimize TLS local-exec accesses. */
12787
12788 static rtx
12789 ix86_delegitimize_tls_address (rtx orig_x)
12790 {
12791 rtx x = orig_x, unspec;
12792 struct ix86_address addr;
12793
12794 if (!TARGET_TLS_DIRECT_SEG_REFS)
12795 return orig_x;
12796 if (MEM_P (x))
12797 x = XEXP (x, 0);
12798 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12799 return orig_x;
12800 if (ix86_decompose_address (x, &addr) == 0
12801 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12802 || addr.disp == NULL_RTX
12803 || GET_CODE (addr.disp) != CONST)
12804 return orig_x;
12805 unspec = XEXP (addr.disp, 0);
12806 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12807 unspec = XEXP (unspec, 0);
12808 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12809 return orig_x;
12810 x = XVECEXP (unspec, 0, 0);
12811 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12812 if (unspec != XEXP (addr.disp, 0))
12813 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12814 if (addr.index)
12815 {
12816 rtx idx = addr.index;
12817 if (addr.scale != 1)
12818 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12819 x = gen_rtx_PLUS (Pmode, idx, x);
12820 }
12821 if (addr.base)
12822 x = gen_rtx_PLUS (Pmode, addr.base, x);
12823 if (MEM_P (orig_x))
12824 x = replace_equiv_address_nv (orig_x, x);
12825 return x;
12826 }
12827
12828 /* In the name of slightly smaller debug output, and to cater to
12829 general assembler lossage, recognize PIC+GOTOFF and turn it back
12830 into a direct symbol reference.
12831
12832 On Darwin, this is necessary to avoid a crash, because Darwin
12833 has a different PIC label for each routine but the DWARF debugging
12834 information is not associated with any particular routine, so it's
12835 necessary to remove references to the PIC label from RTL stored by
12836 the DWARF output code. */
12837
12838 static rtx
12839 ix86_delegitimize_address (rtx x)
12840 {
12841 rtx orig_x = delegitimize_mem_from_attrs (x);
12842 /* addend is NULL or some rtx if x is something+GOTOFF where
12843 something doesn't include the PIC register. */
12844 rtx addend = NULL_RTX;
12845 /* reg_addend is NULL or a multiple of some register. */
12846 rtx reg_addend = NULL_RTX;
12847 /* const_addend is NULL or a const_int. */
12848 rtx const_addend = NULL_RTX;
12849 /* This is the result, or NULL. */
12850 rtx result = NULL_RTX;
12851
12852 x = orig_x;
12853
12854 if (MEM_P (x))
12855 x = XEXP (x, 0);
12856
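  /* In 64-bit mode the only addresses we legitimize this way are
     @GOTPCREL and %rip-relative UNSPECs wrapped in a CONST; anything
     else is left to the TLS delegitimizer.  */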
12857 if (TARGET_64BIT)
12858 {
12859 if (GET_CODE (x) != CONST
12860 || GET_CODE (XEXP (x, 0)) != UNSPEC
12861 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12862 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12863 || !MEM_P (orig_x))
12864 return ix86_delegitimize_tls_address (orig_x);
12865 x = XVECEXP (XEXP (x, 0), 0, 0);
12866 if (GET_MODE (orig_x) != Pmode)
12867 {
12868 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12869 if (x == NULL_RTX)
12870 return orig_x;
12871 }
12872 return x;
12873 }
12874
12875 if (GET_CODE (x) != PLUS
12876 || GET_CODE (XEXP (x, 1)) != CONST)
12877 return ix86_delegitimize_tls_address (orig_x);
12878
12879 if (ix86_pic_register_p (XEXP (x, 0)))
12880 /* %ebx + GOT/GOTOFF */
12881 ;
12882 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12883 {
12884 /* %ebx + %reg * scale + GOT/GOTOFF */
12885 reg_addend = XEXP (x, 0);
12886 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12887 reg_addend = XEXP (reg_addend, 1);
12888 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12889 reg_addend = XEXP (reg_addend, 0);
12890 else
12891 {
12892 reg_addend = NULL_RTX;
12893 addend = XEXP (x, 0);
12894 }
12895 }
12896 else
12897 addend = XEXP (x, 0);
12898
12899 x = XEXP (XEXP (x, 1), 0);
12900 if (GET_CODE (x) == PLUS
12901 && CONST_INT_P (XEXP (x, 1)))
12902 {
12903 const_addend = XEXP (x, 1);
12904 x = XEXP (x, 0);
12905 }
12906
12907 if (GET_CODE (x) == UNSPEC
12908 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12909 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12910 result = XVECEXP (x, 0, 0);
12911
12912 if (TARGET_MACHO && darwin_local_data_pic (x)
12913 && !MEM_P (orig_x))
12914 result = XVECEXP (x, 0, 0);
12915
12916 if (! result)
12917 return ix86_delegitimize_tls_address (orig_x);
12918
12919 if (const_addend)
12920 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12921 if (reg_addend)
12922 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12923 if (addend)
12924 {
12925 /* If the rest of original X doesn't involve the PIC register, add
12926 addend and subtract pic_offset_table_rtx. This can happen e.g.
12927 for code like:
12928 leal (%ebx, %ecx, 4), %ecx
12929 ...
12930 movl foo@GOTOFF(%ecx), %edx
12931 in which case we return (%ecx - %ebx) + foo. */
12932 if (pic_offset_table_rtx)
12933 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12934 pic_offset_table_rtx),
12935 result);
12936 else
12937 return orig_x;
12938 }
12939 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12940 {
12941 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12942 if (result == NULL_RTX)
12943 return orig_x;
12944 }
12945 return result;
12946 }
12947
12948 /* If X is a machine specific address (i.e. a symbol or label being
12949 referenced as a displacement from the GOT implemented using an
12950 UNSPEC), then return the base term. Otherwise return X. */
12951
12952 rtx
12953 ix86_find_base_term (rtx x)
12954 {
12955 rtx term;
12956
12957 if (TARGET_64BIT)
12958 {
12959 if (GET_CODE (x) != CONST)
12960 return x;
12961 term = XEXP (x, 0);
12962 if (GET_CODE (term) == PLUS
12963 && (CONST_INT_P (XEXP (term, 1))
12964 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12965 term = XEXP (term, 0);
12966 if (GET_CODE (term) != UNSPEC
12967 || (XINT (term, 1) != UNSPEC_GOTPCREL
12968 && XINT (term, 1) != UNSPEC_PCREL))
12969 return x;
12970
12971 return XVECEXP (term, 0, 0);
12972 }
12973
12974 return ix86_delegitimize_address (x);
12975 }
12976 \f
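/* Print to FILE the suffix (e.g. "e", "ne", "b") of the set/cmov
   condition selecting CODE compared in MODE.  If REVERSE is nonzero,
   print the suffix for the reversed condition.  FP selects the
   alternate spellings some assemblers need for fcmov.  */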
12977 static void
12978 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12979 int fp, FILE *file)
12980 {
12981 const char *suffix;
12982
12983 if (mode == CCFPmode || mode == CCFPUmode)
12984 {
12985 code = ix86_fp_compare_code_to_integer (code);
12986 mode = CCmode;
12987 }
12988 if (reverse)
12989 code = reverse_condition (code);
12990
12991 switch (code)
12992 {
12993 case EQ:
12994 switch (mode)
12995 {
12996 case CCAmode:
12997 suffix = "a";
12998 break;
12999
13000 case CCCmode:
13001 suffix = "c";
13002 break;
13003
13004 case CCOmode:
13005 suffix = "o";
13006 break;
13007
13008 case CCSmode:
13009 suffix = "s";
13010 break;
13011
13012 default:
13013 suffix = "e";
13014 }
13015 break;
13016 case NE:
13017 switch (mode)
13018 {
13019 case CCAmode:
13020 suffix = "na";
13021 break;
13022
13023 case CCCmode:
13024 suffix = "nc";
13025 break;
13026
13027 case CCOmode:
13028 suffix = "no";
13029 break;
13030
13031 case CCSmode:
13032 suffix = "ns";
13033 break;
13034
13035 default:
13036 suffix = "ne";
13037 }
13038 break;
13039 case GT:
13040 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13041 suffix = "g";
13042 break;
13043 case GTU:
13044 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13045 Those same assemblers have the same but opposite lossage on cmov. */
13046 if (mode == CCmode)
13047 suffix = fp ? "nbe" : "a";
13048 else if (mode == CCCmode)
13049 suffix = "b";
13050 else
13051 gcc_unreachable ();
13052 break;
13053 case LT:
13054 switch (mode)
13055 {
13056 case CCNOmode:
13057 case CCGOCmode:
13058 suffix = "s";
13059 break;
13060
13061 case CCmode:
13062 case CCGCmode:
13063 suffix = "l";
13064 break;
13065
13066 default:
13067 gcc_unreachable ();
13068 }
13069 break;
13070 case LTU:
13071 gcc_assert (mode == CCmode || mode == CCCmode);
13072 suffix = "b";
13073 break;
13074 case GE:
13075 switch (mode)
13076 {
13077 case CCNOmode:
13078 case CCGOCmode:
13079 suffix = "ns";
13080 break;
13081
13082 case CCmode:
13083 case CCGCmode:
13084 suffix = "ge";
13085 break;
13086
13087 default:
13088 gcc_unreachable ();
13089 }
13090 break;
13091 case GEU:
13092 /* ??? As above. */
13093 gcc_assert (mode == CCmode || mode == CCCmode);
13094 suffix = fp ? "nb" : "ae";
13095 break;
13096 case LE:
13097 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13098 suffix = "le";
13099 break;
13100 case LEU:
13101 /* ??? As above. */
13102 if (mode == CCmode)
13103 suffix = "be";
13104 else if (mode == CCCmode)
13105 suffix = fp ? "nb" : "ae";
13106 else
13107 gcc_unreachable ();
13108 break;
13109 case UNORDERED:
13110 suffix = fp ? "u" : "p";
13111 break;
13112 case ORDERED:
13113 suffix = fp ? "nu" : "np";
13114 break;
13115 default:
13116 gcc_unreachable ();
13117 }
13118 fputs (suffix, file);
13119 }
13120
13121 /* Print the name of register X to FILE based on its machine mode and number.
13122 If CODE is 'w', pretend the mode is HImode.
13123 If CODE is 'b', pretend the mode is QImode.
13124 If CODE is 'k', pretend the mode is SImode.
13125 If CODE is 'q', pretend the mode is DImode.
13126 If CODE is 'x', pretend the mode is V4SFmode.
13127 If CODE is 't', pretend the mode is V8SFmode.
13128 If CODE is 'h', pretend the reg is the 'high' byte register.
13129 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13130    If CODE is 'd', duplicate the operand for an AVX instruction.
13131 */
13132
13133 void
13134 print_reg (rtx x, int code, FILE *file)
13135 {
13136 const char *reg;
13137 bool duplicated = code == 'd' && TARGET_AVX;
13138
13139 gcc_assert (x == pc_rtx
13140 || (REGNO (x) != ARG_POINTER_REGNUM
13141 && REGNO (x) != FRAME_POINTER_REGNUM
13142 && REGNO (x) != FLAGS_REG
13143 && REGNO (x) != FPSR_REG
13144 && REGNO (x) != FPCR_REG));
13145
13146 if (ASSEMBLER_DIALECT == ASM_ATT)
13147 putc ('%', file);
13148
13149 if (x == pc_rtx)
13150 {
13151 gcc_assert (TARGET_64BIT);
13152 fputs ("rip", file);
13153 return;
13154 }
13155
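  /* Translate the print code into the operand size in bytes; 0 and 3
     are special markers for the high-byte and x87 "st(0)" namings.  */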
13156 if (code == 'w' || MMX_REG_P (x))
13157 code = 2;
13158 else if (code == 'b')
13159 code = 1;
13160 else if (code == 'k')
13161 code = 4;
13162 else if (code == 'q')
13163 code = 8;
13164 else if (code == 'y')
13165 code = 3;
13166 else if (code == 'h')
13167 code = 0;
13168 else if (code == 'x')
13169 code = 16;
13170 else if (code == 't')
13171 code = 32;
13172 else
13173 code = GET_MODE_SIZE (GET_MODE (x));
13174
13175   /* Irritatingly, the AMD extended registers use a different naming
13176      convention from the normal registers.  */
13177 if (REX_INT_REG_P (x))
13178 {
13179 gcc_assert (TARGET_64BIT);
13180 switch (code)
13181 {
13182 case 0:
13183 error ("extended registers have no high halves");
13184 break;
13185 case 1:
13186 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13187 break;
13188 case 2:
13189 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13190 break;
13191 case 4:
13192 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13193 break;
13194 case 8:
13195 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13196 break;
13197 default:
13198 error ("unsupported operand size for extended register");
13199 break;
13200 }
13201 return;
13202 }
13203
13204 reg = NULL;
13205 switch (code)
13206 {
13207 case 3:
13208 if (STACK_TOP_P (x))
13209 {
13210 reg = "st(0)";
13211 break;
13212 }
13213 /* FALLTHRU */
13214 case 8:
13215 case 4:
13216 case 12:
13217 if (! ANY_FP_REG_P (x))
13218 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13219 /* FALLTHRU */
13220 case 16:
13221 case 2:
13222 normal:
13223 reg = hi_reg_name[REGNO (x)];
13224 break;
13225 case 1:
13226 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13227 goto normal;
13228 reg = qi_reg_name[REGNO (x)];
13229 break;
13230 case 0:
13231 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13232 goto normal;
13233 reg = qi_high_reg_name[REGNO (x)];
13234 break;
13235 case 32:
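      /* Print the AVX "ymmN" name by replacing the leading 'x' of the
	 "xmmN" register name.  */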
13236 if (SSE_REG_P (x))
13237 {
13238 gcc_assert (!duplicated);
13239 putc ('y', file);
13240 fputs (hi_reg_name[REGNO (x)] + 1, file);
13241 return;
13242 }
13243 break;
13244 default:
13245 gcc_unreachable ();
13246 }
13247
13248 fputs (reg, file);
13249 if (duplicated)
13250 {
13251 if (ASSEMBLER_DIALECT == ASM_ATT)
13252 fprintf (file, ", %%%s", reg);
13253 else
13254 fprintf (file, ", %s", reg);
13255 }
13256 }
13257
13258 /* Locate some local-dynamic symbol still in use by this function
13259 so that we can print its name in some tls_local_dynamic_base
13260 pattern. */
13261
13262 static int
13263 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13264 {
13265 rtx x = *px;
13266
13267 if (GET_CODE (x) == SYMBOL_REF
13268 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13269 {
13270 cfun->machine->some_ld_name = XSTR (x, 0);
13271 return 1;
13272 }
13273
13274 return 0;
13275 }
13276
13277 static const char *
13278 get_some_local_dynamic_name (void)
13279 {
13280 rtx insn;
13281
13282 if (cfun->machine->some_ld_name)
13283 return cfun->machine->some_ld_name;
13284
13285 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13286 if (NONDEBUG_INSN_P (insn)
13287 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13288 return cfun->machine->some_ld_name;
13289
13290 return NULL;
13291 }
13292
13293 /* Meaning of CODE:
13294 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13295 C -- print opcode suffix for set/cmov insn.
13296 c -- like C, but print reversed condition
13297 F,f -- likewise, but for floating-point.
13298 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13299 otherwise nothing
13300 R -- print the prefix for register names.
13301 z -- print the opcode suffix for the size of the current operand.
13302 Z -- likewise, with special suffixes for x87 instructions.
13303 * -- print a star (in certain assembler syntax)
13304 A -- print an absolute memory reference.
13305 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13306    s -- print a shift double count, followed by the assembler's argument
13307 delimiter.
13308 b -- print the QImode name of the register for the indicated operand.
13309 %b0 would print %al if operands[0] is reg 0.
13310 w -- likewise, print the HImode name of the register.
13311 k -- likewise, print the SImode name of the register.
13312 q -- likewise, print the DImode name of the register.
13313 x -- likewise, print the V4SFmode name of the register.
13314 t -- likewise, print the V8SFmode name of the register.
13315 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13316 y -- print "st(0)" instead of "st" as a register.
13317 d -- print duplicated register operand for AVX instruction.
13318 D -- print condition for SSE cmp instruction.
13319 P -- if PIC, print an @PLT suffix.
13320 p -- print raw symbol name.
13321 X -- don't print any sort of PIC '@' suffix for a symbol.
13322 & -- print some in-use local-dynamic symbol name.
13323 H -- print a memory address offset by 8; used for sse high-parts
13324 Y -- print condition for XOP pcom* instruction.
13325 + -- print a branch hint as 'cs' or 'ds' prefix
13326 ; -- print a semicolon (after prefixes due to bug in older gas).
13327 @ -- print a segment register of thread base pointer load
13328 */
13329
13330 void
13331 ix86_print_operand (FILE *file, rtx x, int code)
13332 {
13333 if (code)
13334 {
13335 switch (code)
13336 {
13337 case '*':
13338 if (ASSEMBLER_DIALECT == ASM_ATT)
13339 putc ('*', file);
13340 return;
13341
13342 case '&':
13343 {
13344 const char *name = get_some_local_dynamic_name ();
13345 if (name == NULL)
13346 output_operand_lossage ("'%%&' used without any "
13347 "local dynamic TLS references");
13348 else
13349 assemble_name (file, name);
13350 return;
13351 }
13352
13353 case 'A':
13354 switch (ASSEMBLER_DIALECT)
13355 {
13356 case ASM_ATT:
13357 putc ('*', file);
13358 break;
13359
13360 case ASM_INTEL:
13361 /* Intel syntax. For absolute addresses, registers should not
13362 	       be surrounded by brackets.  */
13363 if (!REG_P (x))
13364 {
13365 putc ('[', file);
13366 ix86_print_operand (file, x, 0);
13367 putc (']', file);
13368 return;
13369 }
13370 break;
13371
13372 default:
13373 gcc_unreachable ();
13374 }
13375
13376 ix86_print_operand (file, x, 0);
13377 return;
13378
13379
13380 case 'L':
13381 if (ASSEMBLER_DIALECT == ASM_ATT)
13382 putc ('l', file);
13383 return;
13384
13385 case 'W':
13386 if (ASSEMBLER_DIALECT == ASM_ATT)
13387 putc ('w', file);
13388 return;
13389
13390 case 'B':
13391 if (ASSEMBLER_DIALECT == ASM_ATT)
13392 putc ('b', file);
13393 return;
13394
13395 case 'Q':
13396 if (ASSEMBLER_DIALECT == ASM_ATT)
13397 putc ('l', file);
13398 return;
13399
13400 case 'S':
13401 if (ASSEMBLER_DIALECT == ASM_ATT)
13402 putc ('s', file);
13403 return;
13404
13405 case 'T':
13406 if (ASSEMBLER_DIALECT == ASM_ATT)
13407 putc ('t', file);
13408 return;
13409
13410 case 'z':
13411 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13412 {
13413 /* Opcodes don't get size suffixes if using Intel opcodes. */
13414 if (ASSEMBLER_DIALECT == ASM_INTEL)
13415 return;
13416
13417 switch (GET_MODE_SIZE (GET_MODE (x)))
13418 {
13419 case 1:
13420 putc ('b', file);
13421 return;
13422
13423 case 2:
13424 putc ('w', file);
13425 return;
13426
13427 case 4:
13428 putc ('l', file);
13429 return;
13430
13431 case 8:
13432 putc ('q', file);
13433 return;
13434
13435 default:
13436 output_operand_lossage
13437 ("invalid operand size for operand code '%c'", code);
13438 return;
13439 }
13440 }
13441
13442 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13443 warning
13444 (0, "non-integer operand used with operand code '%c'", code);
13445 /* FALLTHRU */
13446
13447 case 'Z':
13448 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13449 if (ASSEMBLER_DIALECT == ASM_INTEL)
13450 return;
13451
13452 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13453 {
13454 switch (GET_MODE_SIZE (GET_MODE (x)))
13455 {
13456 case 2:
13457 #ifdef HAVE_AS_IX86_FILDS
13458 putc ('s', file);
13459 #endif
13460 return;
13461
13462 case 4:
13463 putc ('l', file);
13464 return;
13465
13466 case 8:
13467 #ifdef HAVE_AS_IX86_FILDQ
13468 putc ('q', file);
13469 #else
13470 fputs ("ll", file);
13471 #endif
13472 return;
13473
13474 default:
13475 break;
13476 }
13477 }
13478 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13479 {
13480 /* 387 opcodes don't get size suffixes
13481 if the operands are registers. */
13482 if (STACK_REG_P (x))
13483 return;
13484
13485 switch (GET_MODE_SIZE (GET_MODE (x)))
13486 {
13487 case 4:
13488 putc ('s', file);
13489 return;
13490
13491 case 8:
13492 putc ('l', file);
13493 return;
13494
13495 case 12:
13496 case 16:
13497 putc ('t', file);
13498 return;
13499
13500 default:
13501 break;
13502 }
13503 }
13504 else
13505 {
13506 output_operand_lossage
13507 ("invalid operand type used with operand code '%c'", code);
13508 return;
13509 }
13510
13511 output_operand_lossage
13512 ("invalid operand size for operand code '%c'", code);
13513 return;
13514
13515 case 'd':
13516 case 'b':
13517 case 'w':
13518 case 'k':
13519 case 'q':
13520 case 'h':
13521 case 't':
13522 case 'y':
13523 case 'x':
13524 case 'X':
13525 case 'P':
13526 case 'p':
13527 break;
13528
13529 case 's':
13530 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13531 {
13532 ix86_print_operand (file, x, 0);
13533 fputs (", ", file);
13534 }
13535 return;
13536
13537 case 'D':
13538 	  /* A bit of brain damage here: the SSE compare instructions
13539 	     use completely different names for the comparisons than the
13540 	     fp conditional moves do.  */
13541 if (TARGET_AVX)
13542 {
13543 switch (GET_CODE (x))
13544 {
13545 case EQ:
13546 fputs ("eq", file);
13547 break;
13548 case UNEQ:
13549 fputs ("eq_us", file);
13550 break;
13551 case LT:
13552 fputs ("lt", file);
13553 break;
13554 case UNLT:
13555 fputs ("nge", file);
13556 break;
13557 case LE:
13558 fputs ("le", file);
13559 break;
13560 case UNLE:
13561 fputs ("ngt", file);
13562 break;
13563 case UNORDERED:
13564 fputs ("unord", file);
13565 break;
13566 case NE:
13567 fputs ("neq", file);
13568 break;
13569 case LTGT:
13570 fputs ("neq_oq", file);
13571 break;
13572 case GE:
13573 fputs ("ge", file);
13574 break;
13575 case UNGE:
13576 fputs ("nlt", file);
13577 break;
13578 case GT:
13579 fputs ("gt", file);
13580 break;
13581 case UNGT:
13582 fputs ("nle", file);
13583 break;
13584 case ORDERED:
13585 fputs ("ord", file);
13586 break;
13587 default:
13588 output_operand_lossage ("operand is not a condition code, "
13589 "invalid operand code 'D'");
13590 return;
13591 }
13592 }
13593 else
13594 {
13595 switch (GET_CODE (x))
13596 {
13597 case EQ:
13598 case UNEQ:
13599 fputs ("eq", file);
13600 break;
13601 case LT:
13602 case UNLT:
13603 fputs ("lt", file);
13604 break;
13605 case LE:
13606 case UNLE:
13607 fputs ("le", file);
13608 break;
13609 case UNORDERED:
13610 fputs ("unord", file);
13611 break;
13612 case NE:
13613 case LTGT:
13614 fputs ("neq", file);
13615 break;
13616 case UNGE:
13617 case GE:
13618 fputs ("nlt", file);
13619 break;
13620 case UNGT:
13621 case GT:
13622 fputs ("nle", file);
13623 break;
13624 case ORDERED:
13625 fputs ("ord", file);
13626 break;
13627 default:
13628 output_operand_lossage ("operand is not a condition code, "
13629 "invalid operand code 'D'");
13630 return;
13631 }
13632 }
13633 return;
13634 case 'O':
13635 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13636 if (ASSEMBLER_DIALECT == ASM_ATT)
13637 {
13638 switch (GET_MODE (x))
13639 {
13640 case HImode: putc ('w', file); break;
13641 case SImode:
13642 case SFmode: putc ('l', file); break;
13643 case DImode:
13644 case DFmode: putc ('q', file); break;
13645 default: gcc_unreachable ();
13646 }
13647 putc ('.', file);
13648 }
13649 #endif
13650 return;
13651 case 'C':
13652 if (!COMPARISON_P (x))
13653 {
13654 output_operand_lossage ("operand is neither a constant nor a "
13655 "condition code, invalid operand code "
13656 "'C'");
13657 return;
13658 }
13659 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13660 return;
13661 case 'F':
13662 if (!COMPARISON_P (x))
13663 {
13664 output_operand_lossage ("operand is neither a constant nor a "
13665 "condition code, invalid operand code "
13666 "'F'");
13667 return;
13668 }
13669 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13670 if (ASSEMBLER_DIALECT == ASM_ATT)
13671 putc ('.', file);
13672 #endif
13673 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13674 return;
13675
13676 /* Like above, but reverse condition */
13677 case 'c':
13678 /* Check to see if argument to %c is really a constant
13679 and not a condition code which needs to be reversed. */
13680 if (!COMPARISON_P (x))
13681 {
13682 output_operand_lossage ("operand is neither a constant nor a "
13683 "condition code, invalid operand "
13684 "code 'c'");
13685 return;
13686 }
13687 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13688 return;
13689 case 'f':
13690 if (!COMPARISON_P (x))
13691 {
13692 output_operand_lossage ("operand is neither a constant nor a "
13693 "condition code, invalid operand "
13694 "code 'f'");
13695 return;
13696 }
13697 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13698 if (ASSEMBLER_DIALECT == ASM_ATT)
13699 putc ('.', file);
13700 #endif
13701 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13702 return;
13703
13704 case 'H':
13705 /* It doesn't actually matter what mode we use here, as we're
13706 only going to use this for printing. */
13707 x = adjust_address_nv (x, DImode, 8);
13708 break;
13709
13710 case '+':
13711 {
13712 rtx x;
13713
13714 if (!optimize
13715 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13716 return;
13717
13718 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13719 if (x)
13720 {
13721 int pred_val = INTVAL (XEXP (x, 0));
13722
13723 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13724 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13725 {
13726 int taken = pred_val > REG_BR_PROB_BASE / 2;
13727 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13728
13729 		    /* Emit hints only in the case where the default branch
13730 		       prediction heuristics would fail.  */
13731 if (taken != cputaken)
13732 {
13733 /* We use 3e (DS) prefix for taken branches and
13734 2e (CS) prefix for not taken branches. */
13735 if (taken)
13736 fputs ("ds ; ", file);
13737 else
13738 fputs ("cs ; ", file);
13739 }
13740 }
13741 }
13742 return;
13743 }
13744
13745 case 'Y':
13746 switch (GET_CODE (x))
13747 {
13748 case NE:
13749 fputs ("neq", file);
13750 break;
13751 case EQ:
13752 fputs ("eq", file);
13753 break;
13754 case GE:
13755 case GEU:
13756 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13757 break;
13758 case GT:
13759 case GTU:
13760 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13761 break;
13762 case LE:
13763 case LEU:
13764 fputs ("le", file);
13765 break;
13766 case LT:
13767 case LTU:
13768 fputs ("lt", file);
13769 break;
13770 case UNORDERED:
13771 fputs ("unord", file);
13772 break;
13773 case ORDERED:
13774 fputs ("ord", file);
13775 break;
13776 case UNEQ:
13777 fputs ("ueq", file);
13778 break;
13779 case UNGE:
13780 fputs ("nlt", file);
13781 break;
13782 case UNGT:
13783 fputs ("nle", file);
13784 break;
13785 case UNLE:
13786 fputs ("ule", file);
13787 break;
13788 case UNLT:
13789 fputs ("ult", file);
13790 break;
13791 case LTGT:
13792 fputs ("une", file);
13793 break;
13794 default:
13795 output_operand_lossage ("operand is not a condition code, "
13796 "invalid operand code 'Y'");
13797 return;
13798 }
13799 return;
13800
13801 case ';':
13802 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13803 putc (';', file);
13804 #endif
13805 return;
13806
13807 case '@':
13808 if (ASSEMBLER_DIALECT == ASM_ATT)
13809 putc ('%', file);
13810
13811 	  /* The kernel uses a different segment register for performance
13812 	     reasons: this way a system call does not have to trash the
13813 	     userspace segment register, which would be expensive.  */
13814 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13815 fputs ("fs", file);
13816 else
13817 fputs ("gs", file);
13818 return;
13819
13820 default:
13821 output_operand_lossage ("invalid operand code '%c'", code);
13822 }
13823 }
13824
13825 if (REG_P (x))
13826 print_reg (x, code, file);
13827
13828 else if (MEM_P (x))
13829 {
13830 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13831 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13832 && GET_MODE (x) != BLKmode)
13833 {
13834 const char * size;
13835 switch (GET_MODE_SIZE (GET_MODE (x)))
13836 {
13837 case 1: size = "BYTE"; break;
13838 case 2: size = "WORD"; break;
13839 case 4: size = "DWORD"; break;
13840 case 8: size = "QWORD"; break;
13841 case 12: size = "TBYTE"; break;
13842 case 16:
13843 if (GET_MODE (x) == XFmode)
13844 size = "TBYTE";
13845 else
13846 size = "XMMWORD";
13847 break;
13848 case 32: size = "YMMWORD"; break;
13849 default:
13850 gcc_unreachable ();
13851 }
13852
13853 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13854 if (code == 'b')
13855 size = "BYTE";
13856 else if (code == 'w')
13857 size = "WORD";
13858 else if (code == 'k')
13859 size = "DWORD";
13860
13861 fputs (size, file);
13862 fputs (" PTR ", file);
13863 }
13864
13865 x = XEXP (x, 0);
13866 /* Avoid (%rip) for call operands. */
13867 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13868 && !CONST_INT_P (x))
13869 output_addr_const (file, x);
13870 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13871 output_operand_lossage ("invalid constraints for operand");
13872 else
13873 output_address (x);
13874 }
13875
13876 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13877 {
13878 REAL_VALUE_TYPE r;
13879 long l;
13880
13881 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13882 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13883
13884 if (ASSEMBLER_DIALECT == ASM_ATT)
13885 putc ('$', file);
13886 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13887 if (code == 'q')
13888 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13889 else
13890 fprintf (file, "0x%08x", (unsigned int) l);
13891 }
13892
13893 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13894 {
13895 REAL_VALUE_TYPE r;
13896 long l[2];
13897
13898 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13899 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13900
13901 if (ASSEMBLER_DIALECT == ASM_ATT)
13902 putc ('$', file);
13903 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13904 }
13905
13906 /* These float cases don't actually occur as immediate operands. */
13907 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
13908 {
13909 char dstr[30];
13910
13911 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13912 fputs (dstr, file);
13913 }
13914
13915 else
13916 {
13917 /* We have patterns that allow zero sets of memory, for instance.
13918 In 64-bit mode, we should probably support all 8-byte vectors,
13919 since we can in fact encode that into an immediate. */
13920 if (GET_CODE (x) == CONST_VECTOR)
13921 {
13922 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13923 x = const0_rtx;
13924 }
13925
13926 if (code != 'P' && code != 'p')
13927 {
13928 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13929 {
13930 if (ASSEMBLER_DIALECT == ASM_ATT)
13931 putc ('$', file);
13932 }
13933 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13934 || GET_CODE (x) == LABEL_REF)
13935 {
13936 if (ASSEMBLER_DIALECT == ASM_ATT)
13937 putc ('$', file);
13938 else
13939 fputs ("OFFSET FLAT:", file);
13940 }
13941 }
13942 if (CONST_INT_P (x))
13943 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13944 else if (flag_pic || MACHOPIC_INDIRECT)
13945 output_pic_addr_const (file, x, code);
13946 else
13947 output_addr_const (file, x);
13948 }
13949 }
13950
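/* Return true if CODE is one of the punctuation characters that
   ix86_print_operand handles specially.  */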
13951 static bool
13952 ix86_print_operand_punct_valid_p (unsigned char code)
13953 {
13954 return (code == '@' || code == '*' || code == '+'
13955 || code == '&' || code == ';');
13956 }
13957 \f
13958 /* Print a memory operand whose address is ADDR. */
13959
13960 static void
13961 ix86_print_operand_address (FILE *file, rtx addr)
13962 {
13963 struct ix86_address parts;
13964 rtx base, index, disp;
13965 int scale;
13966 int ok = ix86_decompose_address (addr, &parts);
13967
13968 gcc_assert (ok);
13969
13970 base = parts.base;
13971 index = parts.index;
13972 disp = parts.disp;
13973 scale = parts.scale;
13974
13975 switch (parts.seg)
13976 {
13977 case SEG_DEFAULT:
13978 break;
13979 case SEG_FS:
13980 case SEG_GS:
13981 if (ASSEMBLER_DIALECT == ASM_ATT)
13982 putc ('%', file);
13983 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13984 break;
13985 default:
13986 gcc_unreachable ();
13987 }
13988
13989 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13990 if (TARGET_64BIT && !base && !index)
13991 {
13992 rtx symbol = disp;
13993
13994 if (GET_CODE (disp) == CONST
13995 && GET_CODE (XEXP (disp, 0)) == PLUS
13996 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13997 symbol = XEXP (XEXP (disp, 0), 0);
13998
13999 if (GET_CODE (symbol) == LABEL_REF
14000 || (GET_CODE (symbol) == SYMBOL_REF
14001 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14002 base = pc_rtx;
14003 }
14004 if (!base && !index)
14005 {
14006 /* Displacement only requires special attention. */
14007
14008 if (CONST_INT_P (disp))
14009 {
14010 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14011 fputs ("ds:", file);
14012 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14013 }
14014 else if (flag_pic)
14015 output_pic_addr_const (file, disp, 0);
14016 else
14017 output_addr_const (file, disp);
14018 }
14019 else
14020 {
14021 if (ASSEMBLER_DIALECT == ASM_ATT)
14022 {
14023 if (disp)
14024 {
14025 if (flag_pic)
14026 output_pic_addr_const (file, disp, 0);
14027 else if (GET_CODE (disp) == LABEL_REF)
14028 output_asm_label (disp);
14029 else
14030 output_addr_const (file, disp);
14031 }
14032
14033 putc ('(', file);
14034 if (base)
14035 print_reg (base, 0, file);
14036 if (index)
14037 {
14038 putc (',', file);
14039 print_reg (index, 0, file);
14040 if (scale != 1)
14041 fprintf (file, ",%d", scale);
14042 }
14043 putc (')', file);
14044 }
14045 else
14046 {
14047 rtx offset = NULL_RTX;
14048
14049 if (disp)
14050 {
14051 /* Pull out the offset of a symbol; print any symbol itself. */
14052 if (GET_CODE (disp) == CONST
14053 && GET_CODE (XEXP (disp, 0)) == PLUS
14054 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14055 {
14056 offset = XEXP (XEXP (disp, 0), 1);
14057 disp = gen_rtx_CONST (VOIDmode,
14058 XEXP (XEXP (disp, 0), 0));
14059 }
14060
14061 if (flag_pic)
14062 output_pic_addr_const (file, disp, 0);
14063 else if (GET_CODE (disp) == LABEL_REF)
14064 output_asm_label (disp);
14065 else if (CONST_INT_P (disp))
14066 offset = disp;
14067 else
14068 output_addr_const (file, disp);
14069 }
14070
14071 putc ('[', file);
14072 if (base)
14073 {
14074 print_reg (base, 0, file);
14075 if (offset)
14076 {
14077 if (INTVAL (offset) >= 0)
14078 putc ('+', file);
14079 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14080 }
14081 }
14082 else if (offset)
14083 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14084 else
14085 putc ('0', file);
14086
14087 if (index)
14088 {
14089 putc ('+', file);
14090 print_reg (index, 0, file);
14091 if (scale != 1)
14092 fprintf (file, "*%d", scale);
14093 }
14094 putc (']', file);
14095 }
14096 }
14097 }
14098
14099 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14100
14101 static bool
14102 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14103 {
14104 rtx op;
14105
14106 if (GET_CODE (x) != UNSPEC)
14107 return false;
14108
14109 op = XVECEXP (x, 0, 0);
14110 switch (XINT (x, 1))
14111 {
14112 case UNSPEC_GOTTPOFF:
14113 output_addr_const (file, op);
14114 /* FIXME: This might be @TPOFF in Sun ld. */
14115 fputs ("@gottpoff", file);
14116 break;
14117 case UNSPEC_TPOFF:
14118 output_addr_const (file, op);
14119 fputs ("@tpoff", file);
14120 break;
14121 case UNSPEC_NTPOFF:
14122 output_addr_const (file, op);
14123 if (TARGET_64BIT)
14124 fputs ("@tpoff", file);
14125 else
14126 fputs ("@ntpoff", file);
14127 break;
14128 case UNSPEC_DTPOFF:
14129 output_addr_const (file, op);
14130 fputs ("@dtpoff", file);
14131 break;
14132 case UNSPEC_GOTNTPOFF:
14133 output_addr_const (file, op);
14134 if (TARGET_64BIT)
14135 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14136 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14137 else
14138 fputs ("@gotntpoff", file);
14139 break;
14140 case UNSPEC_INDNTPOFF:
14141 output_addr_const (file, op);
14142 fputs ("@indntpoff", file);
14143 break;
14144 #if TARGET_MACHO
14145 case UNSPEC_MACHOPIC_OFFSET:
14146 output_addr_const (file, op);
14147 putc ('-', file);
14148 machopic_output_function_base_name (file);
14149 break;
14150 #endif
14151
14152 case UNSPEC_STACK_CHECK:
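      /* For -fsplit-stack, print the thread-local location of the stack
	 boundary as a %fs- or %gs-relative offset.  */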
14153 {
14154 int offset;
14155
14156 gcc_assert (flag_split_stack);
14157
14158 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14159 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14160 #else
14161 gcc_unreachable ();
14162 #endif
14163
14164 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14165 }
14166 break;
14167
14168 default:
14169 return false;
14170 }
14171
14172 return true;
14173 }
14174 \f
14175 /* Split one or more double-mode RTL references into pairs of half-mode
14176 references. The RTL can be REG, offsettable MEM, integer constant, or
14177 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14178 split and "num" is its length. lo_half and hi_half are output arrays
14179 that parallel "operands". */
14180
14181 void
14182 split_double_mode (enum machine_mode mode, rtx operands[],
14183 int num, rtx lo_half[], rtx hi_half[])
14184 {
14185 enum machine_mode half_mode;
14186 unsigned int byte;
14187
14188 switch (mode)
14189 {
14190 case TImode:
14191 half_mode = DImode;
14192 break;
14193 case DImode:
14194 half_mode = SImode;
14195 break;
14196 default:
14197 gcc_unreachable ();
14198 }
14199
14200 byte = GET_MODE_SIZE (half_mode);
14201
14202 while (num--)
14203 {
14204 rtx op = operands[num];
14205
14206 	 /* simplify_subreg refuses to split volatile memory addresses,
14207 	    but we still have to handle them.  */
14208 if (MEM_P (op))
14209 {
14210 lo_half[num] = adjust_address (op, half_mode, 0);
14211 hi_half[num] = adjust_address (op, half_mode, byte);
14212 }
14213 else
14214 {
14215 lo_half[num] = simplify_gen_subreg (half_mode, op,
14216 GET_MODE (op) == VOIDmode
14217 ? mode : GET_MODE (op), 0);
14218 hi_half[num] = simplify_gen_subreg (half_mode, op,
14219 GET_MODE (op) == VOIDmode
14220 ? mode : GET_MODE (op), byte);
14221 }
14222 }
14223 }
14224 \f
14225 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14226 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14227 is the expression of the binary operation. The output may either be
14228 emitted here, or returned to the caller, like all output_* functions.
14229
14230 There is no guarantee that the operands are the same mode, as they
14231 might be within FLOAT or FLOAT_EXTEND expressions. */
14232
14233 #ifndef SYSV386_COMPAT
14234 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14235 wants to fix the assemblers because that causes incompatibility
14236 with gcc. No-one wants to fix gcc because that causes
14237 incompatibility with assemblers... You can use the option of
14238 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14239 #define SYSV386_COMPAT 1
14240 #endif
14241
14242 const char *
14243 output_387_binary_op (rtx insn, rtx *operands)
14244 {
14245 static char buf[40];
14246 const char *p;
14247 const char *ssep;
14248 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14249
14250 #ifdef ENABLE_CHECKING
14251   /* Even if we do not want to check the inputs, this documents the
14252      input constraints, which helps in understanding the following code.  */
14253 if (STACK_REG_P (operands[0])
14254 && ((REG_P (operands[1])
14255 && REGNO (operands[0]) == REGNO (operands[1])
14256 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14257 || (REG_P (operands[2])
14258 && REGNO (operands[0]) == REGNO (operands[2])
14259 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14260 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14261 ; /* ok */
14262 else
14263 gcc_assert (is_sse);
14264 #endif
14265
14266 switch (GET_CODE (operands[3]))
14267 {
14268 case PLUS:
14269 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14270 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14271 p = "fiadd";
14272 else
14273 p = "fadd";
14274 ssep = "vadd";
14275 break;
14276
14277 case MINUS:
14278 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14279 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14280 p = "fisub";
14281 else
14282 p = "fsub";
14283 ssep = "vsub";
14284 break;
14285
14286 case MULT:
14287 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14288 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14289 p = "fimul";
14290 else
14291 p = "fmul";
14292 ssep = "vmul";
14293 break;
14294
14295 case DIV:
14296 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14297 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14298 p = "fidiv";
14299 else
14300 p = "fdiv";
14301 ssep = "vdiv";
14302 break;
14303
14304 default:
14305 gcc_unreachable ();
14306 }
14307
14308 if (is_sse)
14309 {
14310 if (TARGET_AVX)
14311 {
14312 strcpy (buf, ssep);
14313 if (GET_MODE (operands[0]) == SFmode)
14314 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14315 else
14316 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14317 }
14318 else
14319 {
14320 strcpy (buf, ssep + 1);
14321 if (GET_MODE (operands[0]) == SFmode)
14322 strcat (buf, "ss\t{%2, %0|%0, %2}");
14323 else
14324 strcat (buf, "sd\t{%2, %0|%0, %2}");
14325 }
14326 return buf;
14327 }
14328 strcpy (buf, p);
14329
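  /* Choose the operand/suffix template to append to the x87 mnemonic,
     depending on which operand sits at the stack top and whether one of
     them dies with this insn.  */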
14330 switch (GET_CODE (operands[3]))
14331 {
14332 case MULT:
14333 case PLUS:
14334 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14335 {
14336 rtx temp = operands[2];
14337 operands[2] = operands[1];
14338 operands[1] = temp;
14339 }
14340
14341       /* We know operands[0] == operands[1].  */
14342
14343 if (MEM_P (operands[2]))
14344 {
14345 p = "%Z2\t%2";
14346 break;
14347 }
14348
14349 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14350 {
14351 if (STACK_TOP_P (operands[0]))
14352 /* How is it that we are storing to a dead operand[2]?
14353 Well, presumably operands[1] is dead too. We can't
14354 store the result to st(0) as st(0) gets popped on this
14355 instruction. Instead store to operands[2] (which I
14356 think has to be st(1)). st(1) will be popped later.
14357 gcc <= 2.8.1 didn't have this check and generated
14358 assembly code that the Unixware assembler rejected. */
14359 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14360 else
14361 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14362 break;
14363 }
14364
14365 if (STACK_TOP_P (operands[0]))
14366 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14367 else
14368 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14369 break;
14370
14371 case MINUS:
14372 case DIV:
14373 if (MEM_P (operands[1]))
14374 {
14375 p = "r%Z1\t%1";
14376 break;
14377 }
14378
14379 if (MEM_P (operands[2]))
14380 {
14381 p = "%Z2\t%2";
14382 break;
14383 }
14384
14385 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14386 {
14387 #if SYSV386_COMPAT
14388 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14389 derived assemblers, confusingly reverse the direction of
14390 the operation for fsub{r} and fdiv{r} when the
14391 destination register is not st(0). The Intel assembler
14392 doesn't have this brain damage. Read !SYSV386_COMPAT to
14393 figure out what the hardware really does. */
14394 if (STACK_TOP_P (operands[0]))
14395 p = "{p\t%0, %2|rp\t%2, %0}";
14396 else
14397 p = "{rp\t%2, %0|p\t%0, %2}";
14398 #else
14399 if (STACK_TOP_P (operands[0]))
14400 /* As above for fmul/fadd, we can't store to st(0). */
14401 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14402 else
14403 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14404 #endif
14405 break;
14406 }
14407
14408 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14409 {
14410 #if SYSV386_COMPAT
14411 if (STACK_TOP_P (operands[0]))
14412 p = "{rp\t%0, %1|p\t%1, %0}";
14413 else
14414 p = "{p\t%1, %0|rp\t%0, %1}";
14415 #else
14416 if (STACK_TOP_P (operands[0]))
14417 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14418 else
14419 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14420 #endif
14421 break;
14422 }
14423
14424 if (STACK_TOP_P (operands[0]))
14425 {
14426 if (STACK_TOP_P (operands[1]))
14427 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14428 else
14429 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14430 break;
14431 }
14432 else if (STACK_TOP_P (operands[1]))
14433 {
14434 #if SYSV386_COMPAT
14435 p = "{\t%1, %0|r\t%0, %1}";
14436 #else
14437 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14438 #endif
14439 }
14440 else
14441 {
14442 #if SYSV386_COMPAT
14443 p = "{r\t%2, %0|\t%0, %2}";
14444 #else
14445 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14446 #endif
14447 }
14448 break;
14449
14450 default:
14451 gcc_unreachable ();
14452 }
14453
14454 strcat (buf, p);
14455 return buf;
14456 }
14457
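/* An illustrative sketch of what the templates built above turn into
   (operand numbers map to %0/%1/%2; not emitted verbatim): for a
   DFmode addition the SSE path produces roughly

	addsd	%xmm2, %xmm0		(non-AVX, two-operand form)
	vaddsd	%xmm2, %xmm1, %xmm0	(AVX, three-operand form)

   while the x87 path picks fadd/fiadd and one of the %st templates
   above, depending on which operand is in memory and which one dies.  */
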
14458 /* Return needed mode for entity in optimize_mode_switching pass. */
14459
14460 int
14461 ix86_mode_needed (int entity, rtx insn)
14462 {
14463 enum attr_i387_cw mode;
14464
14465 /* The mode UNINITIALIZED is used to store the control word after a
14466 function call or ASM pattern. The mode ANY specifies that the function
14467 has no requirements on the control word and makes no changes in the
14468 bits we are interested in. */
14469
14470 if (CALL_P (insn)
14471 || (NONJUMP_INSN_P (insn)
14472 && (asm_noperands (PATTERN (insn)) >= 0
14473 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14474 return I387_CW_UNINITIALIZED;
14475
14476 if (recog_memoized (insn) < 0)
14477 return I387_CW_ANY;
14478
14479 mode = get_attr_i387_cw (insn);
14480
14481 switch (entity)
14482 {
14483 case I387_TRUNC:
14484 if (mode == I387_CW_TRUNC)
14485 return mode;
14486 break;
14487
14488 case I387_FLOOR:
14489 if (mode == I387_CW_FLOOR)
14490 return mode;
14491 break;
14492
14493 case I387_CEIL:
14494 if (mode == I387_CW_CEIL)
14495 return mode;
14496 break;
14497
14498 case I387_MASK_PM:
14499 if (mode == I387_CW_MASK_PM)
14500 return mode;
14501 break;
14502
14503 default:
14504 gcc_unreachable ();
14505 }
14506
14507 return I387_CW_ANY;
14508 }
14509
14510 /* Output code to initialize control word copies used by trunc?f?i and
14511 rounding patterns. The current control word is saved to a stack slot,
14512 and a copy, modified according to MODE, is stored in the slot for MODE. */
14513
14514 void
14515 emit_i387_cw_initialization (int mode)
14516 {
14517 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14518 rtx new_mode;
14519
14520 enum ix86_stack_slot slot;
14521
14522 rtx reg = gen_reg_rtx (HImode);
14523
14524 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14525 emit_move_insn (reg, copy_rtx (stored_mode));
14526
14527 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14528 || optimize_function_for_size_p (cfun))
14529 {
14530 switch (mode)
14531 {
14532 case I387_CW_TRUNC:
14533 /* round toward zero (truncate) */
14534 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14535 slot = SLOT_CW_TRUNC;
14536 break;
14537
14538 case I387_CW_FLOOR:
14539 /* round down toward -oo */
14540 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14541 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14542 slot = SLOT_CW_FLOOR;
14543 break;
14544
14545 case I387_CW_CEIL:
14546 /* round up toward +oo */
14547 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14548 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14549 slot = SLOT_CW_CEIL;
14550 break;
14551
14552 case I387_CW_MASK_PM:
14553 /* mask precision exception for nearbyint() */
14554 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14555 slot = SLOT_CW_MASK_PM;
14556 break;
14557
14558 default:
14559 gcc_unreachable ();
14560 }
14561 }
14562 else
14563 {
14564 switch (mode)
14565 {
14566 case I387_CW_TRUNC:
14567 /* round toward zero (truncate) */
14568 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14569 slot = SLOT_CW_TRUNC;
14570 break;
14571
14572 case I387_CW_FLOOR:
14573 /* round down toward -oo */
14574 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14575 slot = SLOT_CW_FLOOR;
14576 break;
14577
14578 case I387_CW_CEIL:
14579 /* round up toward +oo */
14580 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14581 slot = SLOT_CW_CEIL;
14582 break;
14583
14584 case I387_CW_MASK_PM:
14585 /* mask precision exception for nearbyint() */
14586 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14587 slot = SLOT_CW_MASK_PM;
14588 break;
14589
14590 default:
14591 gcc_unreachable ();
14592 }
14593 }
14594
14595 gcc_assert (slot < MAX_386_STACK_LOCALS);
14596
14597 new_mode = assign_386_stack_local (HImode, slot);
14598 emit_move_insn (new_mode, reg);
14599 }
14600
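/* For reference (standard x87 control word layout, stated here as an
   aside): the rounding-control field occupies bits 11:10, so 0x0c00
   covers both RC bits.  RC = 00 rounds to nearest, 01 rounds toward
   -inf (floor), 10 rounds toward +inf (ceil), and 11 truncates toward
   zero, which is why the code above establishes 0x0c00, 0x0400 or
   0x0800 in those bits.  The precision-exception mask PM is bit 5,
   i.e. 0x0020, and is set for the nearbyint expansion.  */
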
14601 /* Output code for INSN to convert a float to a signed int. OPERANDS
14602 are the insn operands. The output may be [HSD]Imode and the input
14603 operand may be [SDX]Fmode. */
14604
14605 const char *
14606 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14607 {
14608 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14609 int dimode_p = GET_MODE (operands[0]) == DImode;
14610 int round_mode = get_attr_i387_cw (insn);
14611
14612 /* Jump through a hoop or two for DImode, since the hardware has no
14613 non-popping instruction. We used to do this a different way, but
14614 that was somewhat fragile and broke with post-reload splitters. */
14615 if ((dimode_p || fisttp) && !stack_top_dies)
14616 output_asm_insn ("fld\t%y1", operands);
14617
14618 gcc_assert (STACK_TOP_P (operands[1]));
14619 gcc_assert (MEM_P (operands[0]));
14620 gcc_assert (GET_MODE (operands[1]) != TFmode);
14621
14622 if (fisttp)
14623 output_asm_insn ("fisttp%Z0\t%0", operands);
14624 else
14625 {
14626 if (round_mode != I387_CW_ANY)
14627 output_asm_insn ("fldcw\t%3", operands);
14628 if (stack_top_dies || dimode_p)
14629 output_asm_insn ("fistp%Z0\t%0", operands);
14630 else
14631 output_asm_insn ("fist%Z0\t%0", operands);
14632 if (round_mode != I387_CW_ANY)
14633 output_asm_insn ("fldcw\t%2", operands);
14634 }
14635
14636 return "";
14637 }
14638
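/* An illustrative sketch of the usual output (assuming fisttp is not
   available, the control word must be switched, and the stack top
   dies): an SImode truncation comes out roughly as

	fldcw	%3		(load the truncating control word)
	fistpl	%0		(store and pop)
	fldcw	%2		(restore the saved control word)

   where %2 and %3 are the control word stack slots set up by
   emit_i387_cw_initialization above.  */
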
14639 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14640 have the values zero or one, indicates the ffreep insn's operand
14641 from the OPERANDS array. */
14642
14643 static const char *
14644 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14645 {
14646 if (TARGET_USE_FFREEP)
14647 #ifdef HAVE_AS_IX86_FFREEP
14648 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14649 #else
14650 {
14651 static char retval[32];
14652 int regno = REGNO (operands[opno]);
14653
14654 gcc_assert (FP_REGNO_P (regno));
14655
14656 regno -= FIRST_STACK_REG;
14657
14658 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14659 return retval;
14660 }
14661 #endif
14662
14663 return opno ? "fstp\t%y1" : "fstp\t%y0";
14664 }
14665
14666
14667 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14668 should be used. UNORDERED_P is true when fucom should be used. */
14669
14670 const char *
14671 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14672 {
14673 int stack_top_dies;
14674 rtx cmp_op0, cmp_op1;
14675 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14676
14677 if (eflags_p)
14678 {
14679 cmp_op0 = operands[0];
14680 cmp_op1 = operands[1];
14681 }
14682 else
14683 {
14684 cmp_op0 = operands[1];
14685 cmp_op1 = operands[2];
14686 }
14687
14688 if (is_sse)
14689 {
14690 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14691 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14692 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14693 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14694
14695 if (GET_MODE (operands[0]) == SFmode)
14696 if (unordered_p)
14697 return &ucomiss[TARGET_AVX ? 0 : 1];
14698 else
14699 return &comiss[TARGET_AVX ? 0 : 1];
14700 else
14701 if (unordered_p)
14702 return &ucomisd[TARGET_AVX ? 0 : 1];
14703 else
14704 return &comisd[TARGET_AVX ? 0 : 1];
14705 }
14706
14707 gcc_assert (STACK_TOP_P (cmp_op0));
14708
14709 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14710
14711 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14712 {
14713 if (stack_top_dies)
14714 {
14715 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14716 return output_387_ffreep (operands, 1);
14717 }
14718 else
14719 return "ftst\n\tfnstsw\t%0";
14720 }
14721
14722 if (STACK_REG_P (cmp_op1)
14723 && stack_top_dies
14724 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14725 && REGNO (cmp_op1) != FIRST_STACK_REG)
14726 {
14727 /* If the top of the 387 stack dies, and the other operand is
14728 also a stack register that dies, then this must be a
14729 `fcompp' float compare. */
14730
14731 if (eflags_p)
14732 {
14733 /* There is no double popping fcomi variant. Fortunately,
14734 eflags is immune from the fstp's cc clobbering. */
14735 if (unordered_p)
14736 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14737 else
14738 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14739 return output_387_ffreep (operands, 0);
14740 }
14741 else
14742 {
14743 if (unordered_p)
14744 return "fucompp\n\tfnstsw\t%0";
14745 else
14746 return "fcompp\n\tfnstsw\t%0";
14747 }
14748 }
14749 else
14750 {
14751 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14752
14753 static const char * const alt[16] =
14754 {
14755 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14756 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14757 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14758 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14759
14760 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14761 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14762 NULL,
14763 NULL,
14764
14765 "fcomi\t{%y1, %0|%0, %y1}",
14766 "fcomip\t{%y1, %0|%0, %y1}",
14767 "fucomi\t{%y1, %0|%0, %y1}",
14768 "fucomip\t{%y1, %0|%0, %y1}",
14769
14770 NULL,
14771 NULL,
14772 NULL,
14773 NULL
14774 };
14775
14776 int mask;
14777 const char *ret;
14778
14779 mask = eflags_p << 3;
14780 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14781 mask |= unordered_p << 1;
14782 mask |= stack_top_dies;
14783
14784 gcc_assert (mask < 16);
14785 ret = alt[mask];
14786 gcc_assert (ret);
14787
14788 return ret;
14789 }
14790 }
14791
14792 void
14793 ix86_output_addr_vec_elt (FILE *file, int value)
14794 {
14795 const char *directive = ASM_LONG;
14796
14797 #ifdef ASM_QUAD
14798 if (TARGET_64BIT)
14799 directive = ASM_QUAD;
14800 #else
14801 gcc_assert (!TARGET_64BIT);
14802 #endif
14803
14804 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14805 }
14806
14807 void
14808 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14809 {
14810 const char *directive = ASM_LONG;
14811
14812 #ifdef ASM_QUAD
14813 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14814 directive = ASM_QUAD;
14815 #else
14816 gcc_assert (!TARGET_64BIT);
14817 #endif
14818 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14819 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14820 fprintf (file, "%s%s%d-%s%d\n",
14821 directive, LPREFIX, value, LPREFIX, rel);
14822 else if (HAVE_AS_GOTOFF_IN_DATA)
14823 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14824 #if TARGET_MACHO
14825 else if (TARGET_MACHO)
14826 {
14827 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14828 machopic_output_function_base_name (file);
14829 putc ('\n', file);
14830 }
14831 #endif
14832 else
14833 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14834 GOT_SYMBOL_NAME, LPREFIX, value);
14835 }
14836 \f
14837 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14838 for the target. */
14839
14840 void
14841 ix86_expand_clear (rtx dest)
14842 {
14843 rtx tmp;
14844
14845 /* We play register width games, which are only valid after reload. */
14846 gcc_assert (reload_completed);
14847
14848 /* Avoid HImode and its attendant prefix byte. */
14849 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14850 dest = gen_rtx_REG (SImode, REGNO (dest));
14851 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14852
14853 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14854 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14855 {
14856 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14857 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14858 }
14859
14860 emit_insn (tmp);
14861 }
14862
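/* Illustrative sketch of the two forms (for an SImode destination):

	xorl	%eax, %eax	(2 bytes, clobbers the flags)
	movl	$0, %eax	(5 bytes, leaves the flags alone)

   The explicit CLOBBER of FLAGS_REG attached above is what allows the
   xor form; the mov form is kept for targets where TARGET_USE_MOV0
   holds and we are not optimizing the insn for speed.  */
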
14863 /* X is an unchanging MEM. If it is a constant pool reference, return
14864 the constant pool rtx, else NULL. */
14865
14866 rtx
14867 maybe_get_pool_constant (rtx x)
14868 {
14869 x = ix86_delegitimize_address (XEXP (x, 0));
14870
14871 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14872 return get_pool_constant (x);
14873
14874 return NULL_RTX;
14875 }
14876
14877 void
14878 ix86_expand_move (enum machine_mode mode, rtx operands[])
14879 {
14880 rtx op0, op1;
14881 enum tls_model model;
14882
14883 op0 = operands[0];
14884 op1 = operands[1];
14885
14886 if (GET_CODE (op1) == SYMBOL_REF)
14887 {
14888 model = SYMBOL_REF_TLS_MODEL (op1);
14889 if (model)
14890 {
14891 op1 = legitimize_tls_address (op1, model, true);
14892 op1 = force_operand (op1, op0);
14893 if (op1 == op0)
14894 return;
14895 }
14896 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14897 && SYMBOL_REF_DLLIMPORT_P (op1))
14898 op1 = legitimize_dllimport_symbol (op1, false);
14899 }
14900 else if (GET_CODE (op1) == CONST
14901 && GET_CODE (XEXP (op1, 0)) == PLUS
14902 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14903 {
14904 rtx addend = XEXP (XEXP (op1, 0), 1);
14905 rtx symbol = XEXP (XEXP (op1, 0), 0);
14906 rtx tmp = NULL;
14907
14908 model = SYMBOL_REF_TLS_MODEL (symbol);
14909 if (model)
14910 tmp = legitimize_tls_address (symbol, model, true);
14911 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14912 && SYMBOL_REF_DLLIMPORT_P (symbol))
14913 tmp = legitimize_dllimport_symbol (symbol, true);
14914
14915 if (tmp)
14916 {
14917 tmp = force_operand (tmp, NULL);
14918 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14919 op0, 1, OPTAB_DIRECT);
14920 if (tmp == op0)
14921 return;
14922 }
14923 }
14924
14925 if ((flag_pic || MACHOPIC_INDIRECT)
14926 && mode == Pmode && symbolic_operand (op1, Pmode))
14927 {
14928 if (TARGET_MACHO && !TARGET_64BIT)
14929 {
14930 #if TARGET_MACHO
14931 /* dynamic-no-pic */
14932 if (MACHOPIC_INDIRECT)
14933 {
14934 rtx temp = ((reload_in_progress
14935 || ((op0 && REG_P (op0))
14936 && mode == Pmode))
14937 ? op0 : gen_reg_rtx (Pmode));
14938 op1 = machopic_indirect_data_reference (op1, temp);
14939 if (MACHOPIC_PURE)
14940 op1 = machopic_legitimize_pic_address (op1, mode,
14941 temp == op1 ? 0 : temp);
14942 }
14943 if (op0 != op1 && GET_CODE (op0) != MEM)
14944 {
14945 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
14946 emit_insn (insn);
14947 return;
14948 }
14949 if (GET_CODE (op0) == MEM)
14950 op1 = force_reg (Pmode, op1);
14951 else
14952 {
14953 rtx temp = op0;
14954 if (GET_CODE (temp) != REG)
14955 temp = gen_reg_rtx (Pmode);
14956 temp = legitimize_pic_address (op1, temp);
14957 if (temp == op0)
14958 return;
14959 op1 = temp;
14960 }
14961 /* dynamic-no-pic */
14962 #endif
14963 }
14964 else
14965 {
14966 if (MEM_P (op0))
14967 op1 = force_reg (Pmode, op1);
14968 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14969 {
14970 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14971 op1 = legitimize_pic_address (op1, reg);
14972 if (op0 == op1)
14973 return;
14974 }
14975 }
14976 }
14977 else
14978 {
14979 if (MEM_P (op0)
14980 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14981 || !push_operand (op0, mode))
14982 && MEM_P (op1))
14983 op1 = force_reg (mode, op1);
14984
14985 if (push_operand (op0, mode)
14986 && ! general_no_elim_operand (op1, mode))
14987 op1 = copy_to_mode_reg (mode, op1);
14988
14989 /* Force large constants in 64bit compilation into register
14990 to get them CSEed. */
14991 if (can_create_pseudo_p ()
14992 && (mode == DImode) && TARGET_64BIT
14993 && immediate_operand (op1, mode)
14994 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14995 && !register_operand (op0, mode)
14996 && optimize)
14997 op1 = copy_to_mode_reg (mode, op1);
14998
14999 if (can_create_pseudo_p ()
15000 && FLOAT_MODE_P (mode)
15001 && GET_CODE (op1) == CONST_DOUBLE)
15002 {
15003 /* If we are loading a floating point constant to a register,
15004 force the value to memory now, since we'll get better code
15005 out the back end. */
15006
15007 op1 = validize_mem (force_const_mem (mode, op1));
15008 if (!register_operand (op0, mode))
15009 {
15010 rtx temp = gen_reg_rtx (mode);
15011 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15012 emit_move_insn (op0, temp);
15013 return;
15014 }
15015 }
15016 }
15017
15018 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15019 }
15020
15021 void
15022 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15023 {
15024 rtx op0 = operands[0], op1 = operands[1];
15025 unsigned int align = GET_MODE_ALIGNMENT (mode);
15026
15027 /* Force constants other than zero into memory. We do not know how
15028 the instructions used to build constants modify the upper 64 bits
15029 of the register; once we have that information we may be able
15030 to handle some of them more efficiently. */
15031 if (can_create_pseudo_p ()
15032 && register_operand (op0, mode)
15033 && (CONSTANT_P (op1)
15034 || (GET_CODE (op1) == SUBREG
15035 && CONSTANT_P (SUBREG_REG (op1))))
15036 && !standard_sse_constant_p (op1))
15037 op1 = validize_mem (force_const_mem (mode, op1));
15038
15039 /* We need to check memory alignment for SSE mode since an attribute
15040 can make operands unaligned. */
15041 if (can_create_pseudo_p ()
15042 && SSE_REG_MODE_P (mode)
15043 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15044 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15045 {
15046 rtx tmp[2];
15047
15048 /* ix86_expand_vector_move_misalign() does not like constants ... */
15049 if (CONSTANT_P (op1)
15050 || (GET_CODE (op1) == SUBREG
15051 && CONSTANT_P (SUBREG_REG (op1))))
15052 op1 = validize_mem (force_const_mem (mode, op1));
15053
15054 /* ... nor both arguments in memory. */
15055 if (!register_operand (op0, mode)
15056 && !register_operand (op1, mode))
15057 op1 = force_reg (mode, op1);
15058
15059 tmp[0] = op0; tmp[1] = op1;
15060 ix86_expand_vector_move_misalign (mode, tmp);
15061 return;
15062 }
15063
15064 /* Make operand1 a register if it isn't already. */
15065 if (can_create_pseudo_p ()
15066 && !register_operand (op0, mode)
15067 && !register_operand (op1, mode))
15068 {
15069 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15070 return;
15071 }
15072
15073 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15074 }
15075
15076 /* Split 32-byte AVX unaligned load and store if needed. */
15077
15078 static void
15079 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15080 {
15081 rtx m;
15082 rtx (*extract) (rtx, rtx, rtx);
15083 rtx (*move_unaligned) (rtx, rtx);
15084 enum machine_mode mode;
15085
15086 switch (GET_MODE (op0))
15087 {
15088 default:
15089 gcc_unreachable ();
15090 case V32QImode:
15091 extract = gen_avx_vextractf128v32qi;
15092 move_unaligned = gen_avx_movdqu256;
15093 mode = V16QImode;
15094 break;
15095 case V8SFmode:
15096 extract = gen_avx_vextractf128v8sf;
15097 move_unaligned = gen_avx_movups256;
15098 mode = V4SFmode;
15099 break;
15100 case V4DFmode:
15101 extract = gen_avx_vextractf128v4df;
15102 move_unaligned = gen_avx_movupd256;
15103 mode = V2DFmode;
15104 break;
15105 }
15106
15107 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15108 {
15109 rtx r = gen_reg_rtx (mode);
15110 m = adjust_address (op1, mode, 0);
15111 emit_move_insn (r, m);
15112 m = adjust_address (op1, mode, 16);
15113 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15114 emit_move_insn (op0, r);
15115 }
15116 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15117 {
15118 m = adjust_address (op0, mode, 0);
15119 emit_insn (extract (m, op1, const0_rtx));
15120 m = adjust_address (op0, mode, 16);
15121 emit_insn (extract (m, op1, const1_rtx));
15122 }
15123 else
15124 emit_insn (move_unaligned (op0, op1));
15125 }
15126
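/* An illustrative sketch (V4DFmode, split-load tuning enabled; the
   address register is hypothetical): the unaligned 32-byte load ends
   up roughly as

	vmovupd	(%rax), %xmm0
	vinsertf128	$1, 16(%rax), %ymm0, %ymm0

   i.e. two 16-byte halves instead of a single 32-byte vmovupd; the
   store side is mirrored with vextractf128.  This is a win on parts
   for which TARGET_AVX256_SPLIT_UNALIGNED_LOAD/STORE are set.  */
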
15127 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15128 straight to ix86_expand_vector_move. */
15129 /* Code generation for scalar reg-reg moves of single and double precision data:
15130 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15131 movaps reg, reg
15132 else
15133 movss reg, reg
15134 if (x86_sse_partial_reg_dependency == true)
15135 movapd reg, reg
15136 else
15137 movsd reg, reg
15138
15139 Code generation for scalar loads of double precision data:
15140 if (x86_sse_split_regs == true)
15141 movlpd mem, reg (gas syntax)
15142 else
15143 movsd mem, reg
15144
15145 Code generation for unaligned packed loads of single precision data
15146 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15147 if (x86_sse_unaligned_move_optimal)
15148 movups mem, reg
15149
15150 if (x86_sse_partial_reg_dependency == true)
15151 {
15152 xorps reg, reg
15153 movlps mem, reg
15154 movhps mem+8, reg
15155 }
15156 else
15157 {
15158 movlps mem, reg
15159 movhps mem+8, reg
15160 }
15161
15162 Code generation for unaligned packed loads of double precision data
15163 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15164 if (x86_sse_unaligned_move_optimal)
15165 movupd mem, reg
15166
15167 if (x86_sse_split_regs == true)
15168 {
15169 movlpd mem, reg
15170 movhpd mem+8, reg
15171 }
15172 else
15173 {
15174 movsd mem, reg
15175 movhpd mem+8, reg
15176 }
15177 */
15178
15179 void
15180 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15181 {
15182 rtx op0, op1, m;
15183
15184 op0 = operands[0];
15185 op1 = operands[1];
15186
15187 if (TARGET_AVX)
15188 {
15189 switch (GET_MODE_CLASS (mode))
15190 {
15191 case MODE_VECTOR_INT:
15192 case MODE_INT:
15193 switch (GET_MODE_SIZE (mode))
15194 {
15195 case 16:
15196 /* If we're optimizing for size, movups is the smallest. */
15197 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15198 {
15199 op0 = gen_lowpart (V4SFmode, op0);
15200 op1 = gen_lowpart (V4SFmode, op1);
15201 emit_insn (gen_sse_movups (op0, op1));
15202 return;
15203 }
15204 op0 = gen_lowpart (V16QImode, op0);
15205 op1 = gen_lowpart (V16QImode, op1);
15206 emit_insn (gen_sse2_movdqu (op0, op1));
15207 break;
15208 case 32:
15209 op0 = gen_lowpart (V32QImode, op0);
15210 op1 = gen_lowpart (V32QImode, op1);
15211 ix86_avx256_split_vector_move_misalign (op0, op1);
15212 break;
15213 default:
15214 gcc_unreachable ();
15215 }
15216 break;
15217 case MODE_VECTOR_FLOAT:
15218 op0 = gen_lowpart (mode, op0);
15219 op1 = gen_lowpart (mode, op1);
15220
15221 switch (mode)
15222 {
15223 case V4SFmode:
15224 emit_insn (gen_sse_movups (op0, op1));
15225 break;
15226 case V8SFmode:
15227 ix86_avx256_split_vector_move_misalign (op0, op1);
15228 break;
15229 case V2DFmode:
15230 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15231 {
15232 op0 = gen_lowpart (V4SFmode, op0);
15233 op1 = gen_lowpart (V4SFmode, op1);
15234 emit_insn (gen_sse_movups (op0, op1));
15235 return;
15236 }
15237 emit_insn (gen_sse2_movupd (op0, op1));
15238 break;
15239 case V4DFmode:
15240 ix86_avx256_split_vector_move_misalign (op0, op1);
15241 break;
15242 default:
15243 gcc_unreachable ();
15244 }
15245 break;
15246
15247 default:
15248 gcc_unreachable ();
15249 }
15250
15251 return;
15252 }
15253
15254 if (MEM_P (op1))
15255 {
15256 /* If we're optimizing for size, movups is the smallest. */
15257 if (optimize_insn_for_size_p ()
15258 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15259 {
15260 op0 = gen_lowpart (V4SFmode, op0);
15261 op1 = gen_lowpart (V4SFmode, op1);
15262 emit_insn (gen_sse_movups (op0, op1));
15263 return;
15264 }
15265
15266 /* ??? If we have typed data, then it would appear that using
15267 movdqu is the only way to get unaligned data loaded with
15268 integer type. */
15269 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15270 {
15271 op0 = gen_lowpart (V16QImode, op0);
15272 op1 = gen_lowpart (V16QImode, op1);
15273 emit_insn (gen_sse2_movdqu (op0, op1));
15274 return;
15275 }
15276
15277 if (TARGET_SSE2 && mode == V2DFmode)
15278 {
15279 rtx zero;
15280
15281 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15282 {
15283 op0 = gen_lowpart (V2DFmode, op0);
15284 op1 = gen_lowpart (V2DFmode, op1);
15285 emit_insn (gen_sse2_movupd (op0, op1));
15286 return;
15287 }
15288
15289 /* When SSE registers are split into halves, we can avoid
15290 writing to the top half twice. */
15291 if (TARGET_SSE_SPLIT_REGS)
15292 {
15293 emit_clobber (op0);
15294 zero = op0;
15295 }
15296 else
15297 {
15298 /* ??? Not sure about the best option for the Intel chips.
15299 The following would seem to satisfy; the register is
15300 entirely cleared, breaking the dependency chain. We
15301 then store to the upper half, with a dependency depth
15302 of one. A rumor has it that Intel recommends two movsd
15303 followed by an unpacklpd, but this is unconfirmed. And
15304 given that the dependency depth of the unpacklpd would
15305 still be one, I'm not sure why this would be better. */
15306 zero = CONST0_RTX (V2DFmode);
15307 }
15308
15309 m = adjust_address (op1, DFmode, 0);
15310 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15311 m = adjust_address (op1, DFmode, 8);
15312 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15313 }
15314 else
15315 {
15316 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15317 {
15318 op0 = gen_lowpart (V4SFmode, op0);
15319 op1 = gen_lowpart (V4SFmode, op1);
15320 emit_insn (gen_sse_movups (op0, op1));
15321 return;
15322 }
15323
15324 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15325 emit_move_insn (op0, CONST0_RTX (mode));
15326 else
15327 emit_clobber (op0);
15328
15329 if (mode != V4SFmode)
15330 op0 = gen_lowpart (V4SFmode, op0);
15331 m = adjust_address (op1, V2SFmode, 0);
15332 emit_insn (gen_sse_loadlps (op0, op0, m));
15333 m = adjust_address (op1, V2SFmode, 8);
15334 emit_insn (gen_sse_loadhps (op0, op0, m));
15335 }
15336 }
15337 else if (MEM_P (op0))
15338 {
15339 /* If we're optimizing for size, movups is the smallest. */
15340 if (optimize_insn_for_size_p ()
15341 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15342 {
15343 op0 = gen_lowpart (V4SFmode, op0);
15344 op1 = gen_lowpart (V4SFmode, op1);
15345 emit_insn (gen_sse_movups (op0, op1));
15346 return;
15347 }
15348
15349 /* ??? Similar to above, only less clear because of quote
15350 typeless stores unquote. */
15351 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15352 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15353 {
15354 op0 = gen_lowpart (V16QImode, op0);
15355 op1 = gen_lowpart (V16QImode, op1);
15356 emit_insn (gen_sse2_movdqu (op0, op1));
15357 return;
15358 }
15359
15360 if (TARGET_SSE2 && mode == V2DFmode)
15361 {
15362 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15363 {
15364 op0 = gen_lowpart (V2DFmode, op0);
15365 op1 = gen_lowpart (V2DFmode, op1);
15366 emit_insn (gen_sse2_movupd (op0, op1));
15367 }
15368 else
15369 {
15370 m = adjust_address (op0, DFmode, 0);
15371 emit_insn (gen_sse2_storelpd (m, op1));
15372 m = adjust_address (op0, DFmode, 8);
15373 emit_insn (gen_sse2_storehpd (m, op1));
15374 }
15375 }
15376 else
15377 {
15378 if (mode != V4SFmode)
15379 op1 = gen_lowpart (V4SFmode, op1);
15380
15381 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15382 {
15383 op0 = gen_lowpart (V4SFmode, op0);
15384 emit_insn (gen_sse_movups (op0, op1));
15385 }
15386 else
15387 {
15388 m = adjust_address (op0, V2SFmode, 0);
15389 emit_insn (gen_sse_storelps (m, op1));
15390 m = adjust_address (op0, V2SFmode, 8);
15391 emit_insn (gen_sse_storehps (m, op1));
15392 }
15393 }
15394 }
15395 else
15396 gcc_unreachable ();
15397 }
15398
15399 /* Expand a push in MODE. This is some mode for which we do not support
15400 proper push instructions, at least from the registers that we expect
15401 the value to live in. */
15402
15403 void
15404 ix86_expand_push (enum machine_mode mode, rtx x)
15405 {
15406 rtx tmp;
15407
15408 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15409 GEN_INT (-GET_MODE_SIZE (mode)),
15410 stack_pointer_rtx, 1, OPTAB_DIRECT);
15411 if (tmp != stack_pointer_rtx)
15412 emit_move_insn (stack_pointer_rtx, tmp);
15413
15414 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15415
15416 /* When we push an operand onto the stack, it has to be aligned at least
15417 at the function argument boundary. However, since we don't have
15418 the argument type, we can't determine the actual argument
15419 boundary. */
15420 emit_move_insn (tmp, x);
15421 }
15422
15423 /* Helper function of ix86_fixup_binary_operands to canonicalize
15424 operand order. Returns true if the operands should be swapped. */
15425
15426 static bool
15427 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15428 rtx operands[])
15429 {
15430 rtx dst = operands[0];
15431 rtx src1 = operands[1];
15432 rtx src2 = operands[2];
15433
15434 /* If the operation is not commutative, we can't do anything. */
15435 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15436 return false;
15437
15438 /* Highest priority is that src1 should match dst. */
15439 if (rtx_equal_p (dst, src1))
15440 return false;
15441 if (rtx_equal_p (dst, src2))
15442 return true;
15443
15444 /* Next highest priority is that immediate constants come second. */
15445 if (immediate_operand (src2, mode))
15446 return false;
15447 if (immediate_operand (src1, mode))
15448 return true;
15449
15450 /* Lowest priority is that memory references should come second. */
15451 if (MEM_P (src2))
15452 return false;
15453 if (MEM_P (src1))
15454 return true;
15455
15456 return false;
15457 }
15458
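/* For example (illustrative): expanding the commutative PLUS with
   operands[0] == b, operands[1] == a, operands[2] == b swaps the
   sources so that src1 matches the destination, which is what the
   two-address integer patterns want; likewise an immediate or memory
   src1 is moved into the second slot so it can use the insn's
   immediate/memory alternative.  */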
15459
15460 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15461 destination to use for the operation. If different from the true
15462 destination in operands[0], a copy operation will be required. */
15463
15464 rtx
15465 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15466 rtx operands[])
15467 {
15468 rtx dst = operands[0];
15469 rtx src1 = operands[1];
15470 rtx src2 = operands[2];
15471
15472 /* Canonicalize operand order. */
15473 if (ix86_swap_binary_operands_p (code, mode, operands))
15474 {
15475 rtx temp;
15476
15477 /* It is invalid to swap operands of different modes. */
15478 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15479
15480 temp = src1;
15481 src1 = src2;
15482 src2 = temp;
15483 }
15484
15485 /* Both source operands cannot be in memory. */
15486 if (MEM_P (src1) && MEM_P (src2))
15487 {
15488 /* Optimization: Only read from memory once. */
15489 if (rtx_equal_p (src1, src2))
15490 {
15491 src2 = force_reg (mode, src2);
15492 src1 = src2;
15493 }
15494 else
15495 src2 = force_reg (mode, src2);
15496 }
15497
15498 /* If the destination is memory, and we do not have matching source
15499 operands, do things in registers. */
15500 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15501 dst = gen_reg_rtx (mode);
15502
15503 /* Source 1 cannot be a constant. */
15504 if (CONSTANT_P (src1))
15505 src1 = force_reg (mode, src1);
15506
15507 /* Source 1 cannot be a non-matching memory. */
15508 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15509 src1 = force_reg (mode, src1);
15510
15511 operands[1] = src1;
15512 operands[2] = src2;
15513 return dst;
15514 }
15515
15516 /* Similarly, but assume that the destination has already been
15517 set up properly. */
15518
15519 void
15520 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15521 enum machine_mode mode, rtx operands[])
15522 {
15523 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15524 gcc_assert (dst == operands[0]);
15525 }
15526
15527 /* Attempt to expand a binary operator. Make the expansion closer to the
15528 actual machine than just general_operand, which would allow 3 separate
15529 memory references (one output, two inputs) in a single insn. */
15530
15531 void
15532 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15533 rtx operands[])
15534 {
15535 rtx src1, src2, dst, op, clob;
15536
15537 dst = ix86_fixup_binary_operands (code, mode, operands);
15538 src1 = operands[1];
15539 src2 = operands[2];
15540
15541 /* Emit the instruction. */
15542
15543 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15544 if (reload_in_progress)
15545 {
15546 /* Reload doesn't know about the flags register, and doesn't know that
15547 it doesn't want to clobber it. We can only do this with PLUS. */
15548 gcc_assert (code == PLUS);
15549 emit_insn (op);
15550 }
15551 else if (reload_completed
15552 && code == PLUS
15553 && !rtx_equal_p (dst, src1))
15554 {
15555 /* This is going to be an LEA; avoid splitting it later. */
15556 emit_insn (op);
15557 }
15558 else
15559 {
15560 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15561 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15562 }
15563
15564 /* Fix up the destination if needed. */
15565 if (dst != operands[0])
15566 emit_move_insn (operands[0], dst);
15567 }
15568
15569 /* Return TRUE or FALSE depending on whether the binary operator meets the
15570 appropriate constraints. */
15571
15572 bool
15573 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15574 rtx operands[3])
15575 {
15576 rtx dst = operands[0];
15577 rtx src1 = operands[1];
15578 rtx src2 = operands[2];
15579
15580 /* Both source operands cannot be in memory. */
15581 if (MEM_P (src1) && MEM_P (src2))
15582 return false;
15583
15584 /* Canonicalize operand order for commutative operators. */
15585 if (ix86_swap_binary_operands_p (code, mode, operands))
15586 {
15587 rtx temp = src1;
15588 src1 = src2;
15589 src2 = temp;
15590 }
15591
15592 /* If the destination is memory, we must have a matching source operand. */
15593 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15594 return false;
15595
15596 /* Source 1 cannot be a constant. */
15597 if (CONSTANT_P (src1))
15598 return false;
15599
15600 /* Source 1 cannot be a non-matching memory. */
15601 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15602 {
15603 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15604 return (code == AND
15605 && (mode == HImode
15606 || mode == SImode
15607 || (TARGET_64BIT && mode == DImode))
15608 && CONST_INT_P (src2)
15609 && (INTVAL (src2) == 0xff
15610 || INTVAL (src2) == 0xffff));
15611 }
15612
15613 return true;
15614 }
15615
15616 /* Attempt to expand a unary operator. Make the expansion closer to the
15617 actual machine than just general_operand, which would allow 2 separate
15618 memory references (one output, one input) in a single insn. */
15619
15620 void
15621 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15622 rtx operands[])
15623 {
15624 int matching_memory;
15625 rtx src, dst, op, clob;
15626
15627 dst = operands[0];
15628 src = operands[1];
15629
15630 /* If the destination is memory, and we do not have matching source
15631 operands, do things in registers. */
15632 matching_memory = 0;
15633 if (MEM_P (dst))
15634 {
15635 if (rtx_equal_p (dst, src))
15636 matching_memory = 1;
15637 else
15638 dst = gen_reg_rtx (mode);
15639 }
15640
15641 /* When source operand is memory, destination must match. */
15642 if (MEM_P (src) && !matching_memory)
15643 src = force_reg (mode, src);
15644
15645 /* Emit the instruction. */
15646
15647 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15648 if (reload_in_progress || code == NOT)
15649 {
15650 /* Reload doesn't know about the flags register, and doesn't know that
15651 it doesn't want to clobber it. */
15652 gcc_assert (code == NOT);
15653 emit_insn (op);
15654 }
15655 else
15656 {
15657 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15658 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15659 }
15660
15661 /* Fix up the destination if needed. */
15662 if (dst != operands[0])
15663 emit_move_insn (operands[0], dst);
15664 }
15665
15666 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15667 divisor are within the range [0-255]. */
15668
15669 void
15670 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15671 bool signed_p)
15672 {
15673 rtx end_label, qimode_label;
15674 rtx insn, div, mod;
15675 rtx scratch, tmp0, tmp1, tmp2;
15676 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15677 rtx (*gen_zero_extend) (rtx, rtx);
15678 rtx (*gen_test_ccno_1) (rtx, rtx);
15679
15680 switch (mode)
15681 {
15682 case SImode:
15683 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15684 gen_test_ccno_1 = gen_testsi_ccno_1;
15685 gen_zero_extend = gen_zero_extendqisi2;
15686 break;
15687 case DImode:
15688 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15689 gen_test_ccno_1 = gen_testdi_ccno_1;
15690 gen_zero_extend = gen_zero_extendqidi2;
15691 break;
15692 default:
15693 gcc_unreachable ();
15694 }
15695
15696 end_label = gen_label_rtx ();
15697 qimode_label = gen_label_rtx ();
15698
15699 scratch = gen_reg_rtx (mode);
15700
15701 /* Use 8bit unsigned divmod if dividend and divisor are within
15702 the range [0-255]. */
15703 emit_move_insn (scratch, operands[2]);
15704 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15705 scratch, 1, OPTAB_DIRECT);
15706 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15707 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15708 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15709 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15710 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15711 pc_rtx);
15712 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15713 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15714 JUMP_LABEL (insn) = qimode_label;
15715
15716 /* Generate the original signed/unsigned divmod. */
15717 div = gen_divmod4_1 (operands[0], operands[1],
15718 operands[2], operands[3]);
15719 emit_insn (div);
15720
15721 /* Branch to the end. */
15722 emit_jump_insn (gen_jump (end_label));
15723 emit_barrier ();
15724
15725 /* Generate 8bit unsigned divide. */
15726 emit_label (qimode_label);
15727 /* Don't use operands[0] for result of 8bit divide since not all
15728 registers support QImode ZERO_EXTRACT. */
15729 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15730 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15731 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15732 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15733
15734 if (signed_p)
15735 {
15736 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15737 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15738 }
15739 else
15740 {
15741 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15742 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15743 }
15744
15745 /* Extract remainder from AH. */
15746 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15747 if (REG_P (operands[1]))
15748 insn = emit_move_insn (operands[1], tmp1);
15749 else
15750 {
15751 /* Need a new scratch register since the old one holds the result
15752 of the 8bit divide. */
15753 scratch = gen_reg_rtx (mode);
15754 emit_move_insn (scratch, tmp1);
15755 insn = emit_move_insn (operands[1], scratch);
15756 }
15757 set_unique_reg_note (insn, REG_EQUAL, mod);
15758
15759 /* Zero extend quotient from AL. */
15760 tmp1 = gen_lowpart (QImode, tmp0);
15761 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15762 set_unique_reg_note (insn, REG_EQUAL, div);
15763
15764 emit_label (end_label);
15765 }
15766
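/* An illustrative sketch of the split (labels and registers are
   hypothetical; unsigned SImode case):

	movl	dividend, tmp
	orl	divisor, tmp
	testl	$-256, tmp
	je	.Lbyte			(both operands fit in 8 bits)
	...ordinary 32-bit divl...
	jmp	.Ldone
   .Lbyte:
	divb	divisor_b		(AX / r8: AL = quotient, AH = remainder)
   .Ldone:

   The byte divide typically has a much lower latency than the full
   divide, so the extra test and branch pays off when small operands
   are common.  */
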
15767 #define LEA_SEARCH_THRESHOLD 12
15768
15769 /* Search backward for non-agu definition of register number REGNO1
15770 or register number REGNO2 in INSN's basic block until
15771 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15772 2. Reach BB boundary, or
15773 3. Reach agu definition.
15774 Returns the distance between the non-agu definition point and INSN.
15775 If no definition point, returns -1. */
15776
15777 static int
15778 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15779 rtx insn)
15780 {
15781 basic_block bb = BLOCK_FOR_INSN (insn);
15782 int distance = 0;
15783 df_ref *def_rec;
15784 enum attr_type insn_type;
15785
15786 if (insn != BB_HEAD (bb))
15787 {
15788 rtx prev = PREV_INSN (insn);
15789 while (prev && distance < LEA_SEARCH_THRESHOLD)
15790 {
15791 if (NONDEBUG_INSN_P (prev))
15792 {
15793 distance++;
15794 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15795 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15796 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15797 && (regno1 == DF_REF_REGNO (*def_rec)
15798 || regno2 == DF_REF_REGNO (*def_rec)))
15799 {
15800 insn_type = get_attr_type (prev);
15801 if (insn_type != TYPE_LEA)
15802 goto done;
15803 }
15804 }
15805 if (prev == BB_HEAD (bb))
15806 break;
15807 prev = PREV_INSN (prev);
15808 }
15809 }
15810
15811 if (distance < LEA_SEARCH_THRESHOLD)
15812 {
15813 edge e;
15814 edge_iterator ei;
15815 bool simple_loop = false;
15816
15817 FOR_EACH_EDGE (e, ei, bb->preds)
15818 if (e->src == bb)
15819 {
15820 simple_loop = true;
15821 break;
15822 }
15823
15824 if (simple_loop)
15825 {
15826 rtx prev = BB_END (bb);
15827 while (prev
15828 && prev != insn
15829 && distance < LEA_SEARCH_THRESHOLD)
15830 {
15831 if (NONDEBUG_INSN_P (prev))
15832 {
15833 distance++;
15834 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15835 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15836 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15837 && (regno1 == DF_REF_REGNO (*def_rec)
15838 || regno2 == DF_REF_REGNO (*def_rec)))
15839 {
15840 insn_type = get_attr_type (prev);
15841 if (insn_type != TYPE_LEA)
15842 goto done;
15843 }
15844 }
15845 prev = PREV_INSN (prev);
15846 }
15847 }
15848 }
15849
15850 distance = -1;
15851
15852 done:
15853 /* get_attr_type may modify recog data. We want to make sure
15854 that recog data is valid for instruction INSN, on which
15855 distance_non_agu_define is called. INSN is unchanged here. */
15856 extract_insn_cached (insn);
15857 return distance;
15858 }
15859
15860 /* Return the distance between INSN and the next insn that uses
15861 register number REGNO0 in a memory address. Return -1 if no such
15862 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15863
15864 static int
15865 distance_agu_use (unsigned int regno0, rtx insn)
15866 {
15867 basic_block bb = BLOCK_FOR_INSN (insn);
15868 int distance = 0;
15869 df_ref *def_rec;
15870 df_ref *use_rec;
15871
15872 if (insn != BB_END (bb))
15873 {
15874 rtx next = NEXT_INSN (insn);
15875 while (next && distance < LEA_SEARCH_THRESHOLD)
15876 {
15877 if (NONDEBUG_INSN_P (next))
15878 {
15879 distance++;
15880
15881 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15882 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15883 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15884 && regno0 == DF_REF_REGNO (*use_rec))
15885 {
15886 /* Return DISTANCE if OP0 is used in memory
15887 address in NEXT. */
15888 return distance;
15889 }
15890
15891 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15892 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15893 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15894 && regno0 == DF_REF_REGNO (*def_rec))
15895 {
15896 /* Return -1 if OP0 is set in NEXT. */
15897 return -1;
15898 }
15899 }
15900 if (next == BB_END (bb))
15901 break;
15902 next = NEXT_INSN (next);
15903 }
15904 }
15905
15906 if (distance < LEA_SEARCH_THRESHOLD)
15907 {
15908 edge e;
15909 edge_iterator ei;
15910 bool simple_loop = false;
15911
15912 FOR_EACH_EDGE (e, ei, bb->succs)
15913 if (e->dest == bb)
15914 {
15915 simple_loop = true;
15916 break;
15917 }
15918
15919 if (simple_loop)
15920 {
15921 rtx next = BB_HEAD (bb);
15922 while (next
15923 && next != insn
15924 && distance < LEA_SEARCH_THRESHOLD)
15925 {
15926 if (NONDEBUG_INSN_P (next))
15927 {
15928 distance++;
15929
15930 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15931 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15932 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15933 && regno0 == DF_REF_REGNO (*use_rec))
15934 {
15935 /* Return DISTANCE if OP0 is used in memory
15936 address in NEXT. */
15937 return distance;
15938 }
15939
15940 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15941 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15942 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15943 && regno0 == DF_REF_REGNO (*def_rec))
15944 {
15945 /* Return -1 if OP0 is set in NEXT. */
15946 return -1;
15947 }
15948
15949 }
15950 next = NEXT_INSN (next);
15951 }
15952 }
15953 }
15954
15955 return -1;
15956 }
15957
15958 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15959 there is a choice between LEA and ADD:
15960 Negative value: ADD is preferred over LEA
15961 Zero: Neutral
15962 Positive value: LEA is preferred over ADD. */
15963 #define IX86_LEA_PRIORITY 2
15964
15965 /* Return true if it is ok to optimize an ADD operation to an LEA
15966 operation to avoid flag register consumption. For most processors,
15967 ADD is faster than LEA. For processors like ATOM, if the
15968 destination register of the LEA holds an actual address which will be
15969 used soon, LEA is better; otherwise ADD is better. */
15970
15971 bool
15972 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15973 {
15974 unsigned int regno0 = true_regnum (operands[0]);
15975 unsigned int regno1 = true_regnum (operands[1]);
15976 unsigned int regno2 = true_regnum (operands[2]);
15977
15978 /* If a = b + c with a != b and a != c, we must use the lea form. */
15979 if (regno0 != regno1 && regno0 != regno2)
15980 return true;
15981
15982 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15983 return false;
15984 else
15985 {
15986 int dist_define, dist_use;
15987
15988 /* Return false if REGNO0 isn't used in memory address. */
15989 dist_use = distance_agu_use (regno0, insn);
15990 if (dist_use <= 0)
15991 return false;
15992
15993 dist_define = distance_non_agu_define (regno1, regno2, insn);
15994 if (dist_define <= 0)
15995 return true;
15996
15997 /* If this insn has both a backward non-agu dependence and a forward
15998 agu dependence, the one with the shorter distance takes effect. */
15999 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16000 return false;
16001
16002 return true;
16003 }
16004 }
16005
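/* An illustrative example of the heuristic above (registers are
   hypothetical):

	lea/add	a = b + c
	mov	(a), d

   Here the result is used as an address almost immediately, so on
   TARGET_OPT_AGU cores such as Atom the lea form wins; if the result
   only feeds ordinary ALU instructions, add is preferred since it is
   cheaper on most processors.  */
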
16006 /* Return true if destination reg of SET_BODY is shift count of
16007 USE_BODY. */
16008
16009 static bool
16010 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16011 {
16012 rtx set_dest;
16013 rtx shift_rtx;
16014 int i;
16015
16016 /* Retrieve destination of SET_BODY. */
16017 switch (GET_CODE (set_body))
16018 {
16019 case SET:
16020 set_dest = SET_DEST (set_body);
16021 if (!set_dest || !REG_P (set_dest))
16022 return false;
16023 break;
16024 case PARALLEL:
16025 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16026 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16027 use_body))
16028 return true;
16029 default:
16030 return false;
16031 break;
16032 }
16033
16034 /* Retrieve shift count of USE_BODY. */
16035 switch (GET_CODE (use_body))
16036 {
16037 case SET:
16038 shift_rtx = XEXP (use_body, 1);
16039 break;
16040 case PARALLEL:
16041 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16042 if (ix86_dep_by_shift_count_body (set_body,
16043 XVECEXP (use_body, 0, i)))
16044 return true;
16045 default:
16046 return false;
16047 break;
16048 }
16049
16050 if (shift_rtx
16051 && (GET_CODE (shift_rtx) == ASHIFT
16052 || GET_CODE (shift_rtx) == LSHIFTRT
16053 || GET_CODE (shift_rtx) == ASHIFTRT
16054 || GET_CODE (shift_rtx) == ROTATE
16055 || GET_CODE (shift_rtx) == ROTATERT))
16056 {
16057 rtx shift_count = XEXP (shift_rtx, 1);
16058
16059 /* Return true if shift count is dest of SET_BODY. */
16060 if (REG_P (shift_count)
16061 && true_regnum (set_dest) == true_regnum (shift_count))
16062 return true;
16063 }
16064
16065 return false;
16066 }
16067
16068 /* Return true if destination reg of SET_INSN is shift count of
16069 USE_INSN. */
16070
16071 bool
16072 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16073 {
16074 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16075 PATTERN (use_insn));
16076 }
16077
16078 /* Return TRUE or FALSE depending on whether the unary operator meets the
16079 appropriate constraints. */
16080
16081 bool
16082 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16083 enum machine_mode mode ATTRIBUTE_UNUSED,
16084 rtx operands[2] ATTRIBUTE_UNUSED)
16085 {
16086 /* If one of operands is memory, source and destination must match. */
16087 if ((MEM_P (operands[0])
16088 || MEM_P (operands[1]))
16089 && ! rtx_equal_p (operands[0], operands[1]))
16090 return false;
16091 return true;
16092 }
16093
16094 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16095 are ok, keeping in mind the possible movddup alternative. */
16096
16097 bool
16098 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16099 {
16100 if (MEM_P (operands[0]))
16101 return rtx_equal_p (operands[0], operands[1 + high]);
16102 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16103 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16104 return true;
16105 }
16106
16107 /* Post-reload splitter for converting an SF or DFmode value in an
16108 SSE register into an unsigned SImode. */
16109
16110 void
16111 ix86_split_convert_uns_si_sse (rtx operands[])
16112 {
16113 enum machine_mode vecmode;
16114 rtx value, large, zero_or_two31, input, two31, x;
16115
16116 large = operands[1];
16117 zero_or_two31 = operands[2];
16118 input = operands[3];
16119 two31 = operands[4];
16120 vecmode = GET_MODE (large);
16121 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16122
16123 /* Load up the value into the low element. We must ensure that the other
16124 elements are valid floats -- zero is the easiest such value. */
16125 if (MEM_P (input))
16126 {
16127 if (vecmode == V4SFmode)
16128 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16129 else
16130 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16131 }
16132 else
16133 {
16134 input = gen_rtx_REG (vecmode, REGNO (input));
16135 emit_move_insn (value, CONST0_RTX (vecmode));
16136 if (vecmode == V4SFmode)
16137 emit_insn (gen_sse_movss (value, value, input));
16138 else
16139 emit_insn (gen_sse2_movsd (value, value, input));
16140 }
16141
16142 emit_move_insn (large, two31);
16143 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16144
16145 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16146 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16147
16148 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16149 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16150
16151 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16152 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16153
16154 large = gen_rtx_REG (V4SImode, REGNO (large));
16155 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16156
16157 x = gen_rtx_REG (V4SImode, REGNO (value));
16158 if (vecmode == V4SFmode)
16159 emit_insn (gen_sse2_cvttps2dq (x, value));
16160 else
16161 emit_insn (gen_sse2_cvttpd2dq (x, value));
16162 value = x;
16163
16164 emit_insn (gen_xorv4si3 (value, value, large));
16165 }
16166
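/* A sketch of the arithmetic behind the splitter above: the SSE
   truncating conversions only produce signed SImode, so for an input
   x in [0, 2^32) we compute

	mask   = (x >= 2^31) ? all-ones : 0
	value  = x - (mask ? 2^31 : 0)
	result = (int) value ^ (mask & 0x80000000)

   Inputs below 2^31 convert directly; inputs at or above 2^31 are
   reduced by 2^31 before the signed conversion and the sign bit is
   put back by the final xor.  */
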
16167 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16168 Expects the 64-bit DImode to be supplied in a pair of integral
16169 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16170 -mfpmath=sse, !optimize_size only. */
16171
16172 void
16173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16174 {
16175 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16176 rtx int_xmm, fp_xmm;
16177 rtx biases, exponents;
16178 rtx x;
16179
16180 int_xmm = gen_reg_rtx (V4SImode);
16181 if (TARGET_INTER_UNIT_MOVES)
16182 emit_insn (gen_movdi_to_sse (int_xmm, input));
16183 else if (TARGET_SSE_SPLIT_REGS)
16184 {
16185 emit_clobber (int_xmm);
16186 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16187 }
16188 else
16189 {
16190 x = gen_reg_rtx (V2DImode);
16191 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16192 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16193 }
16194
16195 x = gen_rtx_CONST_VECTOR (V4SImode,
16196 gen_rtvec (4, GEN_INT (0x43300000UL),
16197 GEN_INT (0x45300000UL),
16198 const0_rtx, const0_rtx));
16199 exponents = validize_mem (force_const_mem (V4SImode, x));
16200
16201 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16202 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16203
16204 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16205 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16206 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16207 (0x1.0p84 + double(fp_value_hi_xmm)).
16208 Note these exponents differ by 32. */
16209
16210 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16211
16212 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16213 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16214 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16215 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16216 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16217 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16218 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16219 biases = validize_mem (force_const_mem (V2DFmode, biases));
16220 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16221
16222 /* Add the upper and lower DFmode values together. */
16223 if (TARGET_SSE3)
16224 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16225 else
16226 {
16227 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16228 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16229 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16230 }
16231
16232 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16233 }
16234
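/* Why the bias subtraction above is exact (a numerical aside): with
   u = hi * 2^32 + lo, the two doubles built by the interleave are
   exactly 0x1.0p52 + lo and 0x1.0p84 + hi * 2^32, and

	(0x1.0p52 + lo) - 0x1.0p52        == (double) lo
	(0x1.0p84 + hi * 2^32) - 0x1.0p84 == (double) (hi * 2^32)

   with no rounding, since each value fits in the 53-bit significand
   at the respective exponent.  Only the final addition of the two
   halves can round.  */
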
16235 /* Not used, but eases macroization of patterns. */
16236 void
16237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16238 rtx input ATTRIBUTE_UNUSED)
16239 {
16240 gcc_unreachable ();
16241 }
16242
16243 /* Convert an unsigned SImode value into a DFmode. Only currently used
16244 for SSE, but applicable anywhere. */
16245
16246 void
16247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16248 {
16249 REAL_VALUE_TYPE TWO31r;
16250 rtx x, fp;
16251
16252 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16253 NULL, 1, OPTAB_DIRECT);
16254
16255 fp = gen_reg_rtx (DFmode);
16256 emit_insn (gen_floatsidf2 (fp, x));
16257
16258 real_ldexp (&TWO31r, &dconst1, 31);
16259 x = const_double_from_real_value (TWO31r, DFmode);
16260
16261 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16262 if (x != target)
16263 emit_move_insn (target, x);
16264 }
16265
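/* The identity used above (a numerical aside): for unsigned 32-bit x,

	(double) x == (double) (int) (x - 2^31) + 0x1.0p31

   The PLUS of -2^31 wraps x into the signed SImode range, the signed
   SImode-to-DFmode conversion is exact, and adding 2^31 back is exact
   because the result is below 2^53.  */
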
16266 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16267 32-bit mode; otherwise we have a direct convert instruction. */
16268
16269 void
16270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16271 {
16272 REAL_VALUE_TYPE TWO32r;
16273 rtx fp_lo, fp_hi, x;
16274
16275 fp_lo = gen_reg_rtx (DFmode);
16276 fp_hi = gen_reg_rtx (DFmode);
16277
16278 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16279
16280 real_ldexp (&TWO32r, &dconst1, 32);
16281 x = const_double_from_real_value (TWO32r, DFmode);
16282 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16283
16284 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16285
16286 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16287 0, OPTAB_DIRECT);
16288 if (x != target)
16289 emit_move_insn (target, x);
16290 }
16291
16292 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16293 For x86_32, -mfpmath=sse, !optimize_size only. */
16294 void
16295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16296 {
16297 REAL_VALUE_TYPE ONE16r;
16298 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16299
16300 real_ldexp (&ONE16r, &dconst1, 16);
16301 x = const_double_from_real_value (ONE16r, SFmode);
16302 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16303 NULL, 0, OPTAB_DIRECT);
16304 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16305 NULL, 0, OPTAB_DIRECT);
16306 fp_hi = gen_reg_rtx (SFmode);
16307 fp_lo = gen_reg_rtx (SFmode);
16308 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16309 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16310 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16311 0, OPTAB_DIRECT);
16312 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16313 0, OPTAB_DIRECT);
16314 if (!rtx_equal_p (target, fp_hi))
16315 emit_move_insn (target, fp_hi);
16316 }
16317
16318 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16319 then replicate the value for all elements of the vector
16320 register. */
16321
16322 rtx
16323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16324 {
16325 rtvec v;
16326 switch (mode)
16327 {
16328 case V4SImode:
16329 gcc_assert (vect);
16330 v = gen_rtvec (4, value, value, value, value);
16331 return gen_rtx_CONST_VECTOR (V4SImode, v);
16332
16333 case V2DImode:
16334 gcc_assert (vect);
16335 v = gen_rtvec (2, value, value);
16336 return gen_rtx_CONST_VECTOR (V2DImode, v);
16337
16338 case V8SFmode:
16339 if (vect)
16340 v = gen_rtvec (8, value, value, value, value,
16341 value, value, value, value);
16342 else
16343 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16344 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16345 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16346 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16347 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16348
16349 case V4SFmode:
16350 if (vect)
16351 v = gen_rtvec (4, value, value, value, value);
16352 else
16353 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16354 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16355 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16356
16357 case V4DFmode:
16358 if (vect)
16359 v = gen_rtvec (4, value, value, value, value);
16360 else
16361 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16362 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16363 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16364
16365 case V2DFmode:
16366 if (vect)
16367 v = gen_rtvec (2, value, value);
16368 else
16369 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16370 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375 }
16376
16377 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16378 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16379 for an SSE register. If VECT is true, then replicate the mask for
16380 all elements of the vector register. If INVERT is true, then create
16381 a mask excluding the sign bit. */
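/* For example, for V4SFmode this builds the constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } (sign bits only),
   and with INVERT the complement { 0x7fffffff, ... }; these are the
   masks used by the ABS/NEG and copysign expanders below.  */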
16382
16383 rtx
16384 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16385 {
16386 enum machine_mode vec_mode, imode;
16387 HOST_WIDE_INT hi, lo;
16388 int shift = 63;
16389 rtx v;
16390 rtx mask;
16391
16392 /* Find the sign bit, sign extended to 2*HWI. */
16393 switch (mode)
16394 {
16395 case V4SImode:
16396 case V8SFmode:
16397 case V4SFmode:
16398 vec_mode = mode;
16399 mode = GET_MODE_INNER (mode);
16400 imode = SImode;
16401 lo = 0x80000000, hi = lo < 0;
16402 break;
16403
16404 case V2DImode:
16405 case V4DFmode:
16406 case V2DFmode:
16407 vec_mode = mode;
16408 mode = GET_MODE_INNER (mode);
16409 imode = DImode;
16410 if (HOST_BITS_PER_WIDE_INT >= 64)
16411 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16412 else
16413 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16414 break;
16415
16416 case TImode:
16417 case TFmode:
16418 vec_mode = VOIDmode;
16419 if (HOST_BITS_PER_WIDE_INT >= 64)
16420 {
16421 imode = TImode;
16422 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16423 }
16424 else
16425 {
16426 rtvec vec;
16427
16428 imode = DImode;
16429 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16430
16431 if (invert)
16432 {
16433 lo = ~lo, hi = ~hi;
16434 v = constm1_rtx;
16435 }
16436 else
16437 v = const0_rtx;
16438
16439 mask = immed_double_const (lo, hi, imode);
16440
16441 vec = gen_rtvec (2, v, mask);
16442 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16443 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16444
16445 return v;
16446 }
16447 break;
16448
16449 default:
16450 gcc_unreachable ();
16451 }
16452
16453 if (invert)
16454 lo = ~lo, hi = ~hi;
16455
16456 /* Force this value into the low part of a fp vector constant. */
16457 mask = immed_double_const (lo, hi, imode);
16458 mask = gen_lowpart (mode, mask);
16459
16460 if (vec_mode == VOIDmode)
16461 return force_reg (mode, mask);
16462
16463 v = ix86_build_const_vector (vec_mode, vect, mask);
16464 return force_reg (vec_mode, v);
16465 }
16466
16467 /* Generate code for floating point ABS or NEG. */
16468
16469 void
16470 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16471 rtx operands[])
16472 {
16473 rtx mask, set, dst, src;
16474 bool use_sse = false;
16475 bool vector_mode = VECTOR_MODE_P (mode);
16476 enum machine_mode vmode = mode;
16477
16478 if (vector_mode)
16479 use_sse = true;
16480 else if (mode == TFmode)
16481 use_sse = true;
16482 else if (TARGET_SSE_MATH)
16483 {
16484 use_sse = SSE_FLOAT_MODE_P (mode);
16485 if (mode == SFmode)
16486 vmode = V4SFmode;
16487 else if (mode == DFmode)
16488 vmode = V2DFmode;
16489 }
16490
16491 /* NEG and ABS performed with SSE use bitwise mask operations.
16492 Create the appropriate mask now. */
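/* In bit terms: NEG is dst = src ^ sign_mask and ABS is
   dst = src & ~sign_mask, so the mask built here is the sign bit for
   NEG and its complement (INVERT set) for ABS.  */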
16493 if (use_sse)
16494 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16495 else
16496 mask = NULL_RTX;
16497
16498 dst = operands[0];
16499 src = operands[1];
16500
16501 set = gen_rtx_fmt_e (code, mode, src);
16502 set = gen_rtx_SET (VOIDmode, dst, set);
16503
16504 if (mask)
16505 {
16506 rtx use, clob;
16507 rtvec par;
16508
16509 use = gen_rtx_USE (VOIDmode, mask);
16510 if (vector_mode)
16511 par = gen_rtvec (2, set, use);
16512 else
16513 {
16514 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16515 par = gen_rtvec (3, set, use, clob);
16516 }
16517 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16518 }
16519 else
16520 emit_insn (set);
16521 }
16522
16523 /* Expand a copysign operation. Special case operand 0 being a constant. */
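/* The underlying identity is
     copysign (x, y) = (x & ~sign_mask) | (y & sign_mask),
   i.e. magnitude bits from X and the sign bit from Y; the splitters
   below realize this with the masks from ix86_build_signbit_mask.  */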
16524
16525 void
16526 ix86_expand_copysign (rtx operands[])
16527 {
16528 enum machine_mode mode, vmode;
16529 rtx dest, op0, op1, mask, nmask;
16530
16531 dest = operands[0];
16532 op0 = operands[1];
16533 op1 = operands[2];
16534
16535 mode = GET_MODE (dest);
16536
16537 if (mode == SFmode)
16538 vmode = V4SFmode;
16539 else if (mode == DFmode)
16540 vmode = V2DFmode;
16541 else
16542 vmode = mode;
16543
16544 if (GET_CODE (op0) == CONST_DOUBLE)
16545 {
16546 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16547
16548 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16549 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16550
16551 if (mode == SFmode || mode == DFmode)
16552 {
16553 if (op0 == CONST0_RTX (mode))
16554 op0 = CONST0_RTX (vmode);
16555 else
16556 {
16557 rtx v = ix86_build_const_vector (vmode, false, op0);
16558
16559 op0 = force_reg (vmode, v);
16560 }
16561 }
16562 else if (op0 != CONST0_RTX (mode))
16563 op0 = force_reg (mode, op0);
16564
16565 mask = ix86_build_signbit_mask (vmode, 0, 0);
16566
16567 if (mode == SFmode)
16568 copysign_insn = gen_copysignsf3_const;
16569 else if (mode == DFmode)
16570 copysign_insn = gen_copysigndf3_const;
16571 else
16572 copysign_insn = gen_copysigntf3_const;
16573
16574 emit_insn (copysign_insn (dest, op0, op1, mask));
16575 }
16576 else
16577 {
16578 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16579
16580 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16581 mask = ix86_build_signbit_mask (vmode, 0, 0);
16582
16583 if (mode == SFmode)
16584 copysign_insn = gen_copysignsf3_var;
16585 else if (mode == DFmode)
16586 copysign_insn = gen_copysigndf3_var;
16587 else
16588 copysign_insn = gen_copysigntf3_var;
16589
16590 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16591 }
16592 }
16593
16594 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16595 be a constant, and so has already been expanded into a vector constant. */
16596
16597 void
16598 ix86_split_copysign_const (rtx operands[])
16599 {
16600 enum machine_mode mode, vmode;
16601 rtx dest, op0, mask, x;
16602
16603 dest = operands[0];
16604 op0 = operands[1];
16605 mask = operands[3];
16606
16607 mode = GET_MODE (dest);
16608 vmode = GET_MODE (mask);
16609
16610 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16611 x = gen_rtx_AND (vmode, dest, mask);
16612 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16613
16614 if (op0 != CONST0_RTX (vmode))
16615 {
16616 x = gen_rtx_IOR (vmode, dest, op0);
16617 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16618 }
16619 }
16620
16621 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16622 so we have to do two masks. */
16623
16624 void
16625 ix86_split_copysign_var (rtx operands[])
16626 {
16627 enum machine_mode mode, vmode;
16628 rtx dest, scratch, op0, op1, mask, nmask, x;
16629
16630 dest = operands[0];
16631 scratch = operands[1];
16632 op0 = operands[2];
16633 op1 = operands[3];
16634 nmask = operands[4];
16635 mask = operands[5];
16636
16637 mode = GET_MODE (dest);
16638 vmode = GET_MODE (mask);
16639
16640 if (rtx_equal_p (op0, op1))
16641 {
16642 /* Shouldn't happen often (it's useless, obviously), but when it does
16643 we'd generate incorrect code if we continue below. */
16644 emit_move_insn (dest, op0);
16645 return;
16646 }
16647
16648 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16649 {
16650 gcc_assert (REGNO (op1) == REGNO (scratch));
16651
16652 x = gen_rtx_AND (vmode, scratch, mask);
16653 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16654
16655 dest = mask;
16656 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16657 x = gen_rtx_NOT (vmode, dest);
16658 x = gen_rtx_AND (vmode, x, op0);
16659 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16660 }
16661 else
16662 {
16663 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16664 {
16665 x = gen_rtx_AND (vmode, scratch, mask);
16666 }
16667 else /* alternative 2,4 */
16668 {
16669 gcc_assert (REGNO (mask) == REGNO (scratch));
16670 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16671 x = gen_rtx_AND (vmode, scratch, op1);
16672 }
16673 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16674
16675 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16676 {
16677 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16678 x = gen_rtx_AND (vmode, dest, nmask);
16679 }
16680 else /* alternative 3,4 */
16681 {
16682 gcc_assert (REGNO (nmask) == REGNO (dest));
16683 dest = nmask;
16684 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16685 x = gen_rtx_AND (vmode, dest, op0);
16686 }
16687 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16688 }
16689
16690 x = gen_rtx_IOR (vmode, dest, scratch);
16691 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16692 }
16693
16694 /* Return TRUE or FALSE depending on whether the first SET in INSN
16695 has source and destination with matching CC modes and whether the
16696 CC mode is at least as constrained as REQ_MODE. */
16697
16698 bool
16699 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16700 {
16701 rtx set;
16702 enum machine_mode set_mode;
16703
16704 set = PATTERN (insn);
16705 if (GET_CODE (set) == PARALLEL)
16706 set = XVECEXP (set, 0, 0);
16707 gcc_assert (GET_CODE (set) == SET);
16708 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16709
16710 set_mode = GET_MODE (SET_DEST (set));
16711 switch (set_mode)
16712 {
16713 case CCNOmode:
16714 if (req_mode != CCNOmode
16715 && (req_mode != CCmode
16716 || XEXP (SET_SRC (set), 1) != const0_rtx))
16717 return false;
16718 break;
16719 case CCmode:
16720 if (req_mode == CCGCmode)
16721 return false;
16722 /* FALLTHRU */
16723 case CCGCmode:
16724 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16725 return false;
16726 /* FALLTHRU */
16727 case CCGOCmode:
16728 if (req_mode == CCZmode)
16729 return false;
16730 /* FALLTHRU */
16731 case CCZmode:
16732 break;
16733
16734 case CCAmode:
16735 case CCCmode:
16736 case CCOmode:
16737 case CCSmode:
16738 if (set_mode != req_mode)
16739 return false;
16740 break;
16741
16742 default:
16743 gcc_unreachable ();
16744 }
16745
16746 return GET_MODE (SET_SRC (set)) == set_mode;
16747 }
16748
16749 /* Generate insn patterns to do an integer compare of OPERANDS. */
16750
16751 static rtx
16752 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16753 {
16754 enum machine_mode cmpmode;
16755 rtx tmp, flags;
16756
16757 cmpmode = SELECT_CC_MODE (code, op0, op1);
16758 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16759
16760 /* This is very simple, but making the interface the same as in the
16761 FP case makes the rest of the code easier. */
16762 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16763 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16764
16765 /* Return the test that should be put into the flags user, i.e.
16766 the bcc, scc, or cmov instruction. */
16767 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16768 }
16769
16770 /* Figure out whether to use ordered or unordered fp comparisons.
16771 Return the appropriate mode to use. */
16772
16773 enum machine_mode
16774 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16775 {
16776 /* ??? In order to make all comparisons reversible, we do all comparisons
16777 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16778 between trapping and nontrapping forms of comparisons, we can make
16779 inequality comparisons trapping again, since that results in better code
16780 when using FCOM based compares. */
16781 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16782 }
16783
16784 enum machine_mode
16785 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16786 {
16787 enum machine_mode mode = GET_MODE (op0);
16788
16789 if (SCALAR_FLOAT_MODE_P (mode))
16790 {
16791 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16792 return ix86_fp_compare_mode (code);
16793 }
16794
16795 switch (code)
16796 {
16797 /* Only zero flag is needed. */
16798 case EQ: /* ZF=0 */
16799 case NE: /* ZF!=0 */
16800 return CCZmode;
16801 /* Codes needing carry flag. */
16802 case GEU: /* CF=0 */
16803 case LTU: /* CF=1 */
16804 /* Detect overflow checks. They need just the carry flag. */
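/* For example, unsigned "sum = a + b; if (sum < a) ..." gives the
   comparison (ltu (plus a b) a); the carry flag of the addition
   already answers it, so CCCmode suffices and no separate compare
   is needed.  */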
16805 if (GET_CODE (op0) == PLUS
16806 && rtx_equal_p (op1, XEXP (op0, 0)))
16807 return CCCmode;
16808 else
16809 return CCmode;
16810 case GTU: /* CF=0 & ZF=0 */
16811 case LEU: /* CF=1 | ZF=1 */
16812 /* Detect overflow checks. They need just the carry flag. */
16813 if (GET_CODE (op0) == MINUS
16814 && rtx_equal_p (op1, XEXP (op0, 0)))
16815 return CCCmode;
16816 else
16817 return CCmode;
16818 /* Codes possibly doable only with sign flag when
16819 comparing against zero. */
16820 case GE: /* SF=OF or SF=0 */
16821 case LT: /* SF<>OF or SF=1 */
16822 if (op1 == const0_rtx)
16823 return CCGOCmode;
16824 else
16825 /* For other cases Carry flag is not required. */
16826 return CCGCmode;
16827 /* Codes doable only with the sign flag when comparing
16828 against zero, but we lack a jump instruction for it,
16829 so we need to use relational tests against overflow,
16830 which thus needs to be zero. */
16831 case GT: /* ZF=0 & SF=OF */
16832 case LE: /* ZF=1 | SF<>OF */
16833 if (op1 == const0_rtx)
16834 return CCNOmode;
16835 else
16836 return CCGCmode;
16837 /* The strcmp pattern does (use flags) and combine may ask us for the
16838 proper mode. */
16839 case USE:
16840 return CCmode;
16841 default:
16842 gcc_unreachable ();
16843 }
16844 }
16845
16846 /* Return the fixed registers used for condition codes. */
16847
16848 static bool
16849 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16850 {
16851 *p1 = FLAGS_REG;
16852 *p2 = FPSR_REG;
16853 return true;
16854 }
16855
16856 /* If two condition code modes are compatible, return a condition code
16857 mode which is compatible with both. Otherwise, return
16858 VOIDmode. */
16859
16860 static enum machine_mode
16861 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16862 {
16863 if (m1 == m2)
16864 return m1;
16865
16866 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16867 return VOIDmode;
16868
16869 if ((m1 == CCGCmode && m2 == CCGOCmode)
16870 || (m1 == CCGOCmode && m2 == CCGCmode))
16871 return CCGCmode;
16872
16873 switch (m1)
16874 {
16875 default:
16876 gcc_unreachable ();
16877
16878 case CCmode:
16879 case CCGCmode:
16880 case CCGOCmode:
16881 case CCNOmode:
16882 case CCAmode:
16883 case CCCmode:
16884 case CCOmode:
16885 case CCSmode:
16886 case CCZmode:
16887 switch (m2)
16888 {
16889 default:
16890 return VOIDmode;
16891
16892 case CCmode:
16893 case CCGCmode:
16894 case CCGOCmode:
16895 case CCNOmode:
16896 case CCAmode:
16897 case CCCmode:
16898 case CCOmode:
16899 case CCSmode:
16900 case CCZmode:
16901 return CCmode;
16902 }
16903
16904 case CCFPmode:
16905 case CCFPUmode:
16906 /* These are only compatible with themselves, which we already
16907 checked above. */
16908 return VOIDmode;
16909 }
16910 }
16911
16912
16913 /* Return a comparison we can do that is equivalent to
16914 swap_condition (code), apart possibly from orderedness.
16915 Never change orderedness if TARGET_IEEE_FP, returning
16916 UNKNOWN in that case if necessary. */
16917
16918 static enum rtx_code
16919 ix86_fp_swap_condition (enum rtx_code code)
16920 {
16921 switch (code)
16922 {
16923 case GT: /* GTU - CF=0 & ZF=0 */
16924 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16925 case GE: /* GEU - CF=0 */
16926 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16927 case UNLT: /* LTU - CF=1 */
16928 return TARGET_IEEE_FP ? UNKNOWN : GT;
16929 case UNLE: /* LEU - CF=1 | ZF=1 */
16930 return TARGET_IEEE_FP ? UNKNOWN : GE;
16931 default:
16932 return swap_condition (code);
16933 }
16934 }
16935
16936 /* Return the cost of comparison CODE using the best strategy for performance.
16937 All the following functions use the number of instructions as the cost metric.
16938 In the future this should be tweaked to compute bytes for optimize_size and
16939 to take into account the performance of various instructions on various CPUs. */
16940
16941 static int
16942 ix86_fp_comparison_cost (enum rtx_code code)
16943 {
16944 int arith_cost;
16945
16946 /* The cost of code using bit-twiddling on %ah. */
16947 switch (code)
16948 {
16949 case UNLE:
16950 case UNLT:
16951 case LTGT:
16952 case GT:
16953 case GE:
16954 case UNORDERED:
16955 case ORDERED:
16956 case UNEQ:
16957 arith_cost = 4;
16958 break;
16959 case LT:
16960 case NE:
16961 case EQ:
16962 case UNGE:
16963 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16964 break;
16965 case LE:
16966 case UNGT:
16967 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16968 break;
16969 default:
16970 gcc_unreachable ();
16971 }
16972
16973 switch (ix86_fp_comparison_strategy (code))
16974 {
16975 case IX86_FPCMP_COMI:
16976 return arith_cost > 4 ? 3 : 2;
16977 case IX86_FPCMP_SAHF:
16978 return arith_cost > 4 ? 4 : 3;
16979 default:
16980 return arith_cost;
16981 }
16982 }
16983
16984 /* Return the strategy to use for floating-point comparisons. We assume that
16985 fcomi is always preferable where available, since that is also true when looking
16986 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16987
16988 enum ix86_fpcmp_strategy
16989 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16990 {
16991 /* Do fcomi/sahf based test when profitable. */
16992
16993 if (TARGET_CMOVE)
16994 return IX86_FPCMP_COMI;
16995
16996 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16997 return IX86_FPCMP_SAHF;
16998
16999 return IX86_FPCMP_ARITH;
17000 }
17001
17002 /* Swap, force into registers, or otherwise massage the two operands
17003 to a fp comparison. The operands are updated in place; the new
17004 comparison code is returned. */
17005
17006 static enum rtx_code
17007 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17008 {
17009 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17010 rtx op0 = *pop0, op1 = *pop1;
17011 enum machine_mode op_mode = GET_MODE (op0);
17012 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17013
17014 /* All of the unordered compare instructions only work on registers.
17015 The same is true of the fcomi compare instructions. The XFmode
17016 compare instructions require registers except when comparing
17017 against zero or when converting operand 1 from fixed point to
17018 floating point. */
17019
17020 if (!is_sse
17021 && (fpcmp_mode == CCFPUmode
17022 || (op_mode == XFmode
17023 && ! (standard_80387_constant_p (op0) == 1
17024 || standard_80387_constant_p (op1) == 1)
17025 && GET_CODE (op1) != FLOAT)
17026 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17027 {
17028 op0 = force_reg (op_mode, op0);
17029 op1 = force_reg (op_mode, op1);
17030 }
17031 else
17032 {
17033 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17034 things around if they appear profitable, otherwise force op0
17035 into a register. */
17036
17037 if (standard_80387_constant_p (op0) == 0
17038 || (MEM_P (op0)
17039 && ! (standard_80387_constant_p (op1) == 0
17040 || MEM_P (op1))))
17041 {
17042 enum rtx_code new_code = ix86_fp_swap_condition (code);
17043 if (new_code != UNKNOWN)
17044 {
17045 rtx tmp;
17046 tmp = op0, op0 = op1, op1 = tmp;
17047 code = new_code;
17048 }
17049 }
17050
17051 if (!REG_P (op0))
17052 op0 = force_reg (op_mode, op0);
17053
17054 if (CONSTANT_P (op1))
17055 {
17056 int tmp = standard_80387_constant_p (op1);
17057 if (tmp == 0)
17058 op1 = validize_mem (force_const_mem (op_mode, op1));
17059 else if (tmp == 1)
17060 {
17061 if (TARGET_CMOVE)
17062 op1 = force_reg (op_mode, op1);
17063 }
17064 else
17065 op1 = force_reg (op_mode, op1);
17066 }
17067 }
17068
17069 /* Try to rearrange the comparison to make it cheaper. */
17070 if (ix86_fp_comparison_cost (code)
17071 > ix86_fp_comparison_cost (swap_condition (code))
17072 && (REG_P (op1) || can_create_pseudo_p ()))
17073 {
17074 rtx tmp;
17075 tmp = op0, op0 = op1, op1 = tmp;
17076 code = swap_condition (code);
17077 if (!REG_P (op0))
17078 op0 = force_reg (op_mode, op0);
17079 }
17080
17081 *pop0 = op0;
17082 *pop1 = op1;
17083 return code;
17084 }
17085
17086 /* Convert the comparison codes we use to represent FP comparisons into the
17087 integer code that will result in a proper branch. Return UNKNOWN if no such
17088 code is available. */
17089
17090 enum rtx_code
17091 ix86_fp_compare_code_to_integer (enum rtx_code code)
17092 {
17093 switch (code)
17094 {
17095 case GT:
17096 return GTU;
17097 case GE:
17098 return GEU;
17099 case ORDERED:
17100 case UNORDERED:
17101 return code;
17102 break;
17103 case UNEQ:
17104 return EQ;
17105 break;
17106 case UNLT:
17107 return LTU;
17108 break;
17109 case UNLE:
17110 return LEU;
17111 break;
17112 case LTGT:
17113 return NE;
17114 break;
17115 default:
17116 return UNKNOWN;
17117 }
17118 }
17119
17120 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17121
17122 static rtx
17123 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17124 {
17125 enum machine_mode fpcmp_mode, intcmp_mode;
17126 rtx tmp, tmp2;
17127
17128 fpcmp_mode = ix86_fp_compare_mode (code);
17129 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17130
17131 /* Do fcomi/sahf based test when profitable. */
17132 switch (ix86_fp_comparison_strategy (code))
17133 {
17134 case IX86_FPCMP_COMI:
17135 intcmp_mode = fpcmp_mode;
17136 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17137 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17138 tmp);
17139 emit_insn (tmp);
17140 break;
17141
17142 case IX86_FPCMP_SAHF:
17143 intcmp_mode = fpcmp_mode;
17144 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17145 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17146 tmp);
17147
17148 if (!scratch)
17149 scratch = gen_reg_rtx (HImode);
17150 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17151 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17152 break;
17153
17154 case IX86_FPCMP_ARITH:
17155 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17156 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17157 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17158 if (!scratch)
17159 scratch = gen_reg_rtx (HImode);
17160 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17161
17162 /* In the unordered case, we have to check C2 for NaNs, which
17163 doesn't happen to work out to anything nice combination-wise.
17164 So do some bit twiddling on the value we've got in AH to come
17165 up with an appropriate set of condition codes. */
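/* After fnstsw, AH holds bits 8..15 of the FPU status word, so the
   condition bits appear as C0 = 0x01, C2 = 0x04 and C3 = 0x40 in the
   tests below; 0x45 masks C3|C2|C0 and 0x05 masks C2|C0.  An fcom
   result of "greater" sets none of them, "less" sets C0, "equal"
   sets C3, and "unordered" sets all three.  */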
17166
17167 intcmp_mode = CCNOmode;
17168 switch (code)
17169 {
17170 case GT:
17171 case UNGT:
17172 if (code == GT || !TARGET_IEEE_FP)
17173 {
17174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17175 code = EQ;
17176 }
17177 else
17178 {
17179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17180 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17181 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17182 intcmp_mode = CCmode;
17183 code = GEU;
17184 }
17185 break;
17186 case LT:
17187 case UNLT:
17188 if (code == LT && TARGET_IEEE_FP)
17189 {
17190 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17191 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17192 intcmp_mode = CCmode;
17193 code = EQ;
17194 }
17195 else
17196 {
17197 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17198 code = NE;
17199 }
17200 break;
17201 case GE:
17202 case UNGE:
17203 if (code == GE || !TARGET_IEEE_FP)
17204 {
17205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17206 code = EQ;
17207 }
17208 else
17209 {
17210 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17211 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17212 code = NE;
17213 }
17214 break;
17215 case LE:
17216 case UNLE:
17217 if (code == LE && TARGET_IEEE_FP)
17218 {
17219 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17220 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17221 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17222 intcmp_mode = CCmode;
17223 code = LTU;
17224 }
17225 else
17226 {
17227 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17228 code = NE;
17229 }
17230 break;
17231 case EQ:
17232 case UNEQ:
17233 if (code == EQ && TARGET_IEEE_FP)
17234 {
17235 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17236 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17237 intcmp_mode = CCmode;
17238 code = EQ;
17239 }
17240 else
17241 {
17242 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17243 code = NE;
17244 }
17245 break;
17246 case NE:
17247 case LTGT:
17248 if (code == NE && TARGET_IEEE_FP)
17249 {
17250 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17251 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17252 GEN_INT (0x40)));
17253 code = NE;
17254 }
17255 else
17256 {
17257 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17258 code = EQ;
17259 }
17260 break;
17261
17262 case UNORDERED:
17263 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17264 code = NE;
17265 break;
17266 case ORDERED:
17267 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17268 code = EQ;
17269 break;
17270
17271 default:
17272 gcc_unreachable ();
17273 }
17274 break;
17275
17276 default:
17277 gcc_unreachable();
17278 }
17279
17280 /* Return the test that should be put into the flags user, i.e.
17281 the bcc, scc, or cmov instruction. */
17282 return gen_rtx_fmt_ee (code, VOIDmode,
17283 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17284 const0_rtx);
17285 }
17286
17287 static rtx
17288 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17289 {
17290 rtx ret;
17291
17292 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17293 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17294
17295 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17296 {
17297 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17298 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17299 }
17300 else
17301 ret = ix86_expand_int_compare (code, op0, op1);
17302
17303 return ret;
17304 }
17305
17306 void
17307 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17308 {
17309 enum machine_mode mode = GET_MODE (op0);
17310 rtx tmp;
17311
17312 switch (mode)
17313 {
17314 case SFmode:
17315 case DFmode:
17316 case XFmode:
17317 case QImode:
17318 case HImode:
17319 case SImode:
17320 simple:
17321 tmp = ix86_expand_compare (code, op0, op1);
17322 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17323 gen_rtx_LABEL_REF (VOIDmode, label),
17324 pc_rtx);
17325 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17326 return;
17327
17328 case DImode:
17329 if (TARGET_64BIT)
17330 goto simple;
17331 case TImode:
17332 /* Expand DImode branch into multiple compare+branch. */
17333 {
17334 rtx lo[2], hi[2], label2;
17335 enum rtx_code code1, code2, code3;
17336 enum machine_mode submode;
17337
17338 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17339 {
17340 tmp = op0, op0 = op1, op1 = tmp;
17341 code = swap_condition (code);
17342 }
17343
17344 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17345 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17346
17347 submode = mode == DImode ? SImode : DImode;
17348
17349 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17350 avoid two branches. This costs one extra insn, so disable when
17351 optimizing for size. */
17352
17353 if ((code == EQ || code == NE)
17354 && (!optimize_insn_for_size_p ()
17355 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17356 {
17357 rtx xor0, xor1;
17358
17359 xor1 = hi[0];
17360 if (hi[1] != const0_rtx)
17361 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17362 NULL_RTX, 0, OPTAB_WIDEN);
17363
17364 xor0 = lo[0];
17365 if (lo[1] != const0_rtx)
17366 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17367 NULL_RTX, 0, OPTAB_WIDEN);
17368
17369 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17370 NULL_RTX, 0, OPTAB_WIDEN);
17371
17372 ix86_expand_branch (code, tmp, const0_rtx, label);
17373 return;
17374 }
17375
17376 /* Otherwise, if we are doing a less-than or greater-or-equal-than
17377 comparison, op1 is a constant and the low word is zero, then we can
17378 just examine the high word. Similarly for a low word of -1 and
17379 less-or-equal-than or greater-than. */
17380
17381 if (CONST_INT_P (hi[1]))
17382 switch (code)
17383 {
17384 case LT: case LTU: case GE: case GEU:
17385 if (lo[1] == const0_rtx)
17386 {
17387 ix86_expand_branch (code, hi[0], hi[1], label);
17388 return;
17389 }
17390 break;
17391 case LE: case LEU: case GT: case GTU:
17392 if (lo[1] == constm1_rtx)
17393 {
17394 ix86_expand_branch (code, hi[0], hi[1], label);
17395 return;
17396 }
17397 break;
17398 default:
17399 break;
17400 }
17401
17402 /* Otherwise, we need two or three jumps. */
17403
17404 label2 = gen_label_rtx ();
17405
17406 code1 = code;
17407 code2 = swap_condition (code);
17408 code3 = unsigned_condition (code);
17409
17410 switch (code)
17411 {
17412 case LT: case GT: case LTU: case GTU:
17413 break;
17414
17415 case LE: code1 = LT; code2 = GT; break;
17416 case GE: code1 = GT; code2 = LT; break;
17417 case LEU: code1 = LTU; code2 = GTU; break;
17418 case GEU: code1 = GTU; code2 = LTU; break;
17419
17420 case EQ: code1 = UNKNOWN; code2 = NE; break;
17421 case NE: code2 = UNKNOWN; break;
17422
17423 default:
17424 gcc_unreachable ();
17425 }
17426
17427 /*
17428 * a < b =>
17429 * if (hi(a) < hi(b)) goto true;
17430 * if (hi(a) > hi(b)) goto false;
17431 * if (lo(a) < lo(b)) goto true;
17432 * false:
17433 */
17434
17435 if (code1 != UNKNOWN)
17436 ix86_expand_branch (code1, hi[0], hi[1], label);
17437 if (code2 != UNKNOWN)
17438 ix86_expand_branch (code2, hi[0], hi[1], label2);
17439
17440 ix86_expand_branch (code3, lo[0], lo[1], label);
17441
17442 if (code2 != UNKNOWN)
17443 emit_label (label2);
17444 return;
17445 }
17446
17447 default:
17448 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17449 goto simple;
17450 }
17451 }
17452
17453 /* Split branch based on floating point condition. */
17454 void
17455 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17456 rtx target1, rtx target2, rtx tmp, rtx pushed)
17457 {
17458 rtx condition;
17459 rtx i;
17460
17461 if (target2 != pc_rtx)
17462 {
17463 rtx tmp = target2;
17464 code = reverse_condition_maybe_unordered (code);
17465 target2 = target1;
17466 target1 = tmp;
17467 }
17468
17469 condition = ix86_expand_fp_compare (code, op1, op2,
17470 tmp);
17471
17472 /* Remove pushed operand from stack. */
17473 if (pushed)
17474 ix86_free_from_memory (GET_MODE (pushed));
17475
17476 i = emit_jump_insn (gen_rtx_SET
17477 (VOIDmode, pc_rtx,
17478 gen_rtx_IF_THEN_ELSE (VOIDmode,
17479 condition, target1, target2)));
17480 if (split_branch_probability >= 0)
17481 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17482 }
17483
17484 void
17485 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17486 {
17487 rtx ret;
17488
17489 gcc_assert (GET_MODE (dest) == QImode);
17490
17491 ret = ix86_expand_compare (code, op0, op1);
17492 PUT_MODE (ret, QImode);
17493 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17494 }
17495
17496 /* Expand a comparison setting or clearing the carry flag. Return true when
17497 successful and set *POP to the resulting comparison. */
17498 static bool
17499 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17500 {
17501 enum machine_mode mode =
17502 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17503
17504 /* Do not handle double-mode compares that go through the special path. */
17505 if (mode == (TARGET_64BIT ? TImode : DImode))
17506 return false;
17507
17508 if (SCALAR_FLOAT_MODE_P (mode))
17509 {
17510 rtx compare_op, compare_seq;
17511
17512 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17513
17514 /* Shortcut: the following common codes never translate
17515 into carry flag compares. */
17516 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17517 || code == ORDERED || code == UNORDERED)
17518 return false;
17519
17520 /* These comparisons require the zero flag; swap operands so they won't. */
17521 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17522 && !TARGET_IEEE_FP)
17523 {
17524 rtx tmp = op0;
17525 op0 = op1;
17526 op1 = tmp;
17527 code = swap_condition (code);
17528 }
17529
17530 /* Try to expand the comparison and verify that we end up with a
17531 carry flag based comparison. This fails only when we decide to
17532 expand the comparison using arithmetic, which is not a very
17533 common scenario. */
17534 start_sequence ();
17535 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17536 compare_seq = get_insns ();
17537 end_sequence ();
17538
17539 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17540 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17541 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17542 else
17543 code = GET_CODE (compare_op);
17544
17545 if (code != LTU && code != GEU)
17546 return false;
17547
17548 emit_insn (compare_seq);
17549 *pop = compare_op;
17550 return true;
17551 }
17552
17553 if (!INTEGRAL_MODE_P (mode))
17554 return false;
17555
17556 switch (code)
17557 {
17558 case LTU:
17559 case GEU:
17560 break;
17561
17562 /* Convert a==0 into (unsigned)a<1. */
17563 case EQ:
17564 case NE:
17565 if (op1 != const0_rtx)
17566 return false;
17567 op1 = const1_rtx;
17568 code = (code == EQ ? LTU : GEU);
17569 break;
17570
17571 /* Convert a>b into b<a or a>=b+1. */
17572 case GTU:
17573 case LEU:
17574 if (CONST_INT_P (op1))
17575 {
17576 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17577 /* Bail out on overflow. We could still swap the operands, but that
17578 would force loading the constant into a register. */
17579 if (op1 == const0_rtx
17580 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17581 return false;
17582 code = (code == GTU ? GEU : LTU);
17583 }
17584 else
17585 {
17586 rtx tmp = op1;
17587 op1 = op0;
17588 op0 = tmp;
17589 code = (code == GTU ? LTU : GEU);
17590 }
17591 break;
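/* For example, the unsigned test "a <= 42" (LEU) becomes "a < 43"
   (LTU), which a single cmp answers through the carry flag alone;
   this is what lets the sbb-based sequences in
   ix86_expand_int_movcc avoid a setcc or a branch.  */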
17592
17593 /* Convert a>=0 into (unsigned)a<0x80000000. */
17594 case LT:
17595 case GE:
17596 if (mode == DImode || op1 != const0_rtx)
17597 return false;
17598 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17599 code = (code == LT ? GEU : LTU);
17600 break;
17601 case LE:
17602 case GT:
17603 if (mode == DImode || op1 != constm1_rtx)
17604 return false;
17605 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17606 code = (code == LE ? GEU : LTU);
17607 break;
17608
17609 default:
17610 return false;
17611 }
17612 /* Swapping operands may cause a constant to appear as the first operand. */
17613 if (!nonimmediate_operand (op0, VOIDmode))
17614 {
17615 if (!can_create_pseudo_p ())
17616 return false;
17617 op0 = force_reg (mode, op0);
17618 }
17619 *pop = ix86_expand_compare (code, op0, op1);
17620 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17621 return true;
17622 }
17623
17624 bool
17625 ix86_expand_int_movcc (rtx operands[])
17626 {
17627 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17628 rtx compare_seq, compare_op;
17629 enum machine_mode mode = GET_MODE (operands[0]);
17630 bool sign_bit_compare_p = false;
17631 rtx op0 = XEXP (operands[1], 0);
17632 rtx op1 = XEXP (operands[1], 1);
17633
17634 start_sequence ();
17635 compare_op = ix86_expand_compare (code, op0, op1);
17636 compare_seq = get_insns ();
17637 end_sequence ();
17638
17639 compare_code = GET_CODE (compare_op);
17640
17641 if ((op1 == const0_rtx && (code == GE || code == LT))
17642 || (op1 == constm1_rtx && (code == GT || code == LE)))
17643 sign_bit_compare_p = true;
17644
17645 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17646 HImode insns, we'd be swallowed in word prefix ops. */
17647
17648 if ((mode != HImode || TARGET_FAST_PREFIX)
17649 && (mode != (TARGET_64BIT ? TImode : DImode))
17650 && CONST_INT_P (operands[2])
17651 && CONST_INT_P (operands[3]))
17652 {
17653 rtx out = operands[0];
17654 HOST_WIDE_INT ct = INTVAL (operands[2]);
17655 HOST_WIDE_INT cf = INTVAL (operands[3]);
17656 HOST_WIDE_INT diff;
17657
17658 diff = ct - cf;
17659 /* Sign bit compares are better done using shifts than by
17660 using sbb. */
17661 if (sign_bit_compare_p
17662 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17663 {
17664 /* Detect overlap between destination and compare sources. */
17665 rtx tmp = out;
17666
17667 if (!sign_bit_compare_p)
17668 {
17669 rtx flags;
17670 bool fpcmp = false;
17671
17672 compare_code = GET_CODE (compare_op);
17673
17674 flags = XEXP (compare_op, 0);
17675
17676 if (GET_MODE (flags) == CCFPmode
17677 || GET_MODE (flags) == CCFPUmode)
17678 {
17679 fpcmp = true;
17680 compare_code
17681 = ix86_fp_compare_code_to_integer (compare_code);
17682 }
17683
17684 /* To simplify the rest of the code, restrict to the GEU case. */
17685 if (compare_code == LTU)
17686 {
17687 HOST_WIDE_INT tmp = ct;
17688 ct = cf;
17689 cf = tmp;
17690 compare_code = reverse_condition (compare_code);
17691 code = reverse_condition (code);
17692 }
17693 else
17694 {
17695 if (fpcmp)
17696 PUT_CODE (compare_op,
17697 reverse_condition_maybe_unordered
17698 (GET_CODE (compare_op)));
17699 else
17700 PUT_CODE (compare_op,
17701 reverse_condition (GET_CODE (compare_op)));
17702 }
17703 diff = ct - cf;
17704
17705 if (reg_overlap_mentioned_p (out, op0)
17706 || reg_overlap_mentioned_p (out, op1))
17707 tmp = gen_reg_rtx (mode);
17708
17709 if (mode == DImode)
17710 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17711 else
17712 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17713 flags, compare_op));
17714 }
17715 else
17716 {
17717 if (code == GT || code == GE)
17718 code = reverse_condition (code);
17719 else
17720 {
17721 HOST_WIDE_INT tmp = ct;
17722 ct = cf;
17723 cf = tmp;
17724 diff = ct - cf;
17725 }
17726 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17727 }
17728
17729 if (diff == 1)
17730 {
17731 /*
17732 * cmpl op0,op1
17733 * sbbl dest,dest
17734 * [addl dest, ct]
17735 *
17736 * Size 5 - 8.
17737 */
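/* Why this works: the movcc_0_m1 pattern above expands to an sbb of a
   register with itself, yielding all-ones when the carry flag is set
   and zero otherwise.  With diff == 1 (ct == cf + 1), adding ct maps
   -1 to cf and 0 to ct, selecting between the constants without a
   branch.  */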
17738 if (ct)
17739 tmp = expand_simple_binop (mode, PLUS,
17740 tmp, GEN_INT (ct),
17741 copy_rtx (tmp), 1, OPTAB_DIRECT);
17742 }
17743 else if (cf == -1)
17744 {
17745 /*
17746 * cmpl op0,op1
17747 * sbbl dest,dest
17748 * orl $ct, dest
17749 *
17750 * Size 8.
17751 */
17752 tmp = expand_simple_binop (mode, IOR,
17753 tmp, GEN_INT (ct),
17754 copy_rtx (tmp), 1, OPTAB_DIRECT);
17755 }
17756 else if (diff == -1 && ct)
17757 {
17758 /*
17759 * cmpl op0,op1
17760 * sbbl dest,dest
17761 * notl dest
17762 * [addl dest, cf]
17763 *
17764 * Size 8 - 11.
17765 */
17766 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17767 if (cf)
17768 tmp = expand_simple_binop (mode, PLUS,
17769 copy_rtx (tmp), GEN_INT (cf),
17770 copy_rtx (tmp), 1, OPTAB_DIRECT);
17771 }
17772 else
17773 {
17774 /*
17775 * cmpl op0,op1
17776 * sbbl dest,dest
17777 * [notl dest]
17778 * andl cf - ct, dest
17779 * [addl dest, ct]
17780 *
17781 * Size 8 - 11.
17782 */
17783
17784 if (cf == 0)
17785 {
17786 cf = ct;
17787 ct = 0;
17788 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17789 }
17790
17791 tmp = expand_simple_binop (mode, AND,
17792 copy_rtx (tmp),
17793 gen_int_mode (cf - ct, mode),
17794 copy_rtx (tmp), 1, OPTAB_DIRECT);
17795 if (ct)
17796 tmp = expand_simple_binop (mode, PLUS,
17797 copy_rtx (tmp), GEN_INT (ct),
17798 copy_rtx (tmp), 1, OPTAB_DIRECT);
17799 }
17800
17801 if (!rtx_equal_p (tmp, out))
17802 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17803
17804 return true;
17805 }
17806
17807 if (diff < 0)
17808 {
17809 enum machine_mode cmp_mode = GET_MODE (op0);
17810
17811 HOST_WIDE_INT tmp;
17812 tmp = ct, ct = cf, cf = tmp;
17813 diff = -diff;
17814
17815 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17816 {
17817 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17818
17819 /* We may be reversing an unordered compare to a normal compare, which
17820 is not valid in general (we may convert a non-trapping condition
17821 into a trapping one); however, on i386 we currently emit all
17822 comparisons unordered. */
17823 compare_code = reverse_condition_maybe_unordered (compare_code);
17824 code = reverse_condition_maybe_unordered (code);
17825 }
17826 else
17827 {
17828 compare_code = reverse_condition (compare_code);
17829 code = reverse_condition (code);
17830 }
17831 }
17832
17833 compare_code = UNKNOWN;
17834 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17835 && CONST_INT_P (op1))
17836 {
17837 if (op1 == const0_rtx
17838 && (code == LT || code == GE))
17839 compare_code = code;
17840 else if (op1 == constm1_rtx)
17841 {
17842 if (code == LE)
17843 compare_code = LT;
17844 else if (code == GT)
17845 compare_code = GE;
17846 }
17847 }
17848
17849 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17850 if (compare_code != UNKNOWN
17851 && GET_MODE (op0) == GET_MODE (out)
17852 && (cf == -1 || ct == -1))
17853 {
17854 /* If the lea code below could be used, only optimize
17855 if it results in a 2-insn sequence. */
17856
17857 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17858 || diff == 3 || diff == 5 || diff == 9)
17859 || (compare_code == LT && ct == -1)
17860 || (compare_code == GE && cf == -1))
17861 {
17862 /*
17863 * notl op1 (if necessary)
17864 * sarl $31, op1
17865 * orl cf, op1
17866 */
17867 if (ct != -1)
17868 {
17869 cf = ct;
17870 ct = -1;
17871 code = reverse_condition (code);
17872 }
17873
17874 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17875
17876 out = expand_simple_binop (mode, IOR,
17877 out, GEN_INT (cf),
17878 out, 1, OPTAB_DIRECT);
17879 if (out != operands[0])
17880 emit_move_insn (operands[0], out);
17881
17882 return true;
17883 }
17884 }
17885
17886
17887 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17888 || diff == 3 || diff == 5 || diff == 9)
17889 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17890 && (mode != DImode
17891 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17892 {
17893 /*
17894 * xorl dest,dest
17895 * cmpl op1,op2
17896 * setcc dest
17897 * lea cf(dest*(ct-cf)),dest
17898 *
17899 * Size 14.
17900 *
17901 * This also catches the degenerate setcc-only case.
17902 */
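/* For instance, "(a < b) ? 7 : 3" has ct - cf == 4: setcc leaves 0 or 1
   in dest, and "lea 3(,dest,4)", i.e. cf + dest * (ct - cf), turns that
   into 3 or 7 with one extra instruction.  */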
17903
17904 rtx tmp;
17905 int nops;
17906
17907 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17908
17909 nops = 0;
17910 /* On x86_64 the lea instruction operates on Pmode, so we need to get
17911 the arithmetic done in the proper mode to match. */
17912 if (diff == 1)
17913 tmp = copy_rtx (out);
17914 else
17915 {
17916 rtx out1;
17917 out1 = copy_rtx (out);
17918 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17919 nops++;
17920 if (diff & 1)
17921 {
17922 tmp = gen_rtx_PLUS (mode, tmp, out1);
17923 nops++;
17924 }
17925 }
17926 if (cf != 0)
17927 {
17928 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17929 nops++;
17930 }
17931 if (!rtx_equal_p (tmp, out))
17932 {
17933 if (nops == 1)
17934 out = force_operand (tmp, copy_rtx (out));
17935 else
17936 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17937 }
17938 if (!rtx_equal_p (out, operands[0]))
17939 emit_move_insn (operands[0], copy_rtx (out));
17940
17941 return true;
17942 }
17943
17944 /*
17945 * General case: Jumpful:
17946 * xorl dest,dest cmpl op1, op2
17947 * cmpl op1, op2 movl ct, dest
17948 * setcc dest jcc 1f
17949 * decl dest movl cf, dest
17950 * andl (cf-ct),dest 1:
17951 * addl ct,dest
17952 *
17953 * Size 20. Size 14.
17954 *
17955 * This is reasonably steep, but branch mispredict costs are
17956 * high on modern CPUs, so consider failing only if optimizing
17957 * for space.
17958 */
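/* In other words: setcc produces 0 or 1, the decrement turns that into
   -1 or 0, ANDing with (cf - ct) gives (cf - ct) or 0, and the final
   addition of ct yields cf or ct respectively -- still branch-free.  */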
17959
17960 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17961 && BRANCH_COST (optimize_insn_for_speed_p (),
17962 false) >= 2)
17963 {
17964 if (cf == 0)
17965 {
17966 enum machine_mode cmp_mode = GET_MODE (op0);
17967
17968 cf = ct;
17969 ct = 0;
17970
17971 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17972 {
17973 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17974
17975 /* We may be reversing an unordered compare to a normal compare,
17976 which is not valid in general (we may convert a non-trapping
17977 condition into a trapping one); however, on i386 we currently
17978 emit all comparisons unordered. */
17979 code = reverse_condition_maybe_unordered (code);
17980 }
17981 else
17982 {
17983 code = reverse_condition (code);
17984 if (compare_code != UNKNOWN)
17985 compare_code = reverse_condition (compare_code);
17986 }
17987 }
17988
17989 if (compare_code != UNKNOWN)
17990 {
17991 /* notl op1 (if needed)
17992 sarl $31, op1
17993 andl (cf-ct), op1
17994 addl ct, op1
17995
17996 For x < 0 (resp. x <= -1) there will be no notl,
17997 so if possible swap the constants to get rid of the
17998 complement.
17999 True/false will be -1/0 while code below (store flag
18000 followed by decrement) is 0/-1, so the constants need
18001 to be exchanged once more. */
18002
18003 if (compare_code == GE || !cf)
18004 {
18005 code = reverse_condition (code);
18006 compare_code = LT;
18007 }
18008 else
18009 {
18010 HOST_WIDE_INT tmp = cf;
18011 cf = ct;
18012 ct = tmp;
18013 }
18014
18015 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18016 }
18017 else
18018 {
18019 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18020
18021 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18022 constm1_rtx,
18023 copy_rtx (out), 1, OPTAB_DIRECT);
18024 }
18025
18026 out = expand_simple_binop (mode, AND, copy_rtx (out),
18027 gen_int_mode (cf - ct, mode),
18028 copy_rtx (out), 1, OPTAB_DIRECT);
18029 if (ct)
18030 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18031 copy_rtx (out), 1, OPTAB_DIRECT);
18032 if (!rtx_equal_p (out, operands[0]))
18033 emit_move_insn (operands[0], copy_rtx (out));
18034
18035 return true;
18036 }
18037 }
18038
18039 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18040 {
18041 /* Try a few more things with specific constants and a variable. */
18042
18043 optab op;
18044 rtx var, orig_out, out, tmp;
18045
18046 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18047 return false;
18048
18049 /* If one of the two operands is an interesting constant, load a
18050 constant with the above and mask it in with a logical operation. */
18051
18052 if (CONST_INT_P (operands[2]))
18053 {
18054 var = operands[3];
18055 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18056 operands[3] = constm1_rtx, op = and_optab;
18057 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18058 operands[3] = const0_rtx, op = ior_optab;
18059 else
18060 return false;
18061 }
18062 else if (CONST_INT_P (operands[3]))
18063 {
18064 var = operands[2];
18065 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18066 operands[2] = constm1_rtx, op = and_optab;
18067 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18068 operands[2] = const0_rtx, op = ior_optab;
18069 else
18070 return false;
18071 }
18072 else
18073 return false;
18074
18075 orig_out = operands[0];
18076 tmp = gen_reg_rtx (mode);
18077 operands[0] = tmp;
18078
18079 /* Recurse to get the constant loaded. */
18080 if (ix86_expand_int_movcc (operands) == 0)
18081 return false;
18082
18083 /* Mask in the interesting variable. */
18084 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18085 OPTAB_WIDEN);
18086 if (!rtx_equal_p (out, orig_out))
18087 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18088
18089 return true;
18090 }
18091
18092 /*
18093 * For comparison with above,
18094 *
18095 * movl cf,dest
18096 * movl ct,tmp
18097 * cmpl op1,op2
18098 * cmovcc tmp,dest
18099 *
18100 * Size 15.
18101 */
18102
18103 if (! nonimmediate_operand (operands[2], mode))
18104 operands[2] = force_reg (mode, operands[2]);
18105 if (! nonimmediate_operand (operands[3], mode))
18106 operands[3] = force_reg (mode, operands[3]);
18107
18108 if (! register_operand (operands[2], VOIDmode)
18109 && (mode == QImode
18110 || ! register_operand (operands[3], VOIDmode)))
18111 operands[2] = force_reg (mode, operands[2]);
18112
18113 if (mode == QImode
18114 && ! register_operand (operands[3], VOIDmode))
18115 operands[3] = force_reg (mode, operands[3]);
18116
18117 emit_insn (compare_seq);
18118 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18119 gen_rtx_IF_THEN_ELSE (mode,
18120 compare_op, operands[2],
18121 operands[3])));
18122 return true;
18123 }
18124
18125 /* Swap, force into registers, or otherwise massage the two operands
18126 to an SSE comparison with a mask result. Thus we differ a bit from
18127 ix86_prepare_fp_compare_args which expects to produce a flags result.
18128
18129 The DEST operand exists to help determine whether to commute commutative
18130 operators. The POP0/POP1 operands are updated in place. The new
18131 comparison code is returned, or UNKNOWN if not implementable. */
18132
18133 static enum rtx_code
18134 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18135 rtx *pop0, rtx *pop1)
18136 {
18137 rtx tmp;
18138
18139 switch (code)
18140 {
18141 case LTGT:
18142 case UNEQ:
18143 /* We have no LTGT as an operator. We could implement it with
18144 NE & ORDERED, but this requires an extra temporary. It's
18145 not clear that it's worth it. */
18146 return UNKNOWN;
18147
18148 case LT:
18149 case LE:
18150 case UNGT:
18151 case UNGE:
18152 /* These are supported directly. */
18153 break;
18154
18155 case EQ:
18156 case NE:
18157 case UNORDERED:
18158 case ORDERED:
18159 /* For commutative operators, try to canonicalize the destination
18160 operand to be first in the comparison - this helps reload to
18161 avoid extra moves. */
18162 if (!dest || !rtx_equal_p (dest, *pop1))
18163 break;
18164 /* FALLTHRU */
18165
18166 case GE:
18167 case GT:
18168 case UNLE:
18169 case UNLT:
18170 /* These are not supported directly. Swap the comparison operands
18171 to transform into something that is supported. */
18172 tmp = *pop0;
18173 *pop0 = *pop1;
18174 *pop1 = tmp;
18175 code = swap_condition (code);
18176 break;
18177
18178 default:
18179 gcc_unreachable ();
18180 }
18181
18182 return code;
18183 }
18184
18185 /* Detect conditional moves that exactly match min/max operational
18186 semantics. Note that this is IEEE safe, as long as we don't
18187 interchange the operands.
18188
18189 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18190 and TRUE if the operation is successful and instructions are emitted. */
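/* E.g. "x < y ? x : y" maps to minss/minsd here.  This is only safe
   because those instructions return the second operand when either
   input is a NaN (and when deciding between -0.0 and +0.0), so the
   operand order established here must be preserved -- hence the
   "don't interchange the operands" caveat above.  */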
18191
18192 static bool
18193 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18194 rtx cmp_op1, rtx if_true, rtx if_false)
18195 {
18196 enum machine_mode mode;
18197 bool is_min;
18198 rtx tmp;
18199
18200 if (code == LT)
18201 ;
18202 else if (code == UNGE)
18203 {
18204 tmp = if_true;
18205 if_true = if_false;
18206 if_false = tmp;
18207 }
18208 else
18209 return false;
18210
18211 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18212 is_min = true;
18213 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18214 is_min = false;
18215 else
18216 return false;
18217
18218 mode = GET_MODE (dest);
18219
18220 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18221 but MODE may be a vector mode and thus not appropriate. */
18222 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18223 {
18224 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18225 rtvec v;
18226
18227 if_true = force_reg (mode, if_true);
18228 v = gen_rtvec (2, if_true, if_false);
18229 tmp = gen_rtx_UNSPEC (mode, v, u);
18230 }
18231 else
18232 {
18233 code = is_min ? SMIN : SMAX;
18234 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18235 }
18236
18237 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18238 return true;
18239 }
18240
18241 /* Expand an SSE vector comparison. Return the register with the result. */
18242
18243 static rtx
18244 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18245 rtx op_true, rtx op_false)
18246 {
18247 enum machine_mode mode = GET_MODE (dest);
18248 rtx x;
18249
18250 cmp_op0 = force_reg (mode, cmp_op0);
18251 if (!nonimmediate_operand (cmp_op1, mode))
18252 cmp_op1 = force_reg (mode, cmp_op1);
18253
18254 if (optimize
18255 || reg_overlap_mentioned_p (dest, op_true)
18256 || reg_overlap_mentioned_p (dest, op_false))
18257 dest = gen_reg_rtx (mode);
18258
18259 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18260 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18261
18262 return dest;
18263 }
18264
18265 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18266 operations. This is used for both scalar and vector conditional moves. */
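/* The mask CMP is all-ones or all-zeros per element, so the general
   form computed in the final arm below is
     dest = (cmp & op_true) | (~cmp & op_false);
   the other arms are just the special cases where one operand is
   zero, or where XOP's vpcmov performs the select directly.  */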
18267
18268 static void
18269 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18270 {
18271 enum machine_mode mode = GET_MODE (dest);
18272 rtx t2, t3, x;
18273
18274 if (op_false == CONST0_RTX (mode))
18275 {
18276 op_true = force_reg (mode, op_true);
18277 x = gen_rtx_AND (mode, cmp, op_true);
18278 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18279 }
18280 else if (op_true == CONST0_RTX (mode))
18281 {
18282 op_false = force_reg (mode, op_false);
18283 x = gen_rtx_NOT (mode, cmp);
18284 x = gen_rtx_AND (mode, x, op_false);
18285 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18286 }
18287 else if (TARGET_XOP)
18288 {
18289 rtx pcmov = gen_rtx_SET (mode, dest,
18290 gen_rtx_IF_THEN_ELSE (mode, cmp,
18291 op_true,
18292 op_false));
18293 emit_insn (pcmov);
18294 }
18295 else
18296 {
18297 op_true = force_reg (mode, op_true);
18298 op_false = force_reg (mode, op_false);
18299
18300 t2 = gen_reg_rtx (mode);
18301 if (optimize)
18302 t3 = gen_reg_rtx (mode);
18303 else
18304 t3 = dest;
18305
18306 x = gen_rtx_AND (mode, op_true, cmp);
18307 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18308
18309 x = gen_rtx_NOT (mode, cmp);
18310 x = gen_rtx_AND (mode, x, op_false);
18311 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18312
18313 x = gen_rtx_IOR (mode, t3, t2);
18314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18315 }
18316 }
18317
18318 /* Expand a floating-point conditional move. Return true if successful. */
18319
18320 bool
18321 ix86_expand_fp_movcc (rtx operands[])
18322 {
18323 enum machine_mode mode = GET_MODE (operands[0]);
18324 enum rtx_code code = GET_CODE (operands[1]);
18325 rtx tmp, compare_op;
18326 rtx op0 = XEXP (operands[1], 0);
18327 rtx op1 = XEXP (operands[1], 1);
18328
18329 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18330 {
18331 enum machine_mode cmode;
18332
18333 /* Since we have no cmove for SSE registers, don't force bad register
18334 allocation just to gain access to it. Deny movcc when the
18335 comparison mode doesn't match the move mode. */
18336 cmode = GET_MODE (op0);
18337 if (cmode == VOIDmode)
18338 cmode = GET_MODE (op1);
18339 if (cmode != mode)
18340 return false;
18341
18342 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18343 if (code == UNKNOWN)
18344 return false;
18345
18346 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18347 operands[2], operands[3]))
18348 return true;
18349
18350 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18351 operands[2], operands[3]);
18352 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18353 return true;
18354 }
18355
18356 /* The floating point conditional move instructions don't directly
18357 support conditions resulting from a signed integer comparison. */
18358
18359 compare_op = ix86_expand_compare (code, op0, op1);
18360 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18361 {
18362 tmp = gen_reg_rtx (QImode);
18363 ix86_expand_setcc (tmp, code, op0, op1);
18364
18365 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18366 }
18367
18368 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18369 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18370 operands[2], operands[3])));
18371
18372 return true;
18373 }
18374
18375 /* Expand a floating-point vector conditional move; a vcond operation
18376 rather than a movcc operation. */
18377
18378 bool
18379 ix86_expand_fp_vcond (rtx operands[])
18380 {
18381 enum rtx_code code = GET_CODE (operands[3]);
18382 rtx cmp;
18383
18384 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18385 &operands[4], &operands[5]);
18386 if (code == UNKNOWN)
18387 return false;
18388
18389 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18390 operands[5], operands[1], operands[2]))
18391 return true;
18392
18393 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18394 operands[1], operands[2]);
18395 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18396 return true;
18397 }
18398
18399 /* Expand a signed/unsigned integral vector conditional move. */
18400
18401 bool
18402 ix86_expand_int_vcond (rtx operands[])
18403 {
18404 enum machine_mode mode = GET_MODE (operands[0]);
18405 enum rtx_code code = GET_CODE (operands[3]);
18406 bool negate = false;
18407 rtx x, cop0, cop1;
18408
18409 cop0 = operands[4];
18410 cop1 = operands[5];
18411
18412 /* XOP supports all of the comparisons on all vector int types. */
18413 if (!TARGET_XOP)
18414 {
18415 /* Canonicalize the comparison to EQ, GT, GTU. */
18416 switch (code)
18417 {
18418 case EQ:
18419 case GT:
18420 case GTU:
18421 break;
18422
18423 case NE:
18424 case LE:
18425 case LEU:
18426 code = reverse_condition (code);
18427 negate = true;
18428 break;
18429
18430 case GE:
18431 case GEU:
18432 code = reverse_condition (code);
18433 negate = true;
18434 /* FALLTHRU */
18435
18436 case LT:
18437 case LTU:
18438 code = swap_condition (code);
18439 x = cop0, cop0 = cop1, cop1 = x;
18440 break;
18441
18442 default:
18443 gcc_unreachable ();
18444 }
18445
18446 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18447 if (mode == V2DImode)
18448 {
18449 switch (code)
18450 {
18451 case EQ:
18452 /* SSE4.1 supports EQ. */
18453 if (!TARGET_SSE4_1)
18454 return false;
18455 break;
18456
18457 case GT:
18458 case GTU:
18459 /* SSE4.2 supports GT/GTU. */
18460 if (!TARGET_SSE4_2)
18461 return false;
18462 break;
18463
18464 default:
18465 gcc_unreachable ();
18466 }
18467 }
18468
18469 /* Unsigned parallel compare is not supported by the hardware.
18470 Play some tricks to turn this into a signed comparison
18471 against 0. */
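/* For example, with 32-bit lanes, subtracting 0x80000000 from both
   operands turns the unsigned order into a signed one:
   0xffffffff >u 0x00000000 becomes 0x7fffffff >s 0x80000000 (INT_MIN),
   which is true for signed GT as required.  For the byte/word cases,
   a >u b is instead rewritten as (a -us b) != 0 using unsigned saturating
   subtraction, and the missing NE is obtained by negating an EQ with 0.  */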
18472 if (code == GTU)
18473 {
18474 cop0 = force_reg (mode, cop0);
18475
18476 switch (mode)
18477 {
18478 case V4SImode:
18479 case V2DImode:
18480 {
18481 rtx t1, t2, mask;
18482 rtx (*gen_sub3) (rtx, rtx, rtx);
18483
18484 /* Subtract (-(INT MAX) - 1) from both operands to make
18485 them signed. */
18486 mask = ix86_build_signbit_mask (mode, true, false);
18487 gen_sub3 = (mode == V4SImode
18488 ? gen_subv4si3 : gen_subv2di3);
18489 t1 = gen_reg_rtx (mode);
18490 emit_insn (gen_sub3 (t1, cop0, mask));
18491
18492 t2 = gen_reg_rtx (mode);
18493 emit_insn (gen_sub3 (t2, cop1, mask));
18494
18495 cop0 = t1;
18496 cop1 = t2;
18497 code = GT;
18498 }
18499 break;
18500
18501 case V16QImode:
18502 case V8HImode:
18503 /* Perform a parallel unsigned saturating subtraction. */
18504 x = gen_reg_rtx (mode);
18505 emit_insn (gen_rtx_SET (VOIDmode, x,
18506 gen_rtx_US_MINUS (mode, cop0, cop1)));
18507
18508 cop0 = x;
18509 cop1 = CONST0_RTX (mode);
18510 code = EQ;
18511 negate = !negate;
18512 break;
18513
18514 default:
18515 gcc_unreachable ();
18516 }
18517 }
18518 }
18519
18520 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18521 operands[1+negate], operands[2-negate]);
18522
18523 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18524 operands[2-negate]);
18525 return true;
18526 }
18527
18528 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18529 true if we should do zero extension, else sign extension. HIGH_P is
18530 true if we want the N/2 high elements, else the low elements. */
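/* A sketch of the pre-SSE4.1 path: to sign-extend V8HImode to V4SImode,
   first build a sign mask with a GT compare of 0 against OP[1] (negative
   elements become 0xffff, others 0x0000), then interleave the low or high
   half of OP[1] with that mask so each 16-bit element is followed by its
   sign word; zero extension simply interleaves with a zero vector.  */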
18531
18532 void
18533 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18534 {
18535 enum machine_mode imode = GET_MODE (operands[1]);
18536 rtx tmp, dest;
18537
18538 if (TARGET_SSE4_1)
18539 {
18540 rtx (*unpack)(rtx, rtx);
18541
18542 switch (imode)
18543 {
18544 case V16QImode:
18545 if (unsigned_p)
18546 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18547 else
18548 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18549 break;
18550 case V8HImode:
18551 if (unsigned_p)
18552 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18553 else
18554 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18555 break;
18556 case V4SImode:
18557 if (unsigned_p)
18558 unpack = gen_sse4_1_zero_extendv2siv2di2;
18559 else
18560 unpack = gen_sse4_1_sign_extendv2siv2di2;
18561 break;
18562 default:
18563 gcc_unreachable ();
18564 }
18565
18566 if (high_p)
18567 {
18568 /* Shift higher 8 bytes to lower 8 bytes. */
18569 tmp = gen_reg_rtx (imode);
18570 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18571 gen_lowpart (V1TImode, operands[1]),
18572 GEN_INT (64)));
18573 }
18574 else
18575 tmp = operands[1];
18576
18577 emit_insn (unpack (operands[0], tmp));
18578 }
18579 else
18580 {
18581 rtx (*unpack)(rtx, rtx, rtx);
18582
18583 switch (imode)
18584 {
18585 case V16QImode:
18586 if (high_p)
18587 unpack = gen_vec_interleave_highv16qi;
18588 else
18589 unpack = gen_vec_interleave_lowv16qi;
18590 break;
18591 case V8HImode:
18592 if (high_p)
18593 unpack = gen_vec_interleave_highv8hi;
18594 else
18595 unpack = gen_vec_interleave_lowv8hi;
18596 break;
18597 case V4SImode:
18598 if (high_p)
18599 unpack = gen_vec_interleave_highv4si;
18600 else
18601 unpack = gen_vec_interleave_lowv4si;
18602 break;
18603 default:
18604 gcc_unreachable ();
18605 }
18606
18607 dest = gen_lowpart (imode, operands[0]);
18608
18609 if (unsigned_p)
18610 tmp = force_reg (imode, CONST0_RTX (imode));
18611 else
18612 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18613 operands[1], pc_rtx, pc_rtx);
18614
18615 emit_insn (unpack (dest, operands[1], tmp));
18616 }
18617 }
18618
18619 /* Expand conditional increment or decrement using adc/sbb instructions.
18620 The default case using setcc followed by the conditional move can be
18621 done by generic code. */
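/* For illustration (hypothetical registers), an unsigned "r = x + (a < b)"
   can be emitted as

       cmpl %ebx, %eax        # CF set when a < b (a in eax, b in ebx)
       adcl $0, %ecx          # x += carry

   and the decrement variant uses sbb analogously with 0 or -1.  */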
18622 bool
18623 ix86_expand_int_addcc (rtx operands[])
18624 {
18625 enum rtx_code code = GET_CODE (operands[1]);
18626 rtx flags;
18627 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18628 rtx compare_op;
18629 rtx val = const0_rtx;
18630 bool fpcmp = false;
18631 enum machine_mode mode;
18632 rtx op0 = XEXP (operands[1], 0);
18633 rtx op1 = XEXP (operands[1], 1);
18634
18635 if (operands[3] != const1_rtx
18636 && operands[3] != constm1_rtx)
18637 return false;
18638 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18639 return false;
18640 code = GET_CODE (compare_op);
18641
18642 flags = XEXP (compare_op, 0);
18643
18644 if (GET_MODE (flags) == CCFPmode
18645 || GET_MODE (flags) == CCFPUmode)
18646 {
18647 fpcmp = true;
18648 code = ix86_fp_compare_code_to_integer (code);
18649 }
18650
18651 if (code != LTU)
18652 {
18653 val = constm1_rtx;
18654 if (fpcmp)
18655 PUT_CODE (compare_op,
18656 reverse_condition_maybe_unordered
18657 (GET_CODE (compare_op)));
18658 else
18659 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18660 }
18661
18662 mode = GET_MODE (operands[0]);
18663
18664 /* Construct either adc or sbb insn. */
18665 if ((code == LTU) == (operands[3] == constm1_rtx))
18666 {
18667 switch (mode)
18668 {
18669 case QImode:
18670 insn = gen_subqi3_carry;
18671 break;
18672 case HImode:
18673 insn = gen_subhi3_carry;
18674 break;
18675 case SImode:
18676 insn = gen_subsi3_carry;
18677 break;
18678 case DImode:
18679 insn = gen_subdi3_carry;
18680 break;
18681 default:
18682 gcc_unreachable ();
18683 }
18684 }
18685 else
18686 {
18687 switch (mode)
18688 {
18689 case QImode:
18690 insn = gen_addqi3_carry;
18691 break;
18692 case HImode:
18693 insn = gen_addhi3_carry;
18694 break;
18695 case SImode:
18696 insn = gen_addsi3_carry;
18697 break;
18698 case DImode:
18699 insn = gen_adddi3_carry;
18700 break;
18701 default:
18702 gcc_unreachable ();
18703 }
18704 }
18705 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18706
18707 return true;
18708 }
18709
18710
18711 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18712 but works for floating point parameters and non-offsettable memories.
18713 For pushes, it returns just stack offsets; the values will be saved
18714 in the right order. At most four parts are generated. */
18715
18716 static int
18717 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18718 {
18719 int size;
18720
18721 if (!TARGET_64BIT)
18722 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18723 else
18724 size = (GET_MODE_SIZE (mode) + 4) / 8;
18725
18726 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18727 gcc_assert (size >= 2 && size <= 4);
18728
18729 /* Optimize constant pool reference to immediates. This is used by fp
18730 moves, that force all constants to memory to allow combining. */
18731 if (MEM_P (operand) && MEM_READONLY_P (operand))
18732 {
18733 rtx tmp = maybe_get_pool_constant (operand);
18734 if (tmp)
18735 operand = tmp;
18736 }
18737
18738 if (MEM_P (operand) && !offsettable_memref_p (operand))
18739 {
18740 /* The only non-offsettable memories we handle are pushes. */
18741 int ok = push_operand (operand, VOIDmode);
18742
18743 gcc_assert (ok);
18744
18745 operand = copy_rtx (operand);
18746 PUT_MODE (operand, Pmode);
18747 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18748 return size;
18749 }
18750
18751 if (GET_CODE (operand) == CONST_VECTOR)
18752 {
18753 enum machine_mode imode = int_mode_for_mode (mode);
18754 /* Caution: if we looked through a constant pool memory above,
18755 the operand may actually have a different mode now. That's
18756 ok, since we want to pun this all the way back to an integer. */
18757 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18758 gcc_assert (operand != NULL);
18759 mode = imode;
18760 }
18761
18762 if (!TARGET_64BIT)
18763 {
18764 if (mode == DImode)
18765 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18766 else
18767 {
18768 int i;
18769
18770 if (REG_P (operand))
18771 {
18772 gcc_assert (reload_completed);
18773 for (i = 0; i < size; i++)
18774 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18775 }
18776 else if (offsettable_memref_p (operand))
18777 {
18778 operand = adjust_address (operand, SImode, 0);
18779 parts[0] = operand;
18780 for (i = 1; i < size; i++)
18781 parts[i] = adjust_address (operand, SImode, 4 * i);
18782 }
18783 else if (GET_CODE (operand) == CONST_DOUBLE)
18784 {
18785 REAL_VALUE_TYPE r;
18786 long l[4];
18787
18788 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18789 switch (mode)
18790 {
18791 case TFmode:
18792 real_to_target (l, &r, mode);
18793 parts[3] = gen_int_mode (l[3], SImode);
18794 parts[2] = gen_int_mode (l[2], SImode);
18795 break;
18796 case XFmode:
18797 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18798 parts[2] = gen_int_mode (l[2], SImode);
18799 break;
18800 case DFmode:
18801 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18802 break;
18803 default:
18804 gcc_unreachable ();
18805 }
18806 parts[1] = gen_int_mode (l[1], SImode);
18807 parts[0] = gen_int_mode (l[0], SImode);
18808 }
18809 else
18810 gcc_unreachable ();
18811 }
18812 }
18813 else
18814 {
18815 if (mode == TImode)
18816 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18817 if (mode == XFmode || mode == TFmode)
18818 {
18819 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18820 if (REG_P (operand))
18821 {
18822 gcc_assert (reload_completed);
18823 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18824 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18825 }
18826 else if (offsettable_memref_p (operand))
18827 {
18828 operand = adjust_address (operand, DImode, 0);
18829 parts[0] = operand;
18830 parts[1] = adjust_address (operand, upper_mode, 8);
18831 }
18832 else if (GET_CODE (operand) == CONST_DOUBLE)
18833 {
18834 REAL_VALUE_TYPE r;
18835 long l[4];
18836
18837 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18838 real_to_target (l, &r, mode);
18839
18840 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18841 if (HOST_BITS_PER_WIDE_INT >= 64)
18842 parts[0]
18843 = gen_int_mode
18844 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18845 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18846 DImode);
18847 else
18848 parts[0] = immed_double_const (l[0], l[1], DImode);
18849
18850 if (upper_mode == SImode)
18851 parts[1] = gen_int_mode (l[2], SImode);
18852 else if (HOST_BITS_PER_WIDE_INT >= 64)
18853 parts[1]
18854 = gen_int_mode
18855 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18856 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18857 DImode);
18858 else
18859 parts[1] = immed_double_const (l[2], l[3], DImode);
18860 }
18861 else
18862 gcc_unreachable ();
18863 }
18864 }
18865
18866 return size;
18867 }
18868
18869 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18870 All required insns are emitted here. Operands 2-5 are used to hold
18871 the destination parts in the correct order; operands 6-9 hold the
18872 corresponding source parts. */
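/* As a rough example on a 32-bit target, the DImode load
   edx:eax = (mem:DI (reg:SI eax)) must move the high word first,
   since loading the low word first would clobber the address register
   before the second load; the collision handling below deals with
   such overlaps.  */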
18873
18874 void
18875 ix86_split_long_move (rtx operands[])
18876 {
18877 rtx part[2][4];
18878 int nparts, i, j;
18879 int push = 0;
18880 int collisions = 0;
18881 enum machine_mode mode = GET_MODE (operands[0]);
18882 bool collisionparts[4];
18883
18884 /* The DFmode expanders may ask us to move a double.
18885 For a 64-bit target this is a single move.  By hiding that fact
18886 here we simplify the i386.md splitters. */
18887 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18888 {
18889 /* Optimize constant pool reference to immediates. This is used by
18890 fp moves, that force all constants to memory to allow combining. */
18891
18892 if (MEM_P (operands[1])
18893 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18894 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18895 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18896 if (push_operand (operands[0], VOIDmode))
18897 {
18898 operands[0] = copy_rtx (operands[0]);
18899 PUT_MODE (operands[0], Pmode);
18900 }
18901 else
18902 operands[0] = gen_lowpart (DImode, operands[0]);
18903 operands[1] = gen_lowpart (DImode, operands[1]);
18904 emit_move_insn (operands[0], operands[1]);
18905 return;
18906 }
18907
18908 /* The only non-offsettable memory we handle is push. */
18909 if (push_operand (operands[0], VOIDmode))
18910 push = 1;
18911 else
18912 gcc_assert (!MEM_P (operands[0])
18913 || offsettable_memref_p (operands[0]));
18914
18915 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18916 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18917
18918 /* When emitting a push, be careful with source operands on the stack. */
18919 if (push && MEM_P (operands[1])
18920 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18921 {
18922 rtx src_base = XEXP (part[1][nparts - 1], 0);
18923
18924 /* Compensate for the stack decrement by 4. */
18925 if (!TARGET_64BIT && nparts == 3
18926 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18927 src_base = plus_constant (src_base, 4);
18928
18929 /* src_base refers to the stack pointer and is
18930 automatically decreased by emitted push. */
18931 for (i = 0; i < nparts; i++)
18932 part[1][i] = change_address (part[1][i],
18933 GET_MODE (part[1][i]), src_base);
18934 }
18935
18936 /* We need to do the copy in the right order in case an address register
18937 of the source overlaps the destination. */
18938 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18939 {
18940 rtx tmp;
18941
18942 for (i = 0; i < nparts; i++)
18943 {
18944 collisionparts[i]
18945 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18946 if (collisionparts[i])
18947 collisions++;
18948 }
18949
18950 /* Collision in the middle part can be handled by reordering. */
18951 if (collisions == 1 && nparts == 3 && collisionparts [1])
18952 {
18953 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18954 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18955 }
18956 else if (collisions == 1
18957 && nparts == 4
18958 && (collisionparts [1] || collisionparts [2]))
18959 {
18960 if (collisionparts [1])
18961 {
18962 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18963 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18964 }
18965 else
18966 {
18967 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18968 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18969 }
18970 }
18971
18972 /* If there are more collisions, we can't handle it by reordering.
18973 Do an lea to the last part and use only one colliding move. */
18974 else if (collisions > 1)
18975 {
18976 rtx base;
18977
18978 collisions = 1;
18979
18980 base = part[0][nparts - 1];
18981
18982 /* Handle the case when the last part isn't valid for lea.
18983 Happens in 64-bit mode storing the 12-byte XFmode. */
18984 if (GET_MODE (base) != Pmode)
18985 base = gen_rtx_REG (Pmode, REGNO (base));
18986
18987 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18988 part[1][0] = replace_equiv_address (part[1][0], base);
18989 for (i = 1; i < nparts; i++)
18990 {
18991 tmp = plus_constant (base, UNITS_PER_WORD * i);
18992 part[1][i] = replace_equiv_address (part[1][i], tmp);
18993 }
18994 }
18995 }
18996
18997 if (push)
18998 {
18999 if (!TARGET_64BIT)
19000 {
19001 if (nparts == 3)
19002 {
19003 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19004 emit_insn (gen_addsi3 (stack_pointer_rtx,
19005 stack_pointer_rtx, GEN_INT (-4)));
19006 emit_move_insn (part[0][2], part[1][2]);
19007 }
19008 else if (nparts == 4)
19009 {
19010 emit_move_insn (part[0][3], part[1][3]);
19011 emit_move_insn (part[0][2], part[1][2]);
19012 }
19013 }
19014 else
19015 {
19016 /* In 64-bit mode we don't have a 32-bit push available. If the operand
19017 is a register, that is OK - we just use the larger counterpart. We also
19018 retype memory - this comes from the attempt to avoid a REX prefix when
19019 moving the second half of a TFmode value. */
19020 if (GET_MODE (part[1][1]) == SImode)
19021 {
19022 switch (GET_CODE (part[1][1]))
19023 {
19024 case MEM:
19025 part[1][1] = adjust_address (part[1][1], DImode, 0);
19026 break;
19027
19028 case REG:
19029 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19030 break;
19031
19032 default:
19033 gcc_unreachable ();
19034 }
19035
19036 if (GET_MODE (part[1][0]) == SImode)
19037 part[1][0] = part[1][1];
19038 }
19039 }
19040 emit_move_insn (part[0][1], part[1][1]);
19041 emit_move_insn (part[0][0], part[1][0]);
19042 return;
19043 }
19044
19045 /* Choose correct order to not overwrite the source before it is copied. */
19046 if ((REG_P (part[0][0])
19047 && REG_P (part[1][1])
19048 && (REGNO (part[0][0]) == REGNO (part[1][1])
19049 || (nparts == 3
19050 && REGNO (part[0][0]) == REGNO (part[1][2]))
19051 || (nparts == 4
19052 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19053 || (collisions > 0
19054 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19055 {
19056 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19057 {
19058 operands[2 + i] = part[0][j];
19059 operands[6 + i] = part[1][j];
19060 }
19061 }
19062 else
19063 {
19064 for (i = 0; i < nparts; i++)
19065 {
19066 operands[2 + i] = part[0][i];
19067 operands[6 + i] = part[1][i];
19068 }
19069 }
19070
19071 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19072 if (optimize_insn_for_size_p ())
19073 {
19074 for (j = 0; j < nparts - 1; j++)
19075 if (CONST_INT_P (operands[6 + j])
19076 && operands[6 + j] != const0_rtx
19077 && REG_P (operands[2 + j]))
19078 for (i = j; i < nparts - 1; i++)
19079 if (CONST_INT_P (operands[7 + i])
19080 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19081 operands[7 + i] = operands[2 + j];
19082 }
19083
19084 for (i = 0; i < nparts; i++)
19085 emit_move_insn (operands[2 + i], operands[6 + i]);
19086
19087 return;
19088 }
19089
19090 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19091 left shift by a constant, either using a single shift or
19092 a sequence of add instructions. */
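/* For instance, a shift left by 1 becomes a single "add reg, reg", and a
   small constant count is expanded into a run of such adds whenever
   count * ix86_cost->add <= ix86_cost->shift_const and we are not
   optimizing for size; otherwise one shift-by-immediate is emitted.  */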
19093
19094 static void
19095 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19096 {
19097 rtx (*insn)(rtx, rtx, rtx);
19098
19099 if (count == 1
19100 || (count * ix86_cost->add <= ix86_cost->shift_const
19101 && !optimize_insn_for_size_p ()))
19102 {
19103 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19104 while (count-- > 0)
19105 emit_insn (insn (operand, operand, operand));
19106 }
19107 else
19108 {
19109 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19110 emit_insn (insn (operand, operand, GEN_INT (count)));
19111 }
19112 }
19113
19114 void
19115 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19116 {
19117 rtx (*gen_ashl3)(rtx, rtx, rtx);
19118 rtx (*gen_shld)(rtx, rtx, rtx);
19119 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19120
19121 rtx low[2], high[2];
19122 int count;
19123
19124 if (CONST_INT_P (operands[2]))
19125 {
19126 split_double_mode (mode, operands, 2, low, high);
19127 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19128
19129 if (count >= half_width)
19130 {
19131 emit_move_insn (high[0], low[1]);
19132 emit_move_insn (low[0], const0_rtx);
19133
19134 if (count > half_width)
19135 ix86_expand_ashl_const (high[0], count - half_width, mode);
19136 }
19137 else
19138 {
19139 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19140
19141 if (!rtx_equal_p (operands[0], operands[1]))
19142 emit_move_insn (operands[0], operands[1]);
19143
19144 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19145 ix86_expand_ashl_const (low[0], count, mode);
19146 }
19147 return;
19148 }
19149
19150 split_double_mode (mode, operands, 1, low, high);
19151
19152 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19153
19154 if (operands[1] == const1_rtx)
19155 {
19156 /* Assuming we've chosen QImode-capable registers, then 1 << N
19157 can be done with two 32/64-bit shifts, no branches, no cmoves. */
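/* Sketch for DImode on a 32-bit target: clear both halves, test bit 5 of
   the count (N & 32), then set low = (N < 32) and high = (N >= 32) with
   sete/setne, and finally shift both halves left by N.  The hardware
   masks the count mod 32, so the half holding the 1 ends up containing
   1 << (N % 32) while the other half stays 0.  */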
19158 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19159 {
19160 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19161
19162 ix86_expand_clear (low[0]);
19163 ix86_expand_clear (high[0]);
19164 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19165
19166 d = gen_lowpart (QImode, low[0]);
19167 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19168 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19169 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19170
19171 d = gen_lowpart (QImode, high[0]);
19172 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19173 s = gen_rtx_NE (QImode, flags, const0_rtx);
19174 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19175 }
19176
19177 /* Otherwise, we can get the same results by manually performing
19178 a bit extract operation on bit 5/6, and then performing the two
19179 shifts. The two methods of getting 0/1 into low/high are exactly
19180 the same size. Avoiding the shift in the bit extract case helps
19181 pentium4 a bit; no one else seems to care much either way. */
19182 else
19183 {
19184 enum machine_mode half_mode;
19185 rtx (*gen_lshr3)(rtx, rtx, rtx);
19186 rtx (*gen_and3)(rtx, rtx, rtx);
19187 rtx (*gen_xor3)(rtx, rtx, rtx);
19188 HOST_WIDE_INT bits;
19189 rtx x;
19190
19191 if (mode == DImode)
19192 {
19193 half_mode = SImode;
19194 gen_lshr3 = gen_lshrsi3;
19195 gen_and3 = gen_andsi3;
19196 gen_xor3 = gen_xorsi3;
19197 bits = 5;
19198 }
19199 else
19200 {
19201 half_mode = DImode;
19202 gen_lshr3 = gen_lshrdi3;
19203 gen_and3 = gen_anddi3;
19204 gen_xor3 = gen_xordi3;
19205 bits = 6;
19206 }
19207
19208 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19209 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19210 else
19211 x = gen_lowpart (half_mode, operands[2]);
19212 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19213
19214 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19215 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19216 emit_move_insn (low[0], high[0]);
19217 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19218 }
19219
19220 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19221 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19222 return;
19223 }
19224
19225 if (operands[1] == constm1_rtx)
19226 {
19227 /* For -1 << N, we can avoid the shld instruction, because we
19228 know that we're shifting 0...31/63 ones into a -1. */
19229 emit_move_insn (low[0], constm1_rtx);
19230 if (optimize_insn_for_size_p ())
19231 emit_move_insn (high[0], low[0]);
19232 else
19233 emit_move_insn (high[0], constm1_rtx);
19234 }
19235 else
19236 {
19237 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19238
19239 if (!rtx_equal_p (operands[0], operands[1]))
19240 emit_move_insn (operands[0], operands[1]);
19241
19242 split_double_mode (mode, operands, 1, low, high);
19243 emit_insn (gen_shld (high[0], low[0], operands[2]));
19244 }
19245
19246 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19247
19248 if (TARGET_CMOVE && scratch)
19249 {
19250 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19251 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19252
19253 ix86_expand_clear (scratch);
19254 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19255 }
19256 else
19257 {
19258 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19259 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19260
19261 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19262 }
19263 }
19264
19265 void
19266 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19267 {
19268 rtx (*gen_ashr3)(rtx, rtx, rtx)
19269 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19270 rtx (*gen_shrd)(rtx, rtx, rtx);
19271 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19272
19273 rtx low[2], high[2];
19274 int count;
19275
19276 if (CONST_INT_P (operands[2]))
19277 {
19278 split_double_mode (mode, operands, 2, low, high);
19279 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19280
19281 if (count == GET_MODE_BITSIZE (mode) - 1)
19282 {
19283 emit_move_insn (high[0], high[1]);
19284 emit_insn (gen_ashr3 (high[0], high[0],
19285 GEN_INT (half_width - 1)));
19286 emit_move_insn (low[0], high[0]);
19287
19288 }
19289 else if (count >= half_width)
19290 {
19291 emit_move_insn (low[0], high[1]);
19292 emit_move_insn (high[0], low[0]);
19293 emit_insn (gen_ashr3 (high[0], high[0],
19294 GEN_INT (half_width - 1)));
19295
19296 if (count > half_width)
19297 emit_insn (gen_ashr3 (low[0], low[0],
19298 GEN_INT (count - half_width)));
19299 }
19300 else
19301 {
19302 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19303
19304 if (!rtx_equal_p (operands[0], operands[1]))
19305 emit_move_insn (operands[0], operands[1]);
19306
19307 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19308 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19309 }
19310 }
19311 else
19312 {
19313 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19314
19315 if (!rtx_equal_p (operands[0], operands[1]))
19316 emit_move_insn (operands[0], operands[1]);
19317
19318 split_double_mode (mode, operands, 1, low, high);
19319
19320 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19321 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19322
19323 if (TARGET_CMOVE && scratch)
19324 {
19325 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19326 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19327
19328 emit_move_insn (scratch, high[0]);
19329 emit_insn (gen_ashr3 (scratch, scratch,
19330 GEN_INT (half_width - 1)));
19331 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19332 scratch));
19333 }
19334 else
19335 {
19336 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19337 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19338
19339 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19340 }
19341 }
19342 }
19343
19344 void
19345 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19346 {
19347 rtx (*gen_lshr3)(rtx, rtx, rtx)
19348 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19349 rtx (*gen_shrd)(rtx, rtx, rtx);
19350 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19351
19352 rtx low[2], high[2];
19353 int count;
19354
19355 if (CONST_INT_P (operands[2]))
19356 {
19357 split_double_mode (mode, operands, 2, low, high);
19358 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19359
19360 if (count >= half_width)
19361 {
19362 emit_move_insn (low[0], high[1]);
19363 ix86_expand_clear (high[0]);
19364
19365 if (count > half_width)
19366 emit_insn (gen_lshr3 (low[0], low[0],
19367 GEN_INT (count - half_width)));
19368 }
19369 else
19370 {
19371 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19372
19373 if (!rtx_equal_p (operands[0], operands[1]))
19374 emit_move_insn (operands[0], operands[1]);
19375
19376 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19377 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19378 }
19379 }
19380 else
19381 {
19382 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19383
19384 if (!rtx_equal_p (operands[0], operands[1]))
19385 emit_move_insn (operands[0], operands[1]);
19386
19387 split_double_mode (mode, operands, 1, low, high);
19388
19389 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19390 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19391
19392 if (TARGET_CMOVE && scratch)
19393 {
19394 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19395 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19396
19397 ix86_expand_clear (scratch);
19398 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19399 scratch));
19400 }
19401 else
19402 {
19403 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19404 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19405
19406 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19407 }
19408 }
19409 }
19410
19411 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19412 static void
19413 predict_jump (int prob)
19414 {
19415 rtx insn = get_last_insn ();
19416 gcc_assert (JUMP_P (insn));
19417 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19418 }
19419
19420 /* Helper function for the string operations below. Test the bits of
19421 VARIABLE selected by VALUE; if they are all clear, jump to the returned label. */
19422 static rtx
19423 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19424 {
19425 rtx label = gen_label_rtx ();
19426 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19427 if (GET_MODE (variable) == DImode)
19428 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19429 else
19430 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19431 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19432 1, label);
19433 if (epilogue)
19434 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19435 else
19436 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19437 return label;
19438 }
19439
19440 /* Decrease COUNTREG by VALUE. */
19441 static void
19442 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19443 {
19444 rtx (*gen_add)(rtx, rtx, rtx)
19445 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19446
19447 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19448 }
19449
19450 /* Zero extend possibly SImode EXP to Pmode register. */
19451 rtx
19452 ix86_zero_extend_to_Pmode (rtx exp)
19453 {
19454 rtx r;
19455 if (GET_MODE (exp) == VOIDmode)
19456 return force_reg (Pmode, exp);
19457 if (GET_MODE (exp) == Pmode)
19458 return copy_to_mode_reg (Pmode, exp);
19459 r = gen_reg_rtx (Pmode);
19460 emit_insn (gen_zero_extendsidi2 (r, exp));
19461 return r;
19462 }
19463
19464 /* Divide COUNTREG by SCALE. */
19465 static rtx
19466 scale_counter (rtx countreg, int scale)
19467 {
19468 rtx sc;
19469
19470 if (scale == 1)
19471 return countreg;
19472 if (CONST_INT_P (countreg))
19473 return GEN_INT (INTVAL (countreg) / scale);
19474 gcc_assert (REG_P (countreg));
19475
19476 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19477 GEN_INT (exact_log2 (scale)),
19478 NULL, 1, OPTAB_DIRECT);
19479 return sc;
19480 }
19481
19482 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19483 DImode for constant loop counts. */
19484
19485 static enum machine_mode
19486 counter_mode (rtx count_exp)
19487 {
19488 if (GET_MODE (count_exp) != VOIDmode)
19489 return GET_MODE (count_exp);
19490 if (!CONST_INT_P (count_exp))
19491 return Pmode;
19492 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19493 return DImode;
19494 return SImode;
19495 }
19496
19497 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
19498 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19499 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
19500 the equivalent loop that sets memory to VALUE (which should be in MODE).
19501
19502 The size is rounded down to a whole number of chunks moved at once.
19503 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
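/* Roughly, the emitted code has the shape (move case, pseudo C):

       size = count & ~(chunk * unroll - 1);
       for (iter = 0; iter < size; iter += chunk * unroll)
         copy UNROLL chunks from srcptr + iter to destptr + iter;
       destptr += iter;  srcptr += iter;

   any remaining tail bytes are handled by separately emitted epilogue
   code.  */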
19504
19505
19506 static void
19507 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19508 rtx destptr, rtx srcptr, rtx value,
19509 rtx count, enum machine_mode mode, int unroll,
19510 int expected_size)
19511 {
19512 rtx out_label, top_label, iter, tmp;
19513 enum machine_mode iter_mode = counter_mode (count);
19514 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19515 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19516 rtx size;
19517 rtx x_addr;
19518 rtx y_addr;
19519 int i;
19520
19521 top_label = gen_label_rtx ();
19522 out_label = gen_label_rtx ();
19523 iter = gen_reg_rtx (iter_mode);
19524
19525 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19526 NULL, 1, OPTAB_DIRECT);
19527 /* Those two should combine. */
19528 if (piece_size == const1_rtx)
19529 {
19530 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19531 true, out_label);
19532 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19533 }
19534 emit_move_insn (iter, const0_rtx);
19535
19536 emit_label (top_label);
19537
19538 tmp = convert_modes (Pmode, iter_mode, iter, true);
19539 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19540 destmem = change_address (destmem, mode, x_addr);
19541
19542 if (srcmem)
19543 {
19544 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19545 srcmem = change_address (srcmem, mode, y_addr);
19546
19547 /* When unrolling for chips that reorder memory reads and writes,
19548 we can save registers by using a single temporary.
19549 Using 4 temporaries is also overkill in 32-bit mode. */
19550 if (!TARGET_64BIT && 0)
19551 {
19552 for (i = 0; i < unroll; i++)
19553 {
19554 if (i)
19555 {
19556 destmem =
19557 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19558 srcmem =
19559 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19560 }
19561 emit_move_insn (destmem, srcmem);
19562 }
19563 }
19564 else
19565 {
19566 rtx tmpreg[4];
19567 gcc_assert (unroll <= 4);
19568 for (i = 0; i < unroll; i++)
19569 {
19570 tmpreg[i] = gen_reg_rtx (mode);
19571 if (i)
19572 {
19573 srcmem =
19574 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19575 }
19576 emit_move_insn (tmpreg[i], srcmem);
19577 }
19578 for (i = 0; i < unroll; i++)
19579 {
19580 if (i)
19581 {
19582 destmem =
19583 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19584 }
19585 emit_move_insn (destmem, tmpreg[i]);
19586 }
19587 }
19588 }
19589 else
19590 for (i = 0; i < unroll; i++)
19591 {
19592 if (i)
19593 destmem =
19594 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19595 emit_move_insn (destmem, value);
19596 }
19597
19598 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19599 true, OPTAB_LIB_WIDEN);
19600 if (tmp != iter)
19601 emit_move_insn (iter, tmp);
19602
19603 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19604 true, top_label);
19605 if (expected_size != -1)
19606 {
19607 expected_size /= GET_MODE_SIZE (mode) * unroll;
19608 if (expected_size == 0)
19609 predict_jump (0);
19610 else if (expected_size > REG_BR_PROB_BASE)
19611 predict_jump (REG_BR_PROB_BASE - 1);
19612 else
19613 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19614 }
19615 else
19616 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19617 iter = ix86_zero_extend_to_Pmode (iter);
19618 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19619 true, OPTAB_LIB_WIDEN);
19620 if (tmp != destptr)
19621 emit_move_insn (destptr, tmp);
19622 if (srcptr)
19623 {
19624 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19625 true, OPTAB_LIB_WIDEN);
19626 if (tmp != srcptr)
19627 emit_move_insn (srcptr, tmp);
19628 }
19629 emit_label (out_label);
19630 }
19631
19632 /* Output a "rep; mov" instruction.
19633 Arguments have the same meaning as for the previous function. */
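/* E.g. for a known count of 64 bytes and SImode chunks, the count
   register is loaded with 16 (the number of chunks, not bytes) and a
   4-byte "rep movs" is issued.  */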
19634 static void
19635 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19636 rtx destptr, rtx srcptr,
19637 rtx count,
19638 enum machine_mode mode)
19639 {
19640 rtx destexp;
19641 rtx srcexp;
19642 rtx countreg;
19643
19644 /* If the size is known to be a multiple of 4, it is shorter to use 4-byte rep movs. */
19645 if (mode == QImode && CONST_INT_P (count)
19646 && !(INTVAL (count) & 3))
19647 mode = SImode;
19648
19649 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19650 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19651 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19652 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19653 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19654 if (mode != QImode)
19655 {
19656 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19657 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19658 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19659 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19660 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19661 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19662 }
19663 else
19664 {
19665 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19666 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19667 }
19668 if (CONST_INT_P (count))
19669 {
19670 count = GEN_INT (INTVAL (count)
19671 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19672 destmem = shallow_copy_rtx (destmem);
19673 srcmem = shallow_copy_rtx (srcmem);
19674 set_mem_size (destmem, count);
19675 set_mem_size (srcmem, count);
19676 }
19677 else
19678 {
19679 if (MEM_SIZE (destmem))
19680 set_mem_size (destmem, NULL_RTX);
19681 if (MEM_SIZE (srcmem))
19682 set_mem_size (srcmem, NULL_RTX);
19683 }
19684 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19685 destexp, srcexp));
19686 }
19687
19688 /* Output a "rep; stos" instruction.
19689 Arguments have the same meaning as for the previous function. */
19690 static void
19691 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19692 rtx count, enum machine_mode mode,
19693 rtx orig_value)
19694 {
19695 rtx destexp;
19696 rtx countreg;
19697
19698 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19699 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19700 value = force_reg (mode, gen_lowpart (mode, value));
19701 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19702 if (mode != QImode)
19703 {
19704 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19705 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19706 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19707 }
19708 else
19709 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19710 if (orig_value == const0_rtx && CONST_INT_P (count))
19711 {
19712 count = GEN_INT (INTVAL (count)
19713 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19714 destmem = shallow_copy_rtx (destmem);
19715 set_mem_size (destmem, count);
19716 }
19717 else if (MEM_SIZE (destmem))
19718 set_mem_size (destmem, NULL_RTX);
19719 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19720 }
19721
19722 static void
19723 emit_strmov (rtx destmem, rtx srcmem,
19724 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19725 {
19726 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19727 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19728 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19729 }
19730
19731 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19732 static void
19733 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19734 rtx destptr, rtx srcptr, rtx count, int max_size)
19735 {
19736 rtx src, dest;
19737 if (CONST_INT_P (count))
19738 {
19739 HOST_WIDE_INT countval = INTVAL (count);
19740 int offset = 0;
19741
19742 if ((countval & 0x10) && max_size > 16)
19743 {
19744 if (TARGET_64BIT)
19745 {
19746 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19747 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19748 }
19749 else
19750 gcc_unreachable ();
19751 offset += 16;
19752 }
19753 if ((countval & 0x08) && max_size > 8)
19754 {
19755 if (TARGET_64BIT)
19756 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19757 else
19758 {
19759 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19760 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19761 }
19762 offset += 8;
19763 }
19764 if ((countval & 0x04) && max_size > 4)
19765 {
19766 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19767 offset += 4;
19768 }
19769 if ((countval & 0x02) && max_size > 2)
19770 {
19771 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19772 offset += 2;
19773 }
19774 if ((countval & 0x01) && max_size > 1)
19775 {
19776 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19777 offset += 1;
19778 }
19779 return;
19780 }
19781 if (max_size > 8)
19782 {
19783 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19784 count, 1, OPTAB_DIRECT);
19785 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19786 count, QImode, 1, 4);
19787 return;
19788 }
19789
19790 /* When single stringops are available (TARGET_SINGLE_STRINGOP), we can
19791 cheaply advance the dest and src pointers.  Otherwise we save code size
19792 by maintaining an offset (zero is readily available from the preceding
19793 rep operation) and using x86 addressing modes. */
19794 if (TARGET_SINGLE_STRINGOP)
19795 {
19796 if (max_size > 4)
19797 {
19798 rtx label = ix86_expand_aligntest (count, 4, true);
19799 src = change_address (srcmem, SImode, srcptr);
19800 dest = change_address (destmem, SImode, destptr);
19801 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19802 emit_label (label);
19803 LABEL_NUSES (label) = 1;
19804 }
19805 if (max_size > 2)
19806 {
19807 rtx label = ix86_expand_aligntest (count, 2, true);
19808 src = change_address (srcmem, HImode, srcptr);
19809 dest = change_address (destmem, HImode, destptr);
19810 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19811 emit_label (label);
19812 LABEL_NUSES (label) = 1;
19813 }
19814 if (max_size > 1)
19815 {
19816 rtx label = ix86_expand_aligntest (count, 1, true);
19817 src = change_address (srcmem, QImode, srcptr);
19818 dest = change_address (destmem, QImode, destptr);
19819 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19820 emit_label (label);
19821 LABEL_NUSES (label) = 1;
19822 }
19823 }
19824 else
19825 {
19826 rtx offset = force_reg (Pmode, const0_rtx);
19827 rtx tmp;
19828
19829 if (max_size > 4)
19830 {
19831 rtx label = ix86_expand_aligntest (count, 4, true);
19832 src = change_address (srcmem, SImode, srcptr);
19833 dest = change_address (destmem, SImode, destptr);
19834 emit_move_insn (dest, src);
19835 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19836 true, OPTAB_LIB_WIDEN);
19837 if (tmp != offset)
19838 emit_move_insn (offset, tmp);
19839 emit_label (label);
19840 LABEL_NUSES (label) = 1;
19841 }
19842 if (max_size > 2)
19843 {
19844 rtx label = ix86_expand_aligntest (count, 2, true);
19845 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19846 src = change_address (srcmem, HImode, tmp);
19847 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19848 dest = change_address (destmem, HImode, tmp);
19849 emit_move_insn (dest, src);
19850 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19851 true, OPTAB_LIB_WIDEN);
19852 if (tmp != offset)
19853 emit_move_insn (offset, tmp);
19854 emit_label (label);
19855 LABEL_NUSES (label) = 1;
19856 }
19857 if (max_size > 1)
19858 {
19859 rtx label = ix86_expand_aligntest (count, 1, true);
19860 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19861 src = change_address (srcmem, QImode, tmp);
19862 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19863 dest = change_address (destmem, QImode, tmp);
19864 emit_move_insn (dest, src);
19865 emit_label (label);
19866 LABEL_NUSES (label) = 1;
19867 }
19868 }
19869 }
19870
19871 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19872 static void
19873 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19874 rtx count, int max_size)
19875 {
19876 count =
19877 expand_simple_binop (counter_mode (count), AND, count,
19878 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19879 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19880 gen_lowpart (QImode, value), count, QImode,
19881 1, max_size / 2);
19882 }
19883
19884 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19885 static void
19886 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19887 {
19888 rtx dest;
19889
19890 if (CONST_INT_P (count))
19891 {
19892 HOST_WIDE_INT countval = INTVAL (count);
19893 int offset = 0;
19894
19895 if ((countval & 0x10) && max_size > 16)
19896 {
19897 if (TARGET_64BIT)
19898 {
19899 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19900 emit_insn (gen_strset (destptr, dest, value));
19901 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19902 emit_insn (gen_strset (destptr, dest, value));
19903 }
19904 else
19905 gcc_unreachable ();
19906 offset += 16;
19907 }
19908 if ((countval & 0x08) && max_size > 8)
19909 {
19910 if (TARGET_64BIT)
19911 {
19912 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19913 emit_insn (gen_strset (destptr, dest, value));
19914 }
19915 else
19916 {
19917 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19918 emit_insn (gen_strset (destptr, dest, value));
19919 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19920 emit_insn (gen_strset (destptr, dest, value));
19921 }
19922 offset += 8;
19923 }
19924 if ((countval & 0x04) && max_size > 4)
19925 {
19926 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19927 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19928 offset += 4;
19929 }
19930 if ((countval & 0x02) && max_size > 2)
19931 {
19932 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19933 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19934 offset += 2;
19935 }
19936 if ((countval & 0x01) && max_size > 1)
19937 {
19938 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19939 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19940 offset += 1;
19941 }
19942 return;
19943 }
19944 if (max_size > 32)
19945 {
19946 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19947 return;
19948 }
19949 if (max_size > 16)
19950 {
19951 rtx label = ix86_expand_aligntest (count, 16, true);
19952 if (TARGET_64BIT)
19953 {
19954 dest = change_address (destmem, DImode, destptr);
19955 emit_insn (gen_strset (destptr, dest, value));
19956 emit_insn (gen_strset (destptr, dest, value));
19957 }
19958 else
19959 {
19960 dest = change_address (destmem, SImode, destptr);
19961 emit_insn (gen_strset (destptr, dest, value));
19962 emit_insn (gen_strset (destptr, dest, value));
19963 emit_insn (gen_strset (destptr, dest, value));
19964 emit_insn (gen_strset (destptr, dest, value));
19965 }
19966 emit_label (label);
19967 LABEL_NUSES (label) = 1;
19968 }
19969 if (max_size > 8)
19970 {
19971 rtx label = ix86_expand_aligntest (count, 8, true);
19972 if (TARGET_64BIT)
19973 {
19974 dest = change_address (destmem, DImode, destptr);
19975 emit_insn (gen_strset (destptr, dest, value));
19976 }
19977 else
19978 {
19979 dest = change_address (destmem, SImode, destptr);
19980 emit_insn (gen_strset (destptr, dest, value));
19981 emit_insn (gen_strset (destptr, dest, value));
19982 }
19983 emit_label (label);
19984 LABEL_NUSES (label) = 1;
19985 }
19986 if (max_size > 4)
19987 {
19988 rtx label = ix86_expand_aligntest (count, 4, true);
19989 dest = change_address (destmem, SImode, destptr);
19990 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19991 emit_label (label);
19992 LABEL_NUSES (label) = 1;
19993 }
19994 if (max_size > 2)
19995 {
19996 rtx label = ix86_expand_aligntest (count, 2, true);
19997 dest = change_address (destmem, HImode, destptr);
19998 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19999 emit_label (label);
20000 LABEL_NUSES (label) = 1;
20001 }
20002 if (max_size > 1)
20003 {
20004 rtx label = ix86_expand_aligntest (count, 1, true);
20005 dest = change_address (destmem, QImode, destptr);
20006 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20007 emit_label (label);
20008 LABEL_NUSES (label) = 1;
20009 }
20010 }
20011
20012 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
20013 by ALIGN, to DESIRED_ALIGNMENT. */
20014 static void
20015 expand_movmem_prologue (rtx destmem, rtx srcmem,
20016 rtx destptr, rtx srcptr, rtx count,
20017 int align, int desired_alignment)
20018 {
20019 if (align <= 1 && desired_alignment > 1)
20020 {
20021 rtx label = ix86_expand_aligntest (destptr, 1, false);
20022 srcmem = change_address (srcmem, QImode, srcptr);
20023 destmem = change_address (destmem, QImode, destptr);
20024 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20025 ix86_adjust_counter (count, 1);
20026 emit_label (label);
20027 LABEL_NUSES (label) = 1;
20028 }
20029 if (align <= 2 && desired_alignment > 2)
20030 {
20031 rtx label = ix86_expand_aligntest (destptr, 2, false);
20032 srcmem = change_address (srcmem, HImode, srcptr);
20033 destmem = change_address (destmem, HImode, destptr);
20034 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20035 ix86_adjust_counter (count, 2);
20036 emit_label (label);
20037 LABEL_NUSES (label) = 1;
20038 }
20039 if (align <= 4 && desired_alignment > 4)
20040 {
20041 rtx label = ix86_expand_aligntest (destptr, 4, false);
20042 srcmem = change_address (srcmem, SImode, srcptr);
20043 destmem = change_address (destmem, SImode, destptr);
20044 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20045 ix86_adjust_counter (count, 4);
20046 emit_label (label);
20047 LABEL_NUSES (label) = 1;
20048 }
20049 gcc_assert (desired_alignment <= 8);
20050 }
20051
20052 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20053 ALIGN_BYTES is how many bytes need to be copied. */
20054 static rtx
20055 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20056 int desired_align, int align_bytes)
20057 {
20058 rtx src = *srcp;
20059 rtx src_size, dst_size;
20060 int off = 0;
20061 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20062 if (src_align_bytes >= 0)
20063 src_align_bytes = desired_align - src_align_bytes;
20064 src_size = MEM_SIZE (src);
20065 dst_size = MEM_SIZE (dst);
20066 if (align_bytes & 1)
20067 {
20068 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20069 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20070 off = 1;
20071 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20072 }
20073 if (align_bytes & 2)
20074 {
20075 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20076 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20077 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20078 set_mem_align (dst, 2 * BITS_PER_UNIT);
20079 if (src_align_bytes >= 0
20080 && (src_align_bytes & 1) == (align_bytes & 1)
20081 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20082 set_mem_align (src, 2 * BITS_PER_UNIT);
20083 off = 2;
20084 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20085 }
20086 if (align_bytes & 4)
20087 {
20088 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20089 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20090 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20091 set_mem_align (dst, 4 * BITS_PER_UNIT);
20092 if (src_align_bytes >= 0)
20093 {
20094 unsigned int src_align = 0;
20095 if ((src_align_bytes & 3) == (align_bytes & 3))
20096 src_align = 4;
20097 else if ((src_align_bytes & 1) == (align_bytes & 1))
20098 src_align = 2;
20099 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20100 set_mem_align (src, src_align * BITS_PER_UNIT);
20101 }
20102 off = 4;
20103 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20104 }
20105 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20106 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20107 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20108 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20109 if (src_align_bytes >= 0)
20110 {
20111 unsigned int src_align = 0;
20112 if ((src_align_bytes & 7) == (align_bytes & 7))
20113 src_align = 8;
20114 else if ((src_align_bytes & 3) == (align_bytes & 3))
20115 src_align = 4;
20116 else if ((src_align_bytes & 1) == (align_bytes & 1))
20117 src_align = 2;
20118 if (src_align > (unsigned int) desired_align)
20119 src_align = desired_align;
20120 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20121 set_mem_align (src, src_align * BITS_PER_UNIT);
20122 }
20123 if (dst_size)
20124 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20125 if (src_size)
20126 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20127 *srcp = src;
20128 return dst;
20129 }
20130
20131 /* Store enough bytes to DEST to align it, known to be aligned by ALIGN,
20132 to DESIRED_ALIGNMENT. */
20133 static void
20134 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20135 int align, int desired_alignment)
20136 {
20137 if (align <= 1 && desired_alignment > 1)
20138 {
20139 rtx label = ix86_expand_aligntest (destptr, 1, false);
20140 destmem = change_address (destmem, QImode, destptr);
20141 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20142 ix86_adjust_counter (count, 1);
20143 emit_label (label);
20144 LABEL_NUSES (label) = 1;
20145 }
20146 if (align <= 2 && desired_alignment > 2)
20147 {
20148 rtx label = ix86_expand_aligntest (destptr, 2, false);
20149 destmem = change_address (destmem, HImode, destptr);
20150 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20151 ix86_adjust_counter (count, 2);
20152 emit_label (label);
20153 LABEL_NUSES (label) = 1;
20154 }
20155 if (align <= 4 && desired_alignment > 4)
20156 {
20157 rtx label = ix86_expand_aligntest (destptr, 4, false);
20158 destmem = change_address (destmem, SImode, destptr);
20159 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20160 ix86_adjust_counter (count, 4);
20161 emit_label (label);
20162 LABEL_NUSES (label) = 1;
20163 }
20164 gcc_assert (desired_alignment <= 8);
20165 }
20166
20167 /* Set enough bytes at DST, known to be aligned to ALIGN, to align it to
20168 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20169 static rtx
20170 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20171 int desired_align, int align_bytes)
20172 {
20173 int off = 0;
20174 rtx dst_size = MEM_SIZE (dst);
20175 if (align_bytes & 1)
20176 {
20177 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20178 off = 1;
20179 emit_insn (gen_strset (destreg, dst,
20180 gen_lowpart (QImode, value)));
20181 }
20182 if (align_bytes & 2)
20183 {
20184 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20185 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20186 set_mem_align (dst, 2 * BITS_PER_UNIT);
20187 off = 2;
20188 emit_insn (gen_strset (destreg, dst,
20189 gen_lowpart (HImode, value)));
20190 }
20191 if (align_bytes & 4)
20192 {
20193 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20194 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20195 set_mem_align (dst, 4 * BITS_PER_UNIT);
20196 off = 4;
20197 emit_insn (gen_strset (destreg, dst,
20198 gen_lowpart (SImode, value)));
20199 }
20200 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20201 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20202 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20203 if (dst_size)
20204 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20205 return dst;
20206 }
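
/* To illustrate the dispatch above: with ALIGN_BYTES == 3 a QImode store
   followed by an HImode store is emitted, with ALIGN_BYTES == 6 an HImode
   store followed by an SImode store, after which the remaining block's
   recorded alignment is raised to DESIRED_ALIGN and its recorded size is
   reduced by ALIGN_BYTES.  */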
20207
20208 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20209 static enum stringop_alg
20210 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20211 int *dynamic_check)
20212 {
20213 const struct stringop_algs * algs;
20214 bool optimize_for_speed;
20215 /* Algorithms using the rep prefix want at least edi and ecx;
20216 additionally, memset wants eax and memcpy wants esi. Don't
20217 consider such algorithms if the user has appropriated those
20218 registers for their own purposes. */
20219 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20220 || (memset
20221 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20222
20223 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20224 || (alg != rep_prefix_1_byte \
20225 && alg != rep_prefix_4_byte \
20226 && alg != rep_prefix_8_byte))
20227 const struct processor_costs *cost;
20228
20229 /* Even if the string operation call is cold, we still might spend a lot
20230 of time processing large blocks. */
20231 if (optimize_function_for_size_p (cfun)
20232 || (optimize_insn_for_size_p ()
20233 && expected_size != -1 && expected_size < 256))
20234 optimize_for_speed = false;
20235 else
20236 optimize_for_speed = true;
20237
20238 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20239
20240 *dynamic_check = -1;
20241 if (memset)
20242 algs = &cost->memset[TARGET_64BIT != 0];
20243 else
20244 algs = &cost->memcpy[TARGET_64BIT != 0];
20245 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20246 return ix86_stringop_alg;
20247 /* rep; movq or rep; movl is the smallest variant. */
20248 else if (!optimize_for_speed)
20249 {
20250 if (!count || (count & 3))
20251 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20252 else
20253 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20254 }
20255 /* Very tiny blocks are best handled via the loop; REP is expensive to
20256 set up. */
20257 else if (expected_size != -1 && expected_size < 4)
20258 return loop_1_byte;
20259 else if (expected_size != -1)
20260 {
20261 unsigned int i;
20262 enum stringop_alg alg = libcall;
20263 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20264 {
20265 /* We get here if the algorithms that were not libcall-based
20266 were rep-prefix based and we are unable to use rep prefixes
20267 based on global register usage. Break out of the loop and
20268 use the heuristic below. */
20269 if (algs->size[i].max == 0)
20270 break;
20271 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20272 {
20273 enum stringop_alg candidate = algs->size[i].alg;
20274
20275 if (candidate != libcall && ALG_USABLE_P (candidate))
20276 alg = candidate;
20277 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20278 last non-libcall inline algorithm. */
20279 if (TARGET_INLINE_ALL_STRINGOPS)
20280 {
20281 /* When the current size is best to be copied by a libcall,
20282 but we are still forced to inline, run the heuristic below
20283 that will pick code for medium sized blocks. */
20284 if (alg != libcall)
20285 return alg;
20286 break;
20287 }
20288 else if (ALG_USABLE_P (candidate))
20289 return candidate;
20290 }
20291 }
20292 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20293 }
20294 /* When asked to inline the call anyway, try to pick a meaningful choice.
20295 We look for the maximal size of block that is faster to copy by hand
20296 and take blocks of at most that size, guessing that the average size
20297 will be roughly half of the block.
20298
20299 If this turns out to be bad, we might simply specify the preferred
20300 choice in ix86_costs. */
20301 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20302 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20303 {
20304 int max = -1;
20305 enum stringop_alg alg;
20306 int i;
20307 bool any_alg_usable_p = true;
20308
20309 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20310 {
20311 enum stringop_alg candidate = algs->size[i].alg;
20312 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20313
20314 if (candidate != libcall && candidate
20315 && ALG_USABLE_P (candidate))
20316 max = algs->size[i].max;
20317 }
20318 /* If there aren't any usable algorithms, then recursing on
20319 smaller sizes isn't going to find anything. Just return the
20320 simple byte-at-a-time copy loop. */
20321 if (!any_alg_usable_p)
20322 {
20323 /* Pick something reasonable. */
20324 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20325 *dynamic_check = 128;
20326 return loop_1_byte;
20327 }
20328 if (max == -1)
20329 max = 4096;
20330 alg = decide_alg (count, max / 2, memset, dynamic_check);
20331 gcc_assert (*dynamic_check == -1);
20332 gcc_assert (alg != libcall);
20333 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20334 *dynamic_check = max;
20335 return alg;
20336 }
20337 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20338 #undef ALG_USABLE_P
20339 }
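
/* As an example of the choices above: when optimizing for size, a block
   whose COUNT is known to be a nonzero multiple of 4 gets
   rep_prefix_4_byte, while an unknown or unaligned COUNT gets
   rep_prefix_1_byte; if the rep prefix cannot be used because the user
   has fixed the implicit registers, the corresponding loop variants are
   chosen instead.  */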
20340
20341 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20342 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20343 static int
20344 decide_alignment (int align,
20345 enum stringop_alg alg,
20346 int expected_size)
20347 {
20348 int desired_align = 0;
20349 switch (alg)
20350 {
20351 case no_stringop:
20352 gcc_unreachable ();
20353 case loop:
20354 case unrolled_loop:
20355 desired_align = GET_MODE_SIZE (Pmode);
20356 break;
20357 case rep_prefix_8_byte:
20358 desired_align = 8;
20359 break;
20360 case rep_prefix_4_byte:
20361 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20362 copying a whole cache line at once. */
20363 if (TARGET_PENTIUMPRO)
20364 desired_align = 8;
20365 else
20366 desired_align = 4;
20367 break;
20368 case rep_prefix_1_byte:
20369 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20370 copying a whole cache line at once. */
20371 if (TARGET_PENTIUMPRO)
20372 desired_align = 8;
20373 else
20374 desired_align = 1;
20375 break;
20376 case loop_1_byte:
20377 desired_align = 1;
20378 break;
20379 case libcall:
20380 return 0;
20381 }
20382
20383 if (optimize_size)
20384 desired_align = 1;
20385 if (desired_align < align)
20386 desired_align = align;
20387 if (expected_size != -1 && expected_size < 4)
20388 desired_align = align;
20389 return desired_align;
20390 }
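
/* For instance, the loop and unrolled_loop algorithms ask for word-size
   alignment (4 bytes on ia32, 8 bytes on x86-64), and rep_prefix_4_byte
   asks for 4 bytes except on PentiumPro, where 8-byte alignment lets the
   hardware move whole cache lines.  When optimizing for size, or when the
   expected block is tiny, no extra alignment beyond ALIGN is requested.  */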
20391
20392 /* Return the smallest power of 2 greater than VAL. */
20393 static int
20394 smallest_pow2_greater_than (int val)
20395 {
20396 int ret = 1;
20397 while (ret <= val)
20398 ret <<= 1;
20399 return ret;
20400 }
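
/* For example, both smallest_pow2_greater_than (4) and
   smallest_pow2_greater_than (7) return 8; the result is always strictly
   greater than VAL, even when VAL is already a power of two.  */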
20401
20402 /* Expand string move (memcpy) operation. Use i386 string operations
20403 when profitable. ix86_expand_setmem contains similar code. The code
20404 depends upon architecture, block size and alignment, but always has
20405 the same overall structure:
20406
20407 1) Prologue guard: Conditional that jumps up to epilogues for small
20408 blocks that can be handled by the epilogue alone. This is faster
20409 but also needed for correctness, since the prologue assumes the block
20410 is larger than the desired alignment.
20411
20412 Optional dynamic check for size and libcall for large
20413 blocks is emitted here too, with -minline-stringops-dynamically.
20414
20415 2) Prologue: copy first few bytes in order to get destination
20416 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20417 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20418 copied. We emit either a jump tree on power of two sized
20419 blocks, or a byte loop.
20420
20421 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20422 with specified algorithm.
20423
20424 4) Epilogue: code copying tail of the block that is too small to be
20425 handled by main body (or up to size guarded by prologue guard). */
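
/* Schematically (an informal sketch, not the literal emitted sequence),
   for a non-constant COUNT the generated code has the shape:

       if (count < epilogue_size_needed) goto epilogue;     -- 1) guard
       copy a few bytes until DST reaches DESIRED_ALIGN;    -- 2) prologue
       copy SIZE_NEEDED-byte chunks via loop or rep prefix; -- 3) main body
     epilogue:
       copy the remaining tail, at most EPILOGUE_SIZE_NEEDED - 1 bytes.  */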
20426
20427 bool
20428 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20429 rtx expected_align_exp, rtx expected_size_exp)
20430 {
20431 rtx destreg;
20432 rtx srcreg;
20433 rtx label = NULL;
20434 rtx tmp;
20435 rtx jump_around_label = NULL;
20436 HOST_WIDE_INT align = 1;
20437 unsigned HOST_WIDE_INT count = 0;
20438 HOST_WIDE_INT expected_size = -1;
20439 int size_needed = 0, epilogue_size_needed;
20440 int desired_align = 0, align_bytes = 0;
20441 enum stringop_alg alg;
20442 int dynamic_check;
20443 bool need_zero_guard = false;
20444
20445 if (CONST_INT_P (align_exp))
20446 align = INTVAL (align_exp);
20447 /* i386 can do misaligned access at a reasonably increased cost. */
20448 if (CONST_INT_P (expected_align_exp)
20449 && INTVAL (expected_align_exp) > align)
20450 align = INTVAL (expected_align_exp);
20451 /* ALIGN is the minimum of destination and source alignment, but we care here
20452 just about destination alignment. */
20453 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20454 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20455
20456 if (CONST_INT_P (count_exp))
20457 count = expected_size = INTVAL (count_exp);
20458 if (CONST_INT_P (expected_size_exp) && count == 0)
20459 expected_size = INTVAL (expected_size_exp);
20460
20461 /* Make sure we don't need to care about overflow later on. */
20462 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20463 return false;
20464
20465 /* Step 0: Decide on preferred algorithm, desired alignment and
20466 size of chunks to be copied by main loop. */
20467
20468 alg = decide_alg (count, expected_size, false, &dynamic_check);
20469 desired_align = decide_alignment (align, alg, expected_size);
20470
20471 if (!TARGET_ALIGN_STRINGOPS)
20472 align = desired_align;
20473
20474 if (alg == libcall)
20475 return false;
20476 gcc_assert (alg != no_stringop);
20477 if (!count)
20478 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20479 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20480 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20481 switch (alg)
20482 {
20483 case libcall:
20484 case no_stringop:
20485 gcc_unreachable ();
20486 case loop:
20487 need_zero_guard = true;
20488 size_needed = GET_MODE_SIZE (Pmode);
20489 break;
20490 case unrolled_loop:
20491 need_zero_guard = true;
20492 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20493 break;
20494 case rep_prefix_8_byte:
20495 size_needed = 8;
20496 break;
20497 case rep_prefix_4_byte:
20498 size_needed = 4;
20499 break;
20500 case rep_prefix_1_byte:
20501 size_needed = 1;
20502 break;
20503 case loop_1_byte:
20504 need_zero_guard = true;
20505 size_needed = 1;
20506 break;
20507 }
20508
20509 epilogue_size_needed = size_needed;
20510
20511 /* Step 1: Prologue guard. */
20512
20513 /* Alignment code needs count to be in register. */
20514 if (CONST_INT_P (count_exp) && desired_align > align)
20515 {
20516 if (INTVAL (count_exp) > desired_align
20517 && INTVAL (count_exp) > size_needed)
20518 {
20519 align_bytes
20520 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20521 if (align_bytes <= 0)
20522 align_bytes = 0;
20523 else
20524 align_bytes = desired_align - align_bytes;
20525 }
20526 if (align_bytes == 0)
20527 count_exp = force_reg (counter_mode (count_exp), count_exp);
20528 }
20529 gcc_assert (desired_align >= 1 && align >= 1);
20530
20531 /* Ensure that alignment prologue won't copy past end of block. */
20532 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20533 {
20534 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20535 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20536 Make sure it is power of 2. */
20537 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20538
20539 if (count)
20540 {
20541 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20542 {
20543 /* If main algorithm works on QImode, no epilogue is needed.
20544 For small sizes just don't align anything. */
20545 if (size_needed == 1)
20546 desired_align = align;
20547 else
20548 goto epilogue;
20549 }
20550 }
20551 else
20552 {
20553 label = gen_label_rtx ();
20554 emit_cmp_and_jump_insns (count_exp,
20555 GEN_INT (epilogue_size_needed),
20556 LTU, 0, counter_mode (count_exp), 1, label);
20557 if (expected_size == -1 || expected_size < epilogue_size_needed)
20558 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20559 else
20560 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20561 }
20562 }
20563
20564 /* Emit code to decide on runtime whether library call or inline should be
20565 used. */
20566 if (dynamic_check != -1)
20567 {
20568 if (CONST_INT_P (count_exp))
20569 {
20570 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20571 {
20572 emit_block_move_via_libcall (dst, src, count_exp, false);
20573 count_exp = const0_rtx;
20574 goto epilogue;
20575 }
20576 }
20577 else
20578 {
20579 rtx hot_label = gen_label_rtx ();
20580 jump_around_label = gen_label_rtx ();
20581 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20582 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20583 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20584 emit_block_move_via_libcall (dst, src, count_exp, false);
20585 emit_jump (jump_around_label);
20586 emit_label (hot_label);
20587 }
20588 }
20589
20590 /* Step 2: Alignment prologue. */
20591
20592 if (desired_align > align)
20593 {
20594 if (align_bytes == 0)
20595 {
20596 /* Except for the first move in the epilogue, we no longer know
20597 the constant offset in the aliasing info. It doesn't seem worth
20598 the pain to maintain it for the first move, so throw away
20599 the info early. */
20600 src = change_address (src, BLKmode, srcreg);
20601 dst = change_address (dst, BLKmode, destreg);
20602 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20603 desired_align);
20604 }
20605 else
20606 {
20607 /* If we know how many bytes need to be stored before dst is
20608 sufficiently aligned, maintain aliasing info accurately. */
20609 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20610 desired_align, align_bytes);
20611 count_exp = plus_constant (count_exp, -align_bytes);
20612 count -= align_bytes;
20613 }
20614 if (need_zero_guard
20615 && (count < (unsigned HOST_WIDE_INT) size_needed
20616 || (align_bytes == 0
20617 && count < ((unsigned HOST_WIDE_INT) size_needed
20618 + desired_align - align))))
20619 {
20620 /* It is possible that we copied enough so the main loop will not
20621 execute. */
20622 gcc_assert (size_needed > 1);
20623 if (label == NULL_RTX)
20624 label = gen_label_rtx ();
20625 emit_cmp_and_jump_insns (count_exp,
20626 GEN_INT (size_needed),
20627 LTU, 0, counter_mode (count_exp), 1, label);
20628 if (expected_size == -1
20629 || expected_size < (desired_align - align) / 2 + size_needed)
20630 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20631 else
20632 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20633 }
20634 }
20635 if (label && size_needed == 1)
20636 {
20637 emit_label (label);
20638 LABEL_NUSES (label) = 1;
20639 label = NULL;
20640 epilogue_size_needed = 1;
20641 }
20642 else if (label == NULL_RTX)
20643 epilogue_size_needed = size_needed;
20644
20645 /* Step 3: Main loop. */
20646
20647 switch (alg)
20648 {
20649 case libcall:
20650 case no_stringop:
20651 gcc_unreachable ();
20652 case loop_1_byte:
20653 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20654 count_exp, QImode, 1, expected_size);
20655 break;
20656 case loop:
20657 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20658 count_exp, Pmode, 1, expected_size);
20659 break;
20660 case unrolled_loop:
20661 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20662 registers for 4 temporaries anyway. */
20663 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20664 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20665 expected_size);
20666 break;
20667 case rep_prefix_8_byte:
20668 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20669 DImode);
20670 break;
20671 case rep_prefix_4_byte:
20672 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20673 SImode);
20674 break;
20675 case rep_prefix_1_byte:
20676 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20677 QImode);
20678 break;
20679 }
20680 /* Properly adjust the offsets of src and dest memory for aliasing. */
20681 if (CONST_INT_P (count_exp))
20682 {
20683 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20684 (count / size_needed) * size_needed);
20685 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20686 (count / size_needed) * size_needed);
20687 }
20688 else
20689 {
20690 src = change_address (src, BLKmode, srcreg);
20691 dst = change_address (dst, BLKmode, destreg);
20692 }
20693
20694 /* Step 4: Epilogue to copy the remaining bytes. */
20695 epilogue:
20696 if (label)
20697 {
20698 /* When the main loop is done, COUNT_EXP might hold the original count,
20699 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20700 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20701 bytes. Compensate if needed. */
20702
20703 if (size_needed < epilogue_size_needed)
20704 {
20705 tmp =
20706 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20707 GEN_INT (size_needed - 1), count_exp, 1,
20708 OPTAB_DIRECT);
20709 if (tmp != count_exp)
20710 emit_move_insn (count_exp, tmp);
20711 }
20712 emit_label (label);
20713 LABEL_NUSES (label) = 1;
20714 }
20715
20716 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20717 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20718 epilogue_size_needed);
20719 if (jump_around_label)
20720 emit_label (jump_around_label);
20721 return true;
20722 }
20723
20724 /* Helper function for memset. For the QImode value 0xXY produce
20725 0xXYXYXYXY of the width specified by MODE. This is essentially
20726 a multiplication by 0x01010101, but we can do slightly better than
20727 synth_mult by unwinding the sequence by hand on CPUs with a
20728 slow multiply. */
20729 static rtx
20730 promote_duplicated_reg (enum machine_mode mode, rtx val)
20731 {
20732 enum machine_mode valmode = GET_MODE (val);
20733 rtx tmp;
20734 int nops = mode == DImode ? 3 : 2;
20735
20736 gcc_assert (mode == SImode || mode == DImode);
20737 if (val == const0_rtx)
20738 return copy_to_mode_reg (mode, const0_rtx);
20739 if (CONST_INT_P (val))
20740 {
20741 HOST_WIDE_INT v = INTVAL (val) & 255;
20742
20743 v |= v << 8;
20744 v |= v << 16;
20745 if (mode == DImode)
20746 v |= (v << 16) << 16;
20747 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20748 }
20749
20750 if (valmode == VOIDmode)
20751 valmode = QImode;
20752 if (valmode != QImode)
20753 val = gen_lowpart (QImode, val);
20754 if (mode == QImode)
20755 return val;
20756 if (!TARGET_PARTIAL_REG_STALL)
20757 nops--;
20758 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20759 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20760 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20761 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20762 {
20763 rtx reg = convert_modes (mode, QImode, val, true);
20764 tmp = promote_duplicated_reg (mode, const1_rtx);
20765 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20766 OPTAB_DIRECT);
20767 }
20768 else
20769 {
20770 rtx reg = convert_modes (mode, QImode, val, true);
20771
20772 if (!TARGET_PARTIAL_REG_STALL)
20773 if (mode == SImode)
20774 emit_insn (gen_movsi_insv_1 (reg, reg));
20775 else
20776 emit_insn (gen_movdi_insv_1 (reg, reg));
20777 else
20778 {
20779 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20780 NULL, 1, OPTAB_DIRECT);
20781 reg =
20782 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20783 }
20784 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20785 NULL, 1, OPTAB_DIRECT);
20786 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20787 if (mode == SImode)
20788 return reg;
20789 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20790 NULL, 1, OPTAB_DIRECT);
20791 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20792 return reg;
20793 }
20794 }
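
/* As a concrete example of the above: for VAL == 0xAB the constant path
   builds 0xABAB, then 0xABABABAB, and for DImode finally
   0xABABABABABABABAB, while the multiply path computes the same value as
   0xAB * 0x01010101 (or 0xAB * 0x0101010101010101 for DImode) when the
   cost model says the multiply is cheap enough.  */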
20795
20796 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
20797 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
20798 alignment from ALIGN to DESIRED_ALIGN. */
20799 static rtx
20800 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20801 {
20802 rtx promoted_val;
20803
20804 if (TARGET_64BIT
20805 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20806 promoted_val = promote_duplicated_reg (DImode, val);
20807 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20808 promoted_val = promote_duplicated_reg (SImode, val);
20809 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20810 promoted_val = promote_duplicated_reg (HImode, val);
20811 else
20812 promoted_val = val;
20813
20814 return promoted_val;
20815 }
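
/* For instance, a main loop working in 8-byte chunks on x86-64
   (SIZE_NEEDED == 8) gets VAL duplicated into a DImode register, while a
   main loop using 4-byte chunks only needs the SImode duplicate.  */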
20816
20817 /* Expand string set operation (memset). Use i386 string operations when
20818 profitable. See the ix86_expand_movmem comment for an explanation of the
20819 individual steps performed. */
20820 bool
20821 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20822 rtx expected_align_exp, rtx expected_size_exp)
20823 {
20824 rtx destreg;
20825 rtx label = NULL;
20826 rtx tmp;
20827 rtx jump_around_label = NULL;
20828 HOST_WIDE_INT align = 1;
20829 unsigned HOST_WIDE_INT count = 0;
20830 HOST_WIDE_INT expected_size = -1;
20831 int size_needed = 0, epilogue_size_needed;
20832 int desired_align = 0, align_bytes = 0;
20833 enum stringop_alg alg;
20834 rtx promoted_val = NULL;
20835 bool force_loopy_epilogue = false;
20836 int dynamic_check;
20837 bool need_zero_guard = false;
20838
20839 if (CONST_INT_P (align_exp))
20840 align = INTVAL (align_exp);
20841 /* i386 can do misaligned access at a reasonably increased cost. */
20842 if (CONST_INT_P (expected_align_exp)
20843 && INTVAL (expected_align_exp) > align)
20844 align = INTVAL (expected_align_exp);
20845 if (CONST_INT_P (count_exp))
20846 count = expected_size = INTVAL (count_exp);
20847 if (CONST_INT_P (expected_size_exp) && count == 0)
20848 expected_size = INTVAL (expected_size_exp);
20849
20850 /* Make sure we don't need to care about overflow later on. */
20851 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20852 return false;
20853
20854 /* Step 0: Decide on preferred algorithm, desired alignment and
20855 size of chunks to be copied by main loop. */
20856
20857 alg = decide_alg (count, expected_size, true, &dynamic_check);
20858 desired_align = decide_alignment (align, alg, expected_size);
20859
20860 if (!TARGET_ALIGN_STRINGOPS)
20861 align = desired_align;
20862
20863 if (alg == libcall)
20864 return false;
20865 gcc_assert (alg != no_stringop);
20866 if (!count)
20867 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20868 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20869 switch (alg)
20870 {
20871 case libcall:
20872 case no_stringop:
20873 gcc_unreachable ();
20874 case loop:
20875 need_zero_guard = true;
20876 size_needed = GET_MODE_SIZE (Pmode);
20877 break;
20878 case unrolled_loop:
20879 need_zero_guard = true;
20880 size_needed = GET_MODE_SIZE (Pmode) * 4;
20881 break;
20882 case rep_prefix_8_byte:
20883 size_needed = 8;
20884 break;
20885 case rep_prefix_4_byte:
20886 size_needed = 4;
20887 break;
20888 case rep_prefix_1_byte:
20889 size_needed = 1;
20890 break;
20891 case loop_1_byte:
20892 need_zero_guard = true;
20893 size_needed = 1;
20894 break;
20895 }
20896 epilogue_size_needed = size_needed;
20897
20898 /* Step 1: Prologue guard. */
20899
20900 /* Alignment code needs count to be in register. */
20901 if (CONST_INT_P (count_exp) && desired_align > align)
20902 {
20903 if (INTVAL (count_exp) > desired_align
20904 && INTVAL (count_exp) > size_needed)
20905 {
20906 align_bytes
20907 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20908 if (align_bytes <= 0)
20909 align_bytes = 0;
20910 else
20911 align_bytes = desired_align - align_bytes;
20912 }
20913 if (align_bytes == 0)
20914 {
20915 enum machine_mode mode = SImode;
20916 if (TARGET_64BIT && (count & ~0xffffffff))
20917 mode = DImode;
20918 count_exp = force_reg (mode, count_exp);
20919 }
20920 }
20921 /* Do the cheap promotion to allow better CSE across the
20922 main loop and epilogue (i.e. one load of the big constant in
20923 front of all the code). */
20924 if (CONST_INT_P (val_exp))
20925 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20926 desired_align, align);
20927 /* Ensure that alignment prologue won't copy past end of block. */
20928 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20929 {
20930 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20931 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20932 Make sure it is power of 2. */
20933 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20934
20935 /* To improve performance of small blocks, we jump around the VAL
20936 promoting code. This means that if the promoted VAL is not constant,
20937 we might not use it in the epilogue and have to use the byte
20938 loop variant. */
20939 if (epilogue_size_needed > 2 && !promoted_val)
20940 force_loopy_epilogue = true;
20941 if (count)
20942 {
20943 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20944 {
20945 /* If main algorithm works on QImode, no epilogue is needed.
20946 For small sizes just don't align anything. */
20947 if (size_needed == 1)
20948 desired_align = align;
20949 else
20950 goto epilogue;
20951 }
20952 }
20953 else
20954 {
20955 label = gen_label_rtx ();
20956 emit_cmp_and_jump_insns (count_exp,
20957 GEN_INT (epilogue_size_needed),
20958 LTU, 0, counter_mode (count_exp), 1, label);
20959 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20960 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20961 else
20962 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20963 }
20964 }
20965 if (dynamic_check != -1)
20966 {
20967 rtx hot_label = gen_label_rtx ();
20968 jump_around_label = gen_label_rtx ();
20969 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20970 LEU, 0, counter_mode (count_exp), 1, hot_label);
20971 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20972 set_storage_via_libcall (dst, count_exp, val_exp, false);
20973 emit_jump (jump_around_label);
20974 emit_label (hot_label);
20975 }
20976
20977 /* Step 2: Alignment prologue. */
20978
20979 /* Do the expensive promotion once we branched off the small blocks. */
20980 if (!promoted_val)
20981 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20982 desired_align, align);
20983 gcc_assert (desired_align >= 1 && align >= 1);
20984
20985 if (desired_align > align)
20986 {
20987 if (align_bytes == 0)
20988 {
20989 /* Except for the first move in the epilogue, we no longer know
20990 the constant offset in the aliasing info. It doesn't seem worth
20991 the pain to maintain it for the first move, so throw away
20992 the info early. */
20993 dst = change_address (dst, BLKmode, destreg);
20994 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20995 desired_align);
20996 }
20997 else
20998 {
20999 /* If we know how many bytes need to be stored before dst is
21000 sufficiently aligned, maintain aliasing info accurately. */
21001 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21002 desired_align, align_bytes);
21003 count_exp = plus_constant (count_exp, -align_bytes);
21004 count -= align_bytes;
21005 }
21006 if (need_zero_guard
21007 && (count < (unsigned HOST_WIDE_INT) size_needed
21008 || (align_bytes == 0
21009 && count < ((unsigned HOST_WIDE_INT) size_needed
21010 + desired_align - align))))
21011 {
21012 /* It is possible that we copied enough so the main loop will not
21013 execute. */
21014 gcc_assert (size_needed > 1);
21015 if (label == NULL_RTX)
21016 label = gen_label_rtx ();
21017 emit_cmp_and_jump_insns (count_exp,
21018 GEN_INT (size_needed),
21019 LTU, 0, counter_mode (count_exp), 1, label);
21020 if (expected_size == -1
21021 || expected_size < (desired_align - align) / 2 + size_needed)
21022 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21023 else
21024 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21025 }
21026 }
21027 if (label && size_needed == 1)
21028 {
21029 emit_label (label);
21030 LABEL_NUSES (label) = 1;
21031 label = NULL;
21032 promoted_val = val_exp;
21033 epilogue_size_needed = 1;
21034 }
21035 else if (label == NULL_RTX)
21036 epilogue_size_needed = size_needed;
21037
21038 /* Step 3: Main loop. */
21039
21040 switch (alg)
21041 {
21042 case libcall:
21043 case no_stringop:
21044 gcc_unreachable ();
21045 case loop_1_byte:
21046 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21047 count_exp, QImode, 1, expected_size);
21048 break;
21049 case loop:
21050 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21051 count_exp, Pmode, 1, expected_size);
21052 break;
21053 case unrolled_loop:
21054 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21055 count_exp, Pmode, 4, expected_size);
21056 break;
21057 case rep_prefix_8_byte:
21058 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21059 DImode, val_exp);
21060 break;
21061 case rep_prefix_4_byte:
21062 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21063 SImode, val_exp);
21064 break;
21065 case rep_prefix_1_byte:
21066 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21067 QImode, val_exp);
21068 break;
21069 }
21070 /* Properly adjust the offset of dest memory for aliasing. */
21071 if (CONST_INT_P (count_exp))
21072 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21073 (count / size_needed) * size_needed);
21074 else
21075 dst = change_address (dst, BLKmode, destreg);
21076
21077 /* Step 4: Epilogue to copy the remaining bytes. */
21078
21079 if (label)
21080 {
21081 /* When the main loop is done, COUNT_EXP might hold the original count,
21082 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21083 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21084 bytes. Compensate if needed. */
21085
21086 if (size_needed < epilogue_size_needed)
21087 {
21088 tmp =
21089 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21090 GEN_INT (size_needed - 1), count_exp, 1,
21091 OPTAB_DIRECT);
21092 if (tmp != count_exp)
21093 emit_move_insn (count_exp, tmp);
21094 }
21095 emit_label (label);
21096 LABEL_NUSES (label) = 1;
21097 }
21098 epilogue:
21099 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21100 {
21101 if (force_loopy_epilogue)
21102 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21103 epilogue_size_needed);
21104 else
21105 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21106 epilogue_size_needed);
21107 }
21108 if (jump_around_label)
21109 emit_label (jump_around_label);
21110 return true;
21111 }
21112
21113 /* Expand the appropriate insns for doing strlen if not just doing
21114 repnz; scasb
21115
21116 out = result, initialized with the start address
21117 align_rtx = alignment of the address.
21118 scratch = scratch register, initialized with the start address when
21119 not aligned, otherwise undefined
21120
21121 This is just the body. It needs the initializations mentioned above and
21122 some address computing at the end. These things are done in i386.md. */
21123
21124 static void
21125 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21126 {
21127 int align;
21128 rtx tmp;
21129 rtx align_2_label = NULL_RTX;
21130 rtx align_3_label = NULL_RTX;
21131 rtx align_4_label = gen_label_rtx ();
21132 rtx end_0_label = gen_label_rtx ();
21133 rtx mem;
21134 rtx tmpreg = gen_reg_rtx (SImode);
21135 rtx scratch = gen_reg_rtx (SImode);
21136 rtx cmp;
21137
21138 align = 0;
21139 if (CONST_INT_P (align_rtx))
21140 align = INTVAL (align_rtx);
21141
21142 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21143
21144 /* Is there a known alignment and is it less than 4? */
21145 if (align < 4)
21146 {
21147 rtx scratch1 = gen_reg_rtx (Pmode);
21148 emit_move_insn (scratch1, out);
21149 /* Is there a known alignment and is it not 2? */
21150 if (align != 2)
21151 {
21152 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21153 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21154
21155 /* Leave just the 3 lower bits. */
21156 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21157 NULL_RTX, 0, OPTAB_WIDEN);
21158
21159 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21160 Pmode, 1, align_4_label);
21161 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21162 Pmode, 1, align_2_label);
21163 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21164 Pmode, 1, align_3_label);
21165 }
21166 else
21167 {
21168 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21169 check whether it is aligned to 4 bytes. */
21170
21171 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21172 NULL_RTX, 0, OPTAB_WIDEN);
21173
21174 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21175 Pmode, 1, align_4_label);
21176 }
21177
21178 mem = change_address (src, QImode, out);
21179
21180 /* Now compare the bytes. */
21181
21182 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21183 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21184 QImode, 1, end_0_label);
21185
21186 /* Increment the address. */
21187 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21188
21189 /* Not needed with an alignment of 2 */
21190 if (align != 2)
21191 {
21192 emit_label (align_2_label);
21193
21194 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21195 end_0_label);
21196
21197 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21198
21199 emit_label (align_3_label);
21200 }
21201
21202 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21203 end_0_label);
21204
21205 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21206 }
21207
21208 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21209 align this loop; that only makes the program bigger and does not
21210 speed it up. */
21211 emit_label (align_4_label);
21212
21213 mem = change_address (src, SImode, out);
21214 emit_move_insn (scratch, mem);
21215 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21216
21217 /* This formula yields a nonzero result iff one of the bytes is zero.
21218 This saves three branches inside the loop and many cycles. */
21219
21220 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21221 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21222 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21223 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21224 gen_int_mode (0x80808080, SImode)));
21225 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21226 align_4_label);
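
/* This is the classic "does a word contain a zero byte" test:
   (x - 0x01010101) & ~x & 0x80808080 is nonzero iff at least one byte of
   x is zero, with no false positives.  For example, x == 0x61006263
   yields 0x00800000 so the loop exits, while x == 0x61626364 yields zero
   and the scan continues.  */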
21227
21228 if (TARGET_CMOVE)
21229 {
21230 rtx reg = gen_reg_rtx (SImode);
21231 rtx reg2 = gen_reg_rtx (Pmode);
21232 emit_move_insn (reg, tmpreg);
21233 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21234
21235 /* If zero is not in the first two bytes, move two bytes forward. */
21236 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21237 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21238 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21239 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21240 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21241 reg,
21242 tmpreg)));
21243 /* Emit lea manually to avoid clobbering of flags. */
21244 emit_insn (gen_rtx_SET (SImode, reg2,
21245 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21246
21247 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21248 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21249 emit_insn (gen_rtx_SET (VOIDmode, out,
21250 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21251 reg2,
21252 out)));
21253 }
21254 else
21255 {
21256 rtx end_2_label = gen_label_rtx ();
21257 /* Is zero in the first two bytes? */
21258
21259 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21260 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21261 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21262 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21263 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21264 pc_rtx);
21265 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21266 JUMP_LABEL (tmp) = end_2_label;
21267
21268 /* Not in the first two. Move two bytes forward. */
21269 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21270 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21271
21272 emit_label (end_2_label);
21273
21274 }
21275
21276 /* Avoid branch in fixing the byte. */
21277 tmpreg = gen_lowpart (QImode, tmpreg);
21278 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21279 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21280 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21281 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21282
21283 emit_label (end_0_label);
21284 }
21285
21286 /* Expand strlen. */
21287
21288 bool
21289 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21290 {
21291 rtx addr, scratch1, scratch2, scratch3, scratch4;
21292
21293 /* The generic case of the strlen expander is long. Avoid expanding
21294 it unless TARGET_INLINE_ALL_STRINGOPS. */
21295
21296 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21297 && !TARGET_INLINE_ALL_STRINGOPS
21298 && !optimize_insn_for_size_p ()
21299 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21300 return false;
21301
21302 addr = force_reg (Pmode, XEXP (src, 0));
21303 scratch1 = gen_reg_rtx (Pmode);
21304
21305 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21306 && !optimize_insn_for_size_p ())
21307 {
21308 /* Well, it seems that some optimizer does not combine a call like
21309 foo(strlen(bar), strlen(bar));
21310 when the move and the subtraction are done here. It does calculate
21311 the length just once when these instructions are done inside of
21312 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21313 often used and I use one fewer register for the lifetime of
21314 output_strlen_unroll() this is better. */
21315
21316 emit_move_insn (out, addr);
21317
21318 ix86_expand_strlensi_unroll_1 (out, src, align);
21319
21320 /* strlensi_unroll_1 returns the address of the zero at the end of
21321 the string, like memchr(), so compute the length by subtracting
21322 the start address. */
21323 emit_insn (ix86_gen_sub3 (out, out, addr));
21324 }
21325 else
21326 {
21327 rtx unspec;
21328
21329 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21330 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21331 return false;
21332
21333 scratch2 = gen_reg_rtx (Pmode);
21334 scratch3 = gen_reg_rtx (Pmode);
21335 scratch4 = force_reg (Pmode, constm1_rtx);
21336
21337 emit_move_insn (scratch3, addr);
21338 eoschar = force_reg (QImode, eoschar);
21339
21340 src = replace_equiv_address_nv (src, scratch3);
21341
21342 /* If .md starts supporting :P, this can be done in .md. */
21343 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21344 scratch4), UNSPEC_SCAS);
21345 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21346 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21347 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21348 }
21349 return true;
21350 }
21351
21352 /* For a given symbol (function), construct code to compute the address of
21353 its PLT entry in the large x86-64 PIC model. */
21354 rtx
21355 construct_plt_address (rtx symbol)
21356 {
21357 rtx tmp = gen_reg_rtx (Pmode);
21358 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21359
21360 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21361 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21362
21363 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21364 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21365 return tmp;
21366 }
21367
21368 rtx
21369 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21370 rtx callarg2,
21371 rtx pop, bool sibcall)
21372 {
21373 rtx use = NULL, call;
21374
21375 if (pop == const0_rtx)
21376 pop = NULL;
21377 gcc_assert (!TARGET_64BIT || !pop);
21378
21379 if (TARGET_MACHO && !TARGET_64BIT)
21380 {
21381 #if TARGET_MACHO
21382 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21383 fnaddr = machopic_indirect_call_target (fnaddr);
21384 #endif
21385 }
21386 else
21387 {
21388 /* Static functions and indirect calls don't need the pic register. */
21389 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21390 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21391 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21392 use_reg (&use, pic_offset_table_rtx);
21393 }
21394
21395 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21396 {
21397 rtx al = gen_rtx_REG (QImode, AX_REG);
21398 emit_move_insn (al, callarg2);
21399 use_reg (&use, al);
21400 }
21401
21402 if (ix86_cmodel == CM_LARGE_PIC
21403 && MEM_P (fnaddr)
21404 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21405 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21406 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21407 else if (sibcall
21408 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21409 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21410 {
21411 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21412 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21413 }
21414
21415 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21416 if (retval)
21417 call = gen_rtx_SET (VOIDmode, retval, call);
21418 if (pop)
21419 {
21420 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21421 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21422 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21423 }
21424 if (TARGET_64BIT_MS_ABI
21425 && (!callarg2 || INTVAL (callarg2) != -2))
21426 {
21427 /* We need to represent that the XMM6-XMM15, SI and DI registers
21428 are clobbered by SYSV calls. */
21429 static int clobbered_registers[] = {
21430 XMM6_REG, XMM7_REG, XMM8_REG,
21431 XMM9_REG, XMM10_REG, XMM11_REG,
21432 XMM12_REG, XMM13_REG, XMM14_REG,
21433 XMM15_REG, SI_REG, DI_REG
21434 };
21435 unsigned int i;
21436 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21437 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21438 UNSPEC_MS_TO_SYSV_CALL);
21439
21440 vec[0] = call;
21441 vec[1] = unspec;
21442 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21443 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21444 ? TImode : DImode,
21445 gen_rtx_REG
21446 (SSE_REGNO_P (clobbered_registers[i])
21447 ? TImode : DImode,
21448 clobbered_registers[i]));
21449
21450 call = gen_rtx_PARALLEL (VOIDmode,
21451 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21452 + 2, vec));
21453 }
21454
21455 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21456 if (TARGET_VZEROUPPER)
21457 {
21458 rtx unspec;
21459 int avx256;
21460
21461 if (cfun->machine->callee_pass_avx256_p)
21462 {
21463 if (cfun->machine->callee_return_avx256_p)
21464 avx256 = callee_return_pass_avx256;
21465 else
21466 avx256 = callee_pass_avx256;
21467 }
21468 else if (cfun->machine->callee_return_avx256_p)
21469 avx256 = callee_return_avx256;
21470 else
21471 avx256 = call_no_avx256;
21472
21473 if (reload_completed)
21474 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21475 else
21476 {
21477 unspec = gen_rtx_UNSPEC (VOIDmode,
21478 gen_rtvec (1, GEN_INT (avx256)),
21479 UNSPEC_CALL_NEEDS_VZEROUPPER);
21480 call = gen_rtx_PARALLEL (VOIDmode,
21481 gen_rtvec (2, call, unspec));
21482 }
21483 }
21484
21485 call = emit_call_insn (call);
21486 if (use)
21487 CALL_INSN_FUNCTION_USAGE (call) = use;
21488
21489 return call;
21490 }
21491
21492 void
21493 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21494 {
21495 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21496 emit_insn (gen_avx_vzeroupper (vzeroupper));
21497 emit_call_insn (call);
21498 }
21499
21500 /* Output the assembly for a call instruction. */
21501
21502 const char *
21503 ix86_output_call_insn (rtx insn, rtx call_op)
21504 {
21505 bool direct_p = constant_call_address_operand (call_op, Pmode);
21506 bool seh_nop_p = false;
21507 const char *xasm;
21508
21509 if (SIBLING_CALL_P (insn))
21510 {
21511 if (direct_p)
21512 xasm = "jmp\t%P0";
21513 /* SEH epilogue detection requires the indirect branch case
21514 to include REX.W. */
21515 else if (TARGET_SEH)
21516 xasm = "rex.W jmp %A0";
21517 else
21518 xasm = "jmp\t%A0";
21519
21520 output_asm_insn (xasm, &call_op);
21521 return "";
21522 }
21523
21524 /* SEH unwinding can require an extra nop to be emitted in several
21525 circumstances. Determine if we have one of those. */
21526 if (TARGET_SEH)
21527 {
21528 rtx i;
21529
21530 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21531 {
21532 /* If we get to another real insn, we don't need the nop. */
21533 if (INSN_P (i))
21534 break;
21535
21536 /* If we get to the epilogue note, prevent a catch region from
21537 being adjacent to the standard epilogue sequence. If non-call
21538 exceptions are enabled, we'll have done this during epilogue emission. */
21539 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21540 && !flag_non_call_exceptions
21541 && !can_throw_internal (insn))
21542 {
21543 seh_nop_p = true;
21544 break;
21545 }
21546 }
21547
21548 /* If we didn't find a real insn following the call, prevent the
21549 unwinder from looking into the next function. */
21550 if (i == NULL)
21551 seh_nop_p = true;
21552 }
21553
21554 if (direct_p)
21555 xasm = "call\t%P0";
21556 else
21557 xasm = "call\t%A0";
21558
21559 output_asm_insn (xasm, &call_op);
21560
21561 if (seh_nop_p)
21562 return "nop";
21563
21564 return "";
21565 }
21566 \f
21567 /* Clear stack slot assignments remembered from previous functions.
21568 This is called from INIT_EXPANDERS once before RTL is emitted for each
21569 function. */
21570
21571 static struct machine_function *
21572 ix86_init_machine_status (void)
21573 {
21574 struct machine_function *f;
21575
21576 f = ggc_alloc_cleared_machine_function ();
21577 f->use_fast_prologue_epilogue_nregs = -1;
21578 f->tls_descriptor_call_expanded_p = 0;
21579 f->call_abi = ix86_abi;
21580
21581 return f;
21582 }
21583
21584 /* Return a MEM corresponding to a stack slot with mode MODE.
21585 Allocate a new slot if necessary.
21586
21587 The RTL for a function can have several slots available: N is
21588 which slot to use. */
21589
21590 rtx
21591 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21592 {
21593 struct stack_local_entry *s;
21594
21595 gcc_assert (n < MAX_386_STACK_LOCALS);
21596
21597 /* Virtual slot is valid only before vregs are instantiated. */
21598 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21599
21600 for (s = ix86_stack_locals; s; s = s->next)
21601 if (s->mode == mode && s->n == n)
21602 return copy_rtx (s->rtl);
21603
21604 s = ggc_alloc_stack_local_entry ();
21605 s->n = n;
21606 s->mode = mode;
21607 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21608
21609 s->next = ix86_stack_locals;
21610 ix86_stack_locals = s;
21611 return s->rtl;
21612 }
21613 \f
21614 /* Calculate the length of the memory address in the instruction
21615 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21616
21617 int
21618 memory_address_length (rtx addr)
21619 {
21620 struct ix86_address parts;
21621 rtx base, index, disp;
21622 int len;
21623 int ok;
21624
21625 if (GET_CODE (addr) == PRE_DEC
21626 || GET_CODE (addr) == POST_INC
21627 || GET_CODE (addr) == PRE_MODIFY
21628 || GET_CODE (addr) == POST_MODIFY)
21629 return 0;
21630
21631 ok = ix86_decompose_address (addr, &parts);
21632 gcc_assert (ok);
21633
21634 if (parts.base && GET_CODE (parts.base) == SUBREG)
21635 parts.base = SUBREG_REG (parts.base);
21636 if (parts.index && GET_CODE (parts.index) == SUBREG)
21637 parts.index = SUBREG_REG (parts.index);
21638
21639 base = parts.base;
21640 index = parts.index;
21641 disp = parts.disp;
21642 len = 0;
21643
21644 /* Rule of thumb:
21645 - esp as the base always wants an index,
21646 - ebp as the base always wants a displacement,
21647 - r12 as the base always wants an index,
21648 - r13 as the base always wants a displacement. */
21649
21650 /* Register Indirect. */
21651 if (base && !index && !disp)
21652 {
21653 /* esp (for its index) and ebp (for its displacement) need
21654 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21655 code. */
21656 if (REG_P (addr)
21657 && (addr == arg_pointer_rtx
21658 || addr == frame_pointer_rtx
21659 || REGNO (addr) == SP_REG
21660 || REGNO (addr) == BP_REG
21661 || REGNO (addr) == R12_REG
21662 || REGNO (addr) == R13_REG))
21663 len = 1;
21664 }
21665
21666 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21667 is not disp32, but disp32(%rip), so for a plain disp32
21668 a SIB byte is needed, unless print_operand_address
21669 optimizes it into disp32(%rip) or (%rip) is implied
21670 by UNSPEC. */
21671 else if (disp && !base && !index)
21672 {
21673 len = 4;
21674 if (TARGET_64BIT)
21675 {
21676 rtx symbol = disp;
21677
21678 if (GET_CODE (disp) == CONST)
21679 symbol = XEXP (disp, 0);
21680 if (GET_CODE (symbol) == PLUS
21681 && CONST_INT_P (XEXP (symbol, 1)))
21682 symbol = XEXP (symbol, 0);
21683
21684 if (GET_CODE (symbol) != LABEL_REF
21685 && (GET_CODE (symbol) != SYMBOL_REF
21686 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21687 && (GET_CODE (symbol) != UNSPEC
21688 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21689 && XINT (symbol, 1) != UNSPEC_PCREL
21690 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21691 len += 1;
21692 }
21693 }
21694
21695 else
21696 {
21697 /* Find the length of the displacement constant. */
21698 if (disp)
21699 {
21700 if (base && satisfies_constraint_K (disp))
21701 len = 1;
21702 else
21703 len = 4;
21704 }
21705 /* ebp always wants a displacement. Similarly r13. */
21706 else if (base && REG_P (base)
21707 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21708 len = 1;
21709
21710 /* An index requires the two-byte modrm form.... */
21711 if (index
21712 /* ...like esp (or r12), which always wants an index. */
21713 || base == arg_pointer_rtx
21714 || base == frame_pointer_rtx
21715 || (base && REG_P (base)
21716 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21717 len += 1;
21718 }
21719
21720 switch (parts.seg)
21721 {
21722 case SEG_FS:
21723 case SEG_GS:
21724 len += 1;
21725 break;
21726 default:
21727 break;
21728 }
21729
21730 return len;
21731 }
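
/* A few illustrative cases, excluding the modrm, opcode and prefix bytes
   this function deliberately leaves out: (%eax) adds nothing, (%esp) and
   8(%ebp) each add one byte (a SIB byte resp. a disp8), 8(%ebp,%ecx,2)
   adds two, and a bare 32-bit displacement with no base or index adds
   four.  */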
21732
21733 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21734 is set, expect that the insn has an 8-bit immediate alternative. */
21735 int
21736 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21737 {
21738 int len = 0;
21739 int i;
21740 extract_insn_cached (insn);
21741 for (i = recog_data.n_operands - 1; i >= 0; --i)
21742 if (CONSTANT_P (recog_data.operand[i]))
21743 {
21744 enum attr_mode mode = get_attr_mode (insn);
21745
21746 gcc_assert (!len);
21747 if (shortform && CONST_INT_P (recog_data.operand[i]))
21748 {
21749 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21750 switch (mode)
21751 {
21752 case MODE_QI:
21753 len = 1;
21754 continue;
21755 case MODE_HI:
21756 ival = trunc_int_for_mode (ival, HImode);
21757 break;
21758 case MODE_SI:
21759 ival = trunc_int_for_mode (ival, SImode);
21760 break;
21761 default:
21762 break;
21763 }
21764 if (IN_RANGE (ival, -128, 127))
21765 {
21766 len = 1;
21767 continue;
21768 }
21769 }
21770 switch (mode)
21771 {
21772 case MODE_QI:
21773 len = 1;
21774 break;
21775 case MODE_HI:
21776 len = 2;
21777 break;
21778 case MODE_SI:
21779 len = 4;
21780 break;
21781 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
21782 case MODE_DI:
21783 len = 4;
21784 break;
21785 default:
21786 fatal_insn ("unknown insn mode", insn);
21787 }
21788 }
21789 return len;
21790 }
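
/* For example, with a short-form alternative "add $3, %eax" is counted as
   a one-byte sign-extended immediate, while "add $1000, %eax" needs the
   full four bytes; QImode and HImode immediates count as one and two
   bytes respectively.  */
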
21791 /* Compute default value for "length_address" attribute. */
21792 int
21793 ix86_attr_length_address_default (rtx insn)
21794 {
21795 int i;
21796
21797 if (get_attr_type (insn) == TYPE_LEA)
21798 {
21799 rtx set = PATTERN (insn), addr;
21800
21801 if (GET_CODE (set) == PARALLEL)
21802 set = XVECEXP (set, 0, 0);
21803
21804 gcc_assert (GET_CODE (set) == SET);
21805
21806 addr = SET_SRC (set);
21807 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21808 {
21809 if (GET_CODE (addr) == ZERO_EXTEND)
21810 addr = XEXP (addr, 0);
21811 if (GET_CODE (addr) == SUBREG)
21812 addr = SUBREG_REG (addr);
21813 }
21814
21815 return memory_address_length (addr);
21816 }
21817
21818 extract_insn_cached (insn);
21819 for (i = recog_data.n_operands - 1; i >= 0; --i)
21820 if (MEM_P (recog_data.operand[i]))
21821 {
21822 constrain_operands_cached (reload_completed);
21823 if (which_alternative != -1)
21824 {
21825 const char *constraints = recog_data.constraints[i];
21826 int alt = which_alternative;
21827
21828 while (*constraints == '=' || *constraints == '+')
21829 constraints++;
21830 while (alt-- > 0)
21831 while (*constraints++ != ',')
21832 ;
21833 /* Skip ignored operands. */
21834 if (*constraints == 'X')
21835 continue;
21836 }
21837 return memory_address_length (XEXP (recog_data.operand[i], 0));
21838 }
21839 return 0;
21840 }
21841
21842 /* Compute default value for "length_vex" attribute. It includes
21843 2 or 3 byte VEX prefix and 1 opcode byte. */
21844
21845 int
21846 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
21847 {
21848 int i;
21849
21850 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
21851 requires the 3-byte VEX prefix. */
21852 if (!has_0f_opcode || has_vex_w)
21853 return 3 + 1;
21854
21855 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21856 if (!TARGET_64BIT)
21857 return 2 + 1;
21858
21859 extract_insn_cached (insn);
21860
21861 for (i = recog_data.n_operands - 1; i >= 0; --i)
21862 if (REG_P (recog_data.operand[i]))
21863 {
21864 /* REX.W bit uses 3 byte VEX prefix. */
21865 if (GET_MODE (recog_data.operand[i]) == DImode
21866 && GENERAL_REG_P (recog_data.operand[i]))
21867 return 3 + 1;
21868 }
21869 else
21870 {
21871 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21872 if (MEM_P (recog_data.operand[i])
21873 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21874 return 3 + 1;
21875 }
21876
21877 return 2 + 1;
21878 }
21879 \f
21880 /* Return the maximum number of instructions a cpu can issue. */
21881
21882 static int
21883 ix86_issue_rate (void)
21884 {
21885 switch (ix86_tune)
21886 {
21887 case PROCESSOR_PENTIUM:
21888 case PROCESSOR_ATOM:
21889 case PROCESSOR_K6:
21890 return 2;
21891
21892 case PROCESSOR_PENTIUMPRO:
21893 case PROCESSOR_PENTIUM4:
21894 case PROCESSOR_CORE2_32:
21895 case PROCESSOR_CORE2_64:
21896 case PROCESSOR_COREI7_32:
21897 case PROCESSOR_COREI7_64:
21898 case PROCESSOR_ATHLON:
21899 case PROCESSOR_K8:
21900 case PROCESSOR_AMDFAM10:
21901 case PROCESSOR_NOCONA:
21902 case PROCESSOR_GENERIC32:
21903 case PROCESSOR_GENERIC64:
21904 case PROCESSOR_BDVER1:
21905 case PROCESSOR_BTVER1:
21906 return 3;
21907
21908 default:
21909 return 1;
21910 }
21911 }
21912
21913 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
21914 by DEP_INSN and nothing else set by DEP_INSN. */
21915
21916 static bool
21917 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21918 {
21919 rtx set, set2;
21920
21921 /* Simplify the test for uninteresting insns. */
21922 if (insn_type != TYPE_SETCC
21923 && insn_type != TYPE_ICMOV
21924 && insn_type != TYPE_FCMOV
21925 && insn_type != TYPE_IBR)
21926 return false;
21927
21928 if ((set = single_set (dep_insn)) != 0)
21929 {
21930 set = SET_DEST (set);
21931 set2 = NULL_RTX;
21932 }
21933 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21934 && XVECLEN (PATTERN (dep_insn), 0) == 2
21935 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21936 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21937 {
21938 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21939 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21940 }
21941 else
21942 return false;
21943
21944 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21945 return false;
21946
21947 /* This test is true if the dependent insn reads the flags but
21948 not any other potentially set register. */
21949 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21950 return false;
21951
21952 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21953 return false;
21954
21955 return true;
21956 }
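/* For example, "cmp %eax, %ebx" followed by a dependent "jne" matches
   here: the compare's only destination is the flags register and the
   branch reads nothing else set by it, so the pair can be treated as
   pairing up (the Pentium case of ix86_adjust_cost below drops the
   dependence cost to 0 in that situation).  */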
21957
21958 /* Return true iff USE_INSN has a memory address with operands set by
21959 SET_INSN. */
21960
21961 bool
21962 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21963 {
21964 int i;
21965 extract_insn_cached (use_insn);
21966 for (i = recog_data.n_operands - 1; i >= 0; --i)
21967 if (MEM_P (recog_data.operand[i]))
21968 {
21969 rtx addr = XEXP (recog_data.operand[i], 0);
21970 return modified_in_p (addr, set_insn) != 0;
21971 }
21972 return false;
21973 }
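/* Example: if SET_INSN is "addl $4, %eax" and USE_INSN is
   "movl (%eax), %edx", the load's address register is modified by
   SET_INSN, so this returns true; on Pentium such an address
   generation interlock costs an extra cycle (see ix86_adjust_cost
   below).  */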
21974
21975 static int
21976 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21977 {
21978 enum attr_type insn_type, dep_insn_type;
21979 enum attr_memory memory;
21980 rtx set, set2;
21981 int dep_insn_code_number;
21982
21983 /* Anti and output dependencies have zero cost on all CPUs. */
21984 if (REG_NOTE_KIND (link) != 0)
21985 return 0;
21986
21987 dep_insn_code_number = recog_memoized (dep_insn);
21988
21989 /* If we can't recognize the insns, we can't really do anything. */
21990 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21991 return cost;
21992
21993 insn_type = get_attr_type (insn);
21994 dep_insn_type = get_attr_type (dep_insn);
21995
21996 switch (ix86_tune)
21997 {
21998 case PROCESSOR_PENTIUM:
21999 /* Address Generation Interlock adds a cycle of latency. */
22000 if (insn_type == TYPE_LEA)
22001 {
22002 rtx addr = PATTERN (insn);
22003
22004 if (GET_CODE (addr) == PARALLEL)
22005 addr = XVECEXP (addr, 0, 0);
22006
22007 gcc_assert (GET_CODE (addr) == SET);
22008
22009 addr = SET_SRC (addr);
22010 if (modified_in_p (addr, dep_insn))
22011 cost += 1;
22012 }
22013 else if (ix86_agi_dependent (dep_insn, insn))
22014 cost += 1;
22015
22016 /* ??? Compares pair with jump/setcc. */
22017 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22018 cost = 0;
22019
22020 /* Floating point stores require value to be ready one cycle earlier. */
22021 if (insn_type == TYPE_FMOV
22022 && get_attr_memory (insn) == MEMORY_STORE
22023 && !ix86_agi_dependent (dep_insn, insn))
22024 cost += 1;
22025 break;
22026
22027 case PROCESSOR_PENTIUMPRO:
22028 memory = get_attr_memory (insn);
22029
22030 /* INT->FP conversion is expensive. */
22031 if (get_attr_fp_int_src (dep_insn))
22032 cost += 5;
22033
22034 /* There is one cycle extra latency between an FP op and a store. */
22035 if (insn_type == TYPE_FMOV
22036 && (set = single_set (dep_insn)) != NULL_RTX
22037 && (set2 = single_set (insn)) != NULL_RTX
22038 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22039 && MEM_P (SET_DEST (set2)))
22040 cost += 1;
22041
22042 /* Model the ability of the reorder buffer to hide the latency of a load
22043 by executing it in parallel with the previous instruction, when the
22044 previous instruction is not needed to compute the address. */
22045 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22046 && !ix86_agi_dependent (dep_insn, insn))
22047 {
22048 /* Claim moves to take one cycle, as the core can issue one load
22049 at a time and the next load can start a cycle later. */
22050 if (dep_insn_type == TYPE_IMOV
22051 || dep_insn_type == TYPE_FMOV)
22052 cost = 1;
22053 else if (cost > 1)
22054 cost--;
22055 }
22056 break;
22057
22058 case PROCESSOR_K6:
22059 memory = get_attr_memory (insn);
22060
22061 /* The esp dependency is resolved before the instruction is really
22062 finished. */
22063 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22064 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22065 return 1;
22066
22067 /* INT->FP conversion is expensive. */
22068 if (get_attr_fp_int_src (dep_insn))
22069 cost += 5;
22070
22071 /* Model the ability of the reorder buffer to hide the latency of a load
22072 by executing it in parallel with the previous instruction, when the
22073 previous instruction is not needed to compute the address. */
22074 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22075 && !ix86_agi_dependent (dep_insn, insn))
22076 {
22077 /* Claim moves to take one cycle, as the core can issue one load
22078 at a time and the next load can start a cycle later. */
22079 if (dep_insn_type == TYPE_IMOV
22080 || dep_insn_type == TYPE_FMOV)
22081 cost = 1;
22082 else if (cost > 2)
22083 cost -= 2;
22084 else
22085 cost = 1;
22086 }
22087 break;
22088
22089 case PROCESSOR_ATHLON:
22090 case PROCESSOR_K8:
22091 case PROCESSOR_AMDFAM10:
22092 case PROCESSOR_BDVER1:
22093 case PROCESSOR_BTVER1:
22094 case PROCESSOR_ATOM:
22095 case PROCESSOR_GENERIC32:
22096 case PROCESSOR_GENERIC64:
22097 memory = get_attr_memory (insn);
22098
22099 /* Model the ability of the reorder buffer to hide the latency of a load
22100 by executing it in parallel with the previous instruction, when the
22101 previous instruction is not needed to compute the address. */
22102 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22103 && !ix86_agi_dependent (dep_insn, insn))
22104 {
22105 enum attr_unit unit = get_attr_unit (insn);
22106 int loadcost = 3;
22107
22108 /* Because of the difference between the length of integer and
22109 floating unit pipeline preparation stages, the memory operands
22110 for floating point are cheaper.
22111
22112 ??? For Athlon the difference is most probably 2. */
22113 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22114 loadcost = 3;
22115 else
22116 loadcost = TARGET_ATHLON ? 2 : 0;
22117
22118 if (cost >= loadcost)
22119 cost -= loadcost;
22120 else
22121 cost = 0;
22122 }
22123
22124 default:
22125 break;
22126 }
22127
22128 return cost;
22129 }
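/* A rough worked example of the load-latency adjustment above for the
   AMD/Atom/generic tunings: if an integer instruction depends on a
   load with a nominal dependence cost of, say, 3 cycles, and it does
   not need the loaded value to form an address, loadcost (3) is
   subtracted and the modelled cost drops to 0, reflecting that the
   reorder buffer can overlap the load with earlier work.  */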
22130
22131 /* How many alternative schedules to try. This should be as wide as the
22132 scheduling freedom in the DFA, but no wider. Making this value too
22133 large results in extra work for the scheduler. */
22134
22135 static int
22136 ia32_multipass_dfa_lookahead (void)
22137 {
22138 switch (ix86_tune)
22139 {
22140 case PROCESSOR_PENTIUM:
22141 return 2;
22142
22143 case PROCESSOR_PENTIUMPRO:
22144 case PROCESSOR_K6:
22145 return 1;
22146
22147 case PROCESSOR_CORE2_32:
22148 case PROCESSOR_CORE2_64:
22149 case PROCESSOR_COREI7_32:
22150 case PROCESSOR_COREI7_64:
22151 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22152 as many instructions can be executed on a cycle, i.e.,
22153 issue_rate. I wonder why tuning for many CPUs does not do this. */
22154 return ix86_issue_rate ();
22155
22156 default:
22157 return 0;
22158 }
22159 }
22160
22161 \f
22162
22163 /* Model decoder of Core 2/i7.
22164 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22165 track the instruction fetch block boundaries and make sure that long
22166 (9+ bytes) instructions are assigned to D0. */
22167
22168 /* Maximum length of an insn that can be handled by
22169 a secondary decoder unit. '8' for Core 2/i7. */
22170 static int core2i7_secondary_decoder_max_insn_size;
22171
22172 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22173 '16' for Core 2/i7. */
22174 static int core2i7_ifetch_block_size;
22175
22176 /* Maximum number of instructions decoder can handle per cycle.
22177 '6' for Core 2/i7. */
22178 static int core2i7_ifetch_block_max_insns;
22179
22180 typedef struct ix86_first_cycle_multipass_data_ *
22181 ix86_first_cycle_multipass_data_t;
22182 typedef const struct ix86_first_cycle_multipass_data_ *
22183 const_ix86_first_cycle_multipass_data_t;
22184
22185 /* A variable to store target state across calls to max_issue within
22186 one cycle. */
22187 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22188 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22189
22190 /* Initialize DATA. */
22191 static void
22192 core2i7_first_cycle_multipass_init (void *_data)
22193 {
22194 ix86_first_cycle_multipass_data_t data
22195 = (ix86_first_cycle_multipass_data_t) _data;
22196
22197 data->ifetch_block_len = 0;
22198 data->ifetch_block_n_insns = 0;
22199 data->ready_try_change = NULL;
22200 data->ready_try_change_size = 0;
22201 }
22202
22203 /* Advancing the cycle; reset ifetch block counts. */
22204 static void
22205 core2i7_dfa_post_advance_cycle (void)
22206 {
22207 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22208
22209 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22210
22211 data->ifetch_block_len = 0;
22212 data->ifetch_block_n_insns = 0;
22213 }
22214
22215 static int min_insn_size (rtx);
22216
22217 /* Filter out insns from ready_try that the core will not be able to issue
22218 on current cycle due to decoder. */
22219 static void
22220 core2i7_first_cycle_multipass_filter_ready_try
22221 (const_ix86_first_cycle_multipass_data_t data,
22222 char *ready_try, int n_ready, bool first_cycle_insn_p)
22223 {
22224 while (n_ready--)
22225 {
22226 rtx insn;
22227 int insn_size;
22228
22229 if (ready_try[n_ready])
22230 continue;
22231
22232 insn = get_ready_element (n_ready);
22233 insn_size = min_insn_size (insn);
22234
22235 if (/* If this insn is too long for a secondary decoder ... */
22236 (!first_cycle_insn_p
22237 && insn_size > core2i7_secondary_decoder_max_insn_size)
22238 /* ... or it would not fit into the ifetch block ... */
22239 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22240 /* ... or the decoder is full already ... */
22241 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22242 /* ... mask the insn out. */
22243 {
22244 ready_try[n_ready] = 1;
22245
22246 if (data->ready_try_change)
22247 SET_BIT (data->ready_try_change, n_ready);
22248 }
22249 }
22250 }
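/* Example of the filtering above: if 12 bytes of the current 16 byte
   ifetch block are already used, a 6 byte candidate insn would
   overflow the block (12 + 6 > 16) and is masked out; similarly, an
   insn longer than 8 bytes is masked out unless it is the first insn
   of the cycle and can therefore go to the complex (D0) decoder.  */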
22251
22252 /* Prepare for a new round of multipass lookahead scheduling. */
22253 static void
22254 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22255 bool first_cycle_insn_p)
22256 {
22257 ix86_first_cycle_multipass_data_t data
22258 = (ix86_first_cycle_multipass_data_t) _data;
22259 const_ix86_first_cycle_multipass_data_t prev_data
22260 = ix86_first_cycle_multipass_data;
22261
22262 /* Restore the state from the end of the previous round. */
22263 data->ifetch_block_len = prev_data->ifetch_block_len;
22264 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22265
22266 /* Filter instructions that cannot be issued on current cycle due to
22267 decoder restrictions. */
22268 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22269 first_cycle_insn_p);
22270 }
22271
22272 /* INSN is being issued in current solution. Account for its impact on
22273 the decoder model. */
22274 static void
22275 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22276 rtx insn, const void *_prev_data)
22277 {
22278 ix86_first_cycle_multipass_data_t data
22279 = (ix86_first_cycle_multipass_data_t) _data;
22280 const_ix86_first_cycle_multipass_data_t prev_data
22281 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22282
22283 int insn_size = min_insn_size (insn);
22284
22285 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22286 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22287 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22288 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22289
22290 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22291 if (!data->ready_try_change)
22292 {
22293 data->ready_try_change = sbitmap_alloc (n_ready);
22294 data->ready_try_change_size = n_ready;
22295 }
22296 else if (data->ready_try_change_size < n_ready)
22297 {
22298 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22299 n_ready, 0);
22300 data->ready_try_change_size = n_ready;
22301 }
22302 sbitmap_zero (data->ready_try_change);
22303
22304 /* Filter out insns from ready_try that the core will not be able to issue
22305 on current cycle due to decoder. */
22306 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22307 false);
22308 }
22309
22310 /* Revert the effect on ready_try. */
22311 static void
22312 core2i7_first_cycle_multipass_backtrack (const void *_data,
22313 char *ready_try,
22314 int n_ready ATTRIBUTE_UNUSED)
22315 {
22316 const_ix86_first_cycle_multipass_data_t data
22317 = (const_ix86_first_cycle_multipass_data_t) _data;
22318 unsigned int i = 0;
22319 sbitmap_iterator sbi;
22320
22321 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22322 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22323 {
22324 ready_try[i] = 0;
22325 }
22326 }
22327
22328 /* Save the result of multipass lookahead scheduling for the next round. */
22329 static void
22330 core2i7_first_cycle_multipass_end (const void *_data)
22331 {
22332 const_ix86_first_cycle_multipass_data_t data
22333 = (const_ix86_first_cycle_multipass_data_t) _data;
22334 ix86_first_cycle_multipass_data_t next_data
22335 = ix86_first_cycle_multipass_data;
22336
22337 if (data != NULL)
22338 {
22339 next_data->ifetch_block_len = data->ifetch_block_len;
22340 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22341 }
22342 }
22343
22344 /* Deallocate target data. */
22345 static void
22346 core2i7_first_cycle_multipass_fini (void *_data)
22347 {
22348 ix86_first_cycle_multipass_data_t data
22349 = (ix86_first_cycle_multipass_data_t) _data;
22350
22351 if (data->ready_try_change)
22352 {
22353 sbitmap_free (data->ready_try_change);
22354 data->ready_try_change = NULL;
22355 data->ready_try_change_size = 0;
22356 }
22357 }
22358
22359 /* Prepare for scheduling pass. */
22360 static void
22361 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22362 int verbose ATTRIBUTE_UNUSED,
22363 int max_uid ATTRIBUTE_UNUSED)
22364 {
22365 /* Install scheduling hooks for current CPU. Some of these hooks are used
22366 in time-critical parts of the scheduler, so we only set them up when
22367 they are actually used. */
22368 switch (ix86_tune)
22369 {
22370 case PROCESSOR_CORE2_32:
22371 case PROCESSOR_CORE2_64:
22372 case PROCESSOR_COREI7_32:
22373 case PROCESSOR_COREI7_64:
22374 targetm.sched.dfa_post_advance_cycle
22375 = core2i7_dfa_post_advance_cycle;
22376 targetm.sched.first_cycle_multipass_init
22377 = core2i7_first_cycle_multipass_init;
22378 targetm.sched.first_cycle_multipass_begin
22379 = core2i7_first_cycle_multipass_begin;
22380 targetm.sched.first_cycle_multipass_issue
22381 = core2i7_first_cycle_multipass_issue;
22382 targetm.sched.first_cycle_multipass_backtrack
22383 = core2i7_first_cycle_multipass_backtrack;
22384 targetm.sched.first_cycle_multipass_end
22385 = core2i7_first_cycle_multipass_end;
22386 targetm.sched.first_cycle_multipass_fini
22387 = core2i7_first_cycle_multipass_fini;
22388
22389 /* Set decoder parameters. */
22390 core2i7_secondary_decoder_max_insn_size = 8;
22391 core2i7_ifetch_block_size = 16;
22392 core2i7_ifetch_block_max_insns = 6;
22393 break;
22394
22395 default:
22396 targetm.sched.dfa_post_advance_cycle = NULL;
22397 targetm.sched.first_cycle_multipass_init = NULL;
22398 targetm.sched.first_cycle_multipass_begin = NULL;
22399 targetm.sched.first_cycle_multipass_issue = NULL;
22400 targetm.sched.first_cycle_multipass_backtrack = NULL;
22401 targetm.sched.first_cycle_multipass_end = NULL;
22402 targetm.sched.first_cycle_multipass_fini = NULL;
22403 break;
22404 }
22405 }
22406
22407 \f
22408 /* Compute the alignment given to a constant that is being placed in memory.
22409 EXP is the constant and ALIGN is the alignment that the object would
22410 ordinarily have.
22411 The value of this function is used instead of that alignment to align
22412 the object. */
22413
22414 int
22415 ix86_constant_alignment (tree exp, int align)
22416 {
22417 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22418 || TREE_CODE (exp) == INTEGER_CST)
22419 {
22420 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22421 return 64;
22422 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22423 return 128;
22424 }
22425 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22426 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22427 return BITS_PER_WORD;
22428
22429 return align;
22430 }
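/* For example, a double (DFmode) constant whose default alignment is
   below 64 bits is bumped to 64 so it stays naturally aligned, and a
   string constant of length 31 or more gets at least word alignment
   unless we are optimizing for size.  */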
22431
22432 /* Compute the alignment for a static variable.
22433 TYPE is the data type, and ALIGN is the alignment that
22434 the object would ordinarily have. The value of this function is used
22435 instead of that alignment to align the object. */
22436
22437 int
22438 ix86_data_alignment (tree type, int align)
22439 {
22440 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22441
22442 if (AGGREGATE_TYPE_P (type)
22443 && TYPE_SIZE (type)
22444 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22445 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22446 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22447 && align < max_align)
22448 align = max_align;
22449
22450 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
22451 to a 16 byte boundary. */
22452 if (TARGET_64BIT)
22453 {
22454 if (AGGREGATE_TYPE_P (type)
22455 && TYPE_SIZE (type)
22456 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22457 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22458 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22459 return 128;
22460 }
22461
22462 if (TREE_CODE (type) == ARRAY_TYPE)
22463 {
22464 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22465 return 64;
22466 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22467 return 128;
22468 }
22469 else if (TREE_CODE (type) == COMPLEX_TYPE)
22470 {
22471
22472 if (TYPE_MODE (type) == DCmode && align < 64)
22473 return 64;
22474 if ((TYPE_MODE (type) == XCmode
22475 || TYPE_MODE (type) == TCmode) && align < 128)
22476 return 128;
22477 }
22478 else if ((TREE_CODE (type) == RECORD_TYPE
22479 || TREE_CODE (type) == UNION_TYPE
22480 || TREE_CODE (type) == QUAL_UNION_TYPE)
22481 && TYPE_FIELDS (type))
22482 {
22483 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22484 return 64;
22485 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22486 return 128;
22487 }
22488 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22489 || TREE_CODE (type) == INTEGER_TYPE)
22490 {
22491 if (TYPE_MODE (type) == DFmode && align < 64)
22492 return 64;
22493 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22494 return 128;
22495 }
22496
22497 return align;
22498 }
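/* Example of the rules above: on x86-64 a file-scope
   "static double buf[2]" occupies 16 bytes, so the ABI clause above
   raises its alignment to 128 bits; on ia32, an array of doubles
   whose default alignment is only 32 bits is raised to 64 bits by the
   ARRAY_TYPE/DFmode case so the elements stay naturally aligned.  */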
22499
22500 /* Compute the alignment for a local variable or a stack slot. EXP is
22501 the data type or decl itself, MODE is the widest mode available and
22502 ALIGN is the alignment that the object would ordinarily have. The
22503 value of this macro is used instead of that alignment to align the
22504 object. */
22505
22506 unsigned int
22507 ix86_local_alignment (tree exp, enum machine_mode mode,
22508 unsigned int align)
22509 {
22510 tree type, decl;
22511
22512 if (exp && DECL_P (exp))
22513 {
22514 type = TREE_TYPE (exp);
22515 decl = exp;
22516 }
22517 else
22518 {
22519 type = exp;
22520 decl = NULL;
22521 }
22522
22523 /* Don't do dynamic stack realignment for long long objects with
22524 -mpreferred-stack-boundary=2. */
22525 if (!TARGET_64BIT
22526 && align == 64
22527 && ix86_preferred_stack_boundary < 64
22528 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22529 && (!type || !TYPE_USER_ALIGN (type))
22530 && (!decl || !DECL_USER_ALIGN (decl)))
22531 align = 32;
22532
22533 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22534 register in MODE. We will return the largest alignment of XF
22535 and DF. */
22536 if (!type)
22537 {
22538 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22539 align = GET_MODE_ALIGNMENT (DFmode);
22540 return align;
22541 }
22542
22543 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
22544 to a 16 byte boundary. The exact wording is:
22545
22546 An array uses the same alignment as its elements, except that a local or
22547 global array variable of length at least 16 bytes or
22548 a C99 variable-length array variable always has alignment of at least 16 bytes.
22549
22550 This rule was added to allow use of aligned SSE instructions on arrays. It
22551 is meant for static storage (where the compiler cannot do the analysis
22552 by itself). We follow it for automatic variables only when convenient:
22553 we fully control everything in the function being compiled, and functions
22554 from other units cannot rely on the alignment.
22555
22556 Exclude the va_list type. It is the common case of a local array where
22557 we cannot benefit from the alignment. */
22558 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22559 && TARGET_SSE)
22560 {
22561 if (AGGREGATE_TYPE_P (type)
22562 && (va_list_type_node == NULL_TREE
22563 || (TYPE_MAIN_VARIANT (type)
22564 != TYPE_MAIN_VARIANT (va_list_type_node)))
22565 && TYPE_SIZE (type)
22566 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22567 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22568 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22569 return 128;
22570 }
22571 if (TREE_CODE (type) == ARRAY_TYPE)
22572 {
22573 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22574 return 64;
22575 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22576 return 128;
22577 }
22578 else if (TREE_CODE (type) == COMPLEX_TYPE)
22579 {
22580 if (TYPE_MODE (type) == DCmode && align < 64)
22581 return 64;
22582 if ((TYPE_MODE (type) == XCmode
22583 || TYPE_MODE (type) == TCmode) && align < 128)
22584 return 128;
22585 }
22586 else if ((TREE_CODE (type) == RECORD_TYPE
22587 || TREE_CODE (type) == UNION_TYPE
22588 || TREE_CODE (type) == QUAL_UNION_TYPE)
22589 && TYPE_FIELDS (type))
22590 {
22591 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22592 return 64;
22593 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22594 return 128;
22595 }
22596 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22597 || TREE_CODE (type) == INTEGER_TYPE)
22598 {
22599
22600 if (TYPE_MODE (type) == DFmode && align < 64)
22601 return 64;
22602 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22603 return 128;
22604 }
22605 return align;
22606 }
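/* Example: on x86-64 with SSE enabled, in a function optimized for
   speed, a local "char buf[32]" (which is not a va_list) is raised to
   128 bit alignment by the aggregate rule above so aligned SSE
   accesses can be used on it; the same array on ia32, or in a
   function optimized for size, keeps its ordinary alignment.  */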
22607
22608 /* Compute the minimum required alignment for dynamic stack realignment
22609 purposes for a local variable, parameter or a stack slot. EXP is
22610 the data type or decl itself, MODE is its mode and ALIGN is the
22611 alignment that the object would ordinarily have. */
22612
22613 unsigned int
22614 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22615 unsigned int align)
22616 {
22617 tree type, decl;
22618
22619 if (exp && DECL_P (exp))
22620 {
22621 type = TREE_TYPE (exp);
22622 decl = exp;
22623 }
22624 else
22625 {
22626 type = exp;
22627 decl = NULL;
22628 }
22629
22630 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22631 return align;
22632
22633 /* Don't do dynamic stack realignment for long long objects with
22634 -mpreferred-stack-boundary=2. */
22635 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22636 && (!type || !TYPE_USER_ALIGN (type))
22637 && (!decl || !DECL_USER_ALIGN (decl)))
22638 return 32;
22639
22640 return align;
22641 }
22642 \f
22643 /* Find a location for the static chain incoming to a nested function.
22644 This is a register, unless all free registers are used by arguments. */
22645
22646 static rtx
22647 ix86_static_chain (const_tree fndecl, bool incoming_p)
22648 {
22649 unsigned regno;
22650
22651 if (!DECL_STATIC_CHAIN (fndecl))
22652 return NULL;
22653
22654 if (TARGET_64BIT)
22655 {
22656 /* We always use R10 in 64-bit mode. */
22657 regno = R10_REG;
22658 }
22659 else
22660 {
22661 tree fntype;
22662 unsigned int ccvt;
22663
22664 /* By default in 32-bit mode we use ECX to pass the static chain. */
22665 regno = CX_REG;
22666
22667 fntype = TREE_TYPE (fndecl);
22668 ccvt = ix86_get_callcvt (fntype);
22669 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22670 {
22671 /* Fastcall functions use ecx/edx for arguments, which leaves
22672 us with EAX for the static chain.
22673 Thiscall functions use ecx for arguments, which also
22674 leaves us with EAX for the static chain. */
22675 regno = AX_REG;
22676 }
22677 else if (ix86_function_regparm (fntype, fndecl) == 3)
22678 {
22679 /* For regparm 3, we have no free call-clobbered registers in
22680 which to store the static chain. In order to implement this,
22681 we have the trampoline push the static chain to the stack.
22682 However, we can't push a value below the return address when
22683 we call the nested function directly, so we have to use an
22684 alternate entry point. For this we use ESI, and have the
22685 alternate entry point push ESI, so that things appear the
22686 same once we're executing the nested function. */
22687 if (incoming_p)
22688 {
22689 if (fndecl == current_function_decl)
22690 ix86_static_chain_on_stack = true;
22691 return gen_frame_mem (SImode,
22692 plus_constant (arg_pointer_rtx, -8));
22693 }
22694 regno = SI_REG;
22695 }
22696 }
22697
22698 return gen_rtx_REG (Pmode, regno);
22699 }
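/* A toy illustration (GNU C nested functions; the names are made up):

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       return inner (1);
     }

   Here "inner" receives its static chain (the enclosing frame of
   "outer") in ECX by default on ia32 and in R10 on x86-64; with
   fastcall/thiscall the chain moves to EAX, and with regparm(3) it is
   passed on the stack via the alternate entry point trick described
   above.  */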
22700
22701 /* Emit RTL insns to initialize the variable parts of a trampoline.
22702 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22703 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22704 to be passed to the target function. */
22705
22706 static void
22707 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22708 {
22709 rtx mem, fnaddr;
22710
22711 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22712
22713 if (!TARGET_64BIT)
22714 {
22715 rtx disp, chain;
22716 int opcode;
22717
22718 /* Depending on the static chain location, either load a register
22719 with a constant, or push the constant to the stack. All of the
22720 instructions are the same size. */
22721 chain = ix86_static_chain (fndecl, true);
22722 if (REG_P (chain))
22723 {
22724 if (REGNO (chain) == CX_REG)
22725 opcode = 0xb9;
22726 else if (REGNO (chain) == AX_REG)
22727 opcode = 0xb8;
22728 else
22729 gcc_unreachable ();
22730 }
22731 else
22732 opcode = 0x68;
22733
22734 mem = adjust_address (m_tramp, QImode, 0);
22735 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22736
22737 mem = adjust_address (m_tramp, SImode, 1);
22738 emit_move_insn (mem, chain_value);
22739
22740 /* Compute offset from the end of the jmp to the target function.
22741 In the case in which the trampoline stores the static chain on
22742 the stack, we need to skip the first insn which pushes the
22743 (call-saved) register static chain; this push is 1 byte. */
22744 disp = expand_binop (SImode, sub_optab, fnaddr,
22745 plus_constant (XEXP (m_tramp, 0),
22746 MEM_P (chain) ? 9 : 10),
22747 NULL_RTX, 1, OPTAB_DIRECT);
22748
22749 mem = adjust_address (m_tramp, QImode, 5);
22750 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22751
22752 mem = adjust_address (m_tramp, SImode, 6);
22753 emit_move_insn (mem, disp);
22754 }
22755 else
22756 {
22757 int offset = 0;
22758
22759 /* Load the function address to r11. Try to load address using
22760 the shorter movl instead of movabs. We may want to support
22761 movq for kernel mode, but kernel does not use trampolines at
22762 the moment. */
22763 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22764 {
22765 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22766
22767 mem = adjust_address (m_tramp, HImode, offset);
22768 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22769
22770 mem = adjust_address (m_tramp, SImode, offset + 2);
22771 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22772 offset += 6;
22773 }
22774 else
22775 {
22776 mem = adjust_address (m_tramp, HImode, offset);
22777 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22778
22779 mem = adjust_address (m_tramp, DImode, offset + 2);
22780 emit_move_insn (mem, fnaddr);
22781 offset += 10;
22782 }
22783
22784 /* Load static chain using movabs to r10. */
22785 mem = adjust_address (m_tramp, HImode, offset);
22786 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
22787
22788 mem = adjust_address (m_tramp, DImode, offset + 2);
22789 emit_move_insn (mem, chain_value);
22790 offset += 10;
22791
22792 /* Jump to r11; the last (unused) byte is a nop, only there to
22793 pad the write out to a single 32-bit store. */
22794 mem = adjust_address (m_tramp, SImode, offset);
22795 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22796 offset += 4;
22797
22798 gcc_assert (offset <= TRAMPOLINE_SIZE);
22799 }
22800
22801 #ifdef HAVE_ENABLE_EXECUTE_STACK
22802 #ifdef CHECK_EXECUTE_STACK_ENABLED
22803 if (CHECK_EXECUTE_STACK_ENABLED)
22804 #endif
22805 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22806 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22807 #endif
22808 }
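/* For reference, the bytes emitted above correspond roughly to the
   following code.  ia32, with the static chain in ECX:

       b9 <chain>              movl   $chain, %ecx
       e9 <rel32>              jmp    <function>

   x86-64, when the target address does not fit the shorter movl form:

       49 bb <fnaddr>          movabs $fnaddr, %r11
       49 ba <chain>           movabs $chain, %r10
       49 ff e3                jmp    *%r11
       90                      nop  (pads the final 32-bit store)  */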
22809 \f
22810 /* The following file contains several enumerations and data structures
22811 built from the definitions in i386-builtin-types.def. */
22812
22813 #include "i386-builtin-types.inc"
22814
22815 /* Table for the ix86 builtin non-function types. */
22816 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22817
22818 /* Retrieve an element from the above table, building some of
22819 the types lazily. */
22820
22821 static tree
22822 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22823 {
22824 unsigned int index;
22825 tree type, itype;
22826
22827 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22828
22829 type = ix86_builtin_type_tab[(int) tcode];
22830 if (type != NULL)
22831 return type;
22832
22833 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22834 if (tcode <= IX86_BT_LAST_VECT)
22835 {
22836 enum machine_mode mode;
22837
22838 index = tcode - IX86_BT_LAST_PRIM - 1;
22839 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22840 mode = ix86_builtin_type_vect_mode[index];
22841
22842 type = build_vector_type_for_mode (itype, mode);
22843 }
22844 else
22845 {
22846 int quals;
22847
22848 index = tcode - IX86_BT_LAST_VECT - 1;
22849 if (tcode <= IX86_BT_LAST_PTR)
22850 quals = TYPE_UNQUALIFIED;
22851 else
22852 quals = TYPE_QUAL_CONST;
22853
22854 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22855 if (quals != TYPE_UNQUALIFIED)
22856 itype = build_qualified_type (itype, quals);
22857
22858 type = build_pointer_type (itype);
22859 }
22860
22861 ix86_builtin_type_tab[(int) tcode] = type;
22862 return type;
22863 }
22864
22865 /* Table for the ix86 builtin function types. */
22866 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
22867
22868 /* Retrieve an element from the above table, building some of
22869 the types lazily. */
22870
22871 static tree
22872 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
22873 {
22874 tree type;
22875
22876 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
22877
22878 type = ix86_builtin_func_type_tab[(int) tcode];
22879 if (type != NULL)
22880 return type;
22881
22882 if (tcode <= IX86_BT_LAST_FUNC)
22883 {
22884 unsigned start = ix86_builtin_func_start[(int) tcode];
22885 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
22886 tree rtype, atype, args = void_list_node;
22887 unsigned i;
22888
22889 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
22890 for (i = after - 1; i > start; --i)
22891 {
22892 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
22893 args = tree_cons (NULL, atype, args);
22894 }
22895
22896 type = build_function_type (rtype, args);
22897 }
22898 else
22899 {
22900 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
22901 enum ix86_builtin_func_type icode;
22902
22903 icode = ix86_builtin_func_alias_base[index];
22904 type = ix86_get_builtin_func_type (icode);
22905 }
22906
22907 ix86_builtin_func_type_tab[(int) tcode] = type;
22908 return type;
22909 }
22910
22911
22912 /* Codes for all the SSE/MMX builtins. */
22913 enum ix86_builtins
22914 {
22915 IX86_BUILTIN_ADDPS,
22916 IX86_BUILTIN_ADDSS,
22917 IX86_BUILTIN_DIVPS,
22918 IX86_BUILTIN_DIVSS,
22919 IX86_BUILTIN_MULPS,
22920 IX86_BUILTIN_MULSS,
22921 IX86_BUILTIN_SUBPS,
22922 IX86_BUILTIN_SUBSS,
22923
22924 IX86_BUILTIN_CMPEQPS,
22925 IX86_BUILTIN_CMPLTPS,
22926 IX86_BUILTIN_CMPLEPS,
22927 IX86_BUILTIN_CMPGTPS,
22928 IX86_BUILTIN_CMPGEPS,
22929 IX86_BUILTIN_CMPNEQPS,
22930 IX86_BUILTIN_CMPNLTPS,
22931 IX86_BUILTIN_CMPNLEPS,
22932 IX86_BUILTIN_CMPNGTPS,
22933 IX86_BUILTIN_CMPNGEPS,
22934 IX86_BUILTIN_CMPORDPS,
22935 IX86_BUILTIN_CMPUNORDPS,
22936 IX86_BUILTIN_CMPEQSS,
22937 IX86_BUILTIN_CMPLTSS,
22938 IX86_BUILTIN_CMPLESS,
22939 IX86_BUILTIN_CMPNEQSS,
22940 IX86_BUILTIN_CMPNLTSS,
22941 IX86_BUILTIN_CMPNLESS,
22942 IX86_BUILTIN_CMPNGTSS,
22943 IX86_BUILTIN_CMPNGESS,
22944 IX86_BUILTIN_CMPORDSS,
22945 IX86_BUILTIN_CMPUNORDSS,
22946
22947 IX86_BUILTIN_COMIEQSS,
22948 IX86_BUILTIN_COMILTSS,
22949 IX86_BUILTIN_COMILESS,
22950 IX86_BUILTIN_COMIGTSS,
22951 IX86_BUILTIN_COMIGESS,
22952 IX86_BUILTIN_COMINEQSS,
22953 IX86_BUILTIN_UCOMIEQSS,
22954 IX86_BUILTIN_UCOMILTSS,
22955 IX86_BUILTIN_UCOMILESS,
22956 IX86_BUILTIN_UCOMIGTSS,
22957 IX86_BUILTIN_UCOMIGESS,
22958 IX86_BUILTIN_UCOMINEQSS,
22959
22960 IX86_BUILTIN_CVTPI2PS,
22961 IX86_BUILTIN_CVTPS2PI,
22962 IX86_BUILTIN_CVTSI2SS,
22963 IX86_BUILTIN_CVTSI642SS,
22964 IX86_BUILTIN_CVTSS2SI,
22965 IX86_BUILTIN_CVTSS2SI64,
22966 IX86_BUILTIN_CVTTPS2PI,
22967 IX86_BUILTIN_CVTTSS2SI,
22968 IX86_BUILTIN_CVTTSS2SI64,
22969
22970 IX86_BUILTIN_MAXPS,
22971 IX86_BUILTIN_MAXSS,
22972 IX86_BUILTIN_MINPS,
22973 IX86_BUILTIN_MINSS,
22974
22975 IX86_BUILTIN_LOADUPS,
22976 IX86_BUILTIN_STOREUPS,
22977 IX86_BUILTIN_MOVSS,
22978
22979 IX86_BUILTIN_MOVHLPS,
22980 IX86_BUILTIN_MOVLHPS,
22981 IX86_BUILTIN_LOADHPS,
22982 IX86_BUILTIN_LOADLPS,
22983 IX86_BUILTIN_STOREHPS,
22984 IX86_BUILTIN_STORELPS,
22985
22986 IX86_BUILTIN_MASKMOVQ,
22987 IX86_BUILTIN_MOVMSKPS,
22988 IX86_BUILTIN_PMOVMSKB,
22989
22990 IX86_BUILTIN_MOVNTPS,
22991 IX86_BUILTIN_MOVNTQ,
22992
22993 IX86_BUILTIN_LOADDQU,
22994 IX86_BUILTIN_STOREDQU,
22995
22996 IX86_BUILTIN_PACKSSWB,
22997 IX86_BUILTIN_PACKSSDW,
22998 IX86_BUILTIN_PACKUSWB,
22999
23000 IX86_BUILTIN_PADDB,
23001 IX86_BUILTIN_PADDW,
23002 IX86_BUILTIN_PADDD,
23003 IX86_BUILTIN_PADDQ,
23004 IX86_BUILTIN_PADDSB,
23005 IX86_BUILTIN_PADDSW,
23006 IX86_BUILTIN_PADDUSB,
23007 IX86_BUILTIN_PADDUSW,
23008 IX86_BUILTIN_PSUBB,
23009 IX86_BUILTIN_PSUBW,
23010 IX86_BUILTIN_PSUBD,
23011 IX86_BUILTIN_PSUBQ,
23012 IX86_BUILTIN_PSUBSB,
23013 IX86_BUILTIN_PSUBSW,
23014 IX86_BUILTIN_PSUBUSB,
23015 IX86_BUILTIN_PSUBUSW,
23016
23017 IX86_BUILTIN_PAND,
23018 IX86_BUILTIN_PANDN,
23019 IX86_BUILTIN_POR,
23020 IX86_BUILTIN_PXOR,
23021
23022 IX86_BUILTIN_PAVGB,
23023 IX86_BUILTIN_PAVGW,
23024
23025 IX86_BUILTIN_PCMPEQB,
23026 IX86_BUILTIN_PCMPEQW,
23027 IX86_BUILTIN_PCMPEQD,
23028 IX86_BUILTIN_PCMPGTB,
23029 IX86_BUILTIN_PCMPGTW,
23030 IX86_BUILTIN_PCMPGTD,
23031
23032 IX86_BUILTIN_PMADDWD,
23033
23034 IX86_BUILTIN_PMAXSW,
23035 IX86_BUILTIN_PMAXUB,
23036 IX86_BUILTIN_PMINSW,
23037 IX86_BUILTIN_PMINUB,
23038
23039 IX86_BUILTIN_PMULHUW,
23040 IX86_BUILTIN_PMULHW,
23041 IX86_BUILTIN_PMULLW,
23042
23043 IX86_BUILTIN_PSADBW,
23044 IX86_BUILTIN_PSHUFW,
23045
23046 IX86_BUILTIN_PSLLW,
23047 IX86_BUILTIN_PSLLD,
23048 IX86_BUILTIN_PSLLQ,
23049 IX86_BUILTIN_PSRAW,
23050 IX86_BUILTIN_PSRAD,
23051 IX86_BUILTIN_PSRLW,
23052 IX86_BUILTIN_PSRLD,
23053 IX86_BUILTIN_PSRLQ,
23054 IX86_BUILTIN_PSLLWI,
23055 IX86_BUILTIN_PSLLDI,
23056 IX86_BUILTIN_PSLLQI,
23057 IX86_BUILTIN_PSRAWI,
23058 IX86_BUILTIN_PSRADI,
23059 IX86_BUILTIN_PSRLWI,
23060 IX86_BUILTIN_PSRLDI,
23061 IX86_BUILTIN_PSRLQI,
23062
23063 IX86_BUILTIN_PUNPCKHBW,
23064 IX86_BUILTIN_PUNPCKHWD,
23065 IX86_BUILTIN_PUNPCKHDQ,
23066 IX86_BUILTIN_PUNPCKLBW,
23067 IX86_BUILTIN_PUNPCKLWD,
23068 IX86_BUILTIN_PUNPCKLDQ,
23069
23070 IX86_BUILTIN_SHUFPS,
23071
23072 IX86_BUILTIN_RCPPS,
23073 IX86_BUILTIN_RCPSS,
23074 IX86_BUILTIN_RSQRTPS,
23075 IX86_BUILTIN_RSQRTPS_NR,
23076 IX86_BUILTIN_RSQRTSS,
23077 IX86_BUILTIN_RSQRTF,
23078 IX86_BUILTIN_SQRTPS,
23079 IX86_BUILTIN_SQRTPS_NR,
23080 IX86_BUILTIN_SQRTSS,
23081
23082 IX86_BUILTIN_UNPCKHPS,
23083 IX86_BUILTIN_UNPCKLPS,
23084
23085 IX86_BUILTIN_ANDPS,
23086 IX86_BUILTIN_ANDNPS,
23087 IX86_BUILTIN_ORPS,
23088 IX86_BUILTIN_XORPS,
23089
23090 IX86_BUILTIN_EMMS,
23091 IX86_BUILTIN_LDMXCSR,
23092 IX86_BUILTIN_STMXCSR,
23093 IX86_BUILTIN_SFENCE,
23094
23095 /* 3DNow! Original */
23096 IX86_BUILTIN_FEMMS,
23097 IX86_BUILTIN_PAVGUSB,
23098 IX86_BUILTIN_PF2ID,
23099 IX86_BUILTIN_PFACC,
23100 IX86_BUILTIN_PFADD,
23101 IX86_BUILTIN_PFCMPEQ,
23102 IX86_BUILTIN_PFCMPGE,
23103 IX86_BUILTIN_PFCMPGT,
23104 IX86_BUILTIN_PFMAX,
23105 IX86_BUILTIN_PFMIN,
23106 IX86_BUILTIN_PFMUL,
23107 IX86_BUILTIN_PFRCP,
23108 IX86_BUILTIN_PFRCPIT1,
23109 IX86_BUILTIN_PFRCPIT2,
23110 IX86_BUILTIN_PFRSQIT1,
23111 IX86_BUILTIN_PFRSQRT,
23112 IX86_BUILTIN_PFSUB,
23113 IX86_BUILTIN_PFSUBR,
23114 IX86_BUILTIN_PI2FD,
23115 IX86_BUILTIN_PMULHRW,
23116
23117 /* 3DNow! Athlon Extensions */
23118 IX86_BUILTIN_PF2IW,
23119 IX86_BUILTIN_PFNACC,
23120 IX86_BUILTIN_PFPNACC,
23121 IX86_BUILTIN_PI2FW,
23122 IX86_BUILTIN_PSWAPDSI,
23123 IX86_BUILTIN_PSWAPDSF,
23124
23125 /* SSE2 */
23126 IX86_BUILTIN_ADDPD,
23127 IX86_BUILTIN_ADDSD,
23128 IX86_BUILTIN_DIVPD,
23129 IX86_BUILTIN_DIVSD,
23130 IX86_BUILTIN_MULPD,
23131 IX86_BUILTIN_MULSD,
23132 IX86_BUILTIN_SUBPD,
23133 IX86_BUILTIN_SUBSD,
23134
23135 IX86_BUILTIN_CMPEQPD,
23136 IX86_BUILTIN_CMPLTPD,
23137 IX86_BUILTIN_CMPLEPD,
23138 IX86_BUILTIN_CMPGTPD,
23139 IX86_BUILTIN_CMPGEPD,
23140 IX86_BUILTIN_CMPNEQPD,
23141 IX86_BUILTIN_CMPNLTPD,
23142 IX86_BUILTIN_CMPNLEPD,
23143 IX86_BUILTIN_CMPNGTPD,
23144 IX86_BUILTIN_CMPNGEPD,
23145 IX86_BUILTIN_CMPORDPD,
23146 IX86_BUILTIN_CMPUNORDPD,
23147 IX86_BUILTIN_CMPEQSD,
23148 IX86_BUILTIN_CMPLTSD,
23149 IX86_BUILTIN_CMPLESD,
23150 IX86_BUILTIN_CMPNEQSD,
23151 IX86_BUILTIN_CMPNLTSD,
23152 IX86_BUILTIN_CMPNLESD,
23153 IX86_BUILTIN_CMPORDSD,
23154 IX86_BUILTIN_CMPUNORDSD,
23155
23156 IX86_BUILTIN_COMIEQSD,
23157 IX86_BUILTIN_COMILTSD,
23158 IX86_BUILTIN_COMILESD,
23159 IX86_BUILTIN_COMIGTSD,
23160 IX86_BUILTIN_COMIGESD,
23161 IX86_BUILTIN_COMINEQSD,
23162 IX86_BUILTIN_UCOMIEQSD,
23163 IX86_BUILTIN_UCOMILTSD,
23164 IX86_BUILTIN_UCOMILESD,
23165 IX86_BUILTIN_UCOMIGTSD,
23166 IX86_BUILTIN_UCOMIGESD,
23167 IX86_BUILTIN_UCOMINEQSD,
23168
23169 IX86_BUILTIN_MAXPD,
23170 IX86_BUILTIN_MAXSD,
23171 IX86_BUILTIN_MINPD,
23172 IX86_BUILTIN_MINSD,
23173
23174 IX86_BUILTIN_ANDPD,
23175 IX86_BUILTIN_ANDNPD,
23176 IX86_BUILTIN_ORPD,
23177 IX86_BUILTIN_XORPD,
23178
23179 IX86_BUILTIN_SQRTPD,
23180 IX86_BUILTIN_SQRTSD,
23181
23182 IX86_BUILTIN_UNPCKHPD,
23183 IX86_BUILTIN_UNPCKLPD,
23184
23185 IX86_BUILTIN_SHUFPD,
23186
23187 IX86_BUILTIN_LOADUPD,
23188 IX86_BUILTIN_STOREUPD,
23189 IX86_BUILTIN_MOVSD,
23190
23191 IX86_BUILTIN_LOADHPD,
23192 IX86_BUILTIN_LOADLPD,
23193
23194 IX86_BUILTIN_CVTDQ2PD,
23195 IX86_BUILTIN_CVTDQ2PS,
23196
23197 IX86_BUILTIN_CVTPD2DQ,
23198 IX86_BUILTIN_CVTPD2PI,
23199 IX86_BUILTIN_CVTPD2PS,
23200 IX86_BUILTIN_CVTTPD2DQ,
23201 IX86_BUILTIN_CVTTPD2PI,
23202
23203 IX86_BUILTIN_CVTPI2PD,
23204 IX86_BUILTIN_CVTSI2SD,
23205 IX86_BUILTIN_CVTSI642SD,
23206
23207 IX86_BUILTIN_CVTSD2SI,
23208 IX86_BUILTIN_CVTSD2SI64,
23209 IX86_BUILTIN_CVTSD2SS,
23210 IX86_BUILTIN_CVTSS2SD,
23211 IX86_BUILTIN_CVTTSD2SI,
23212 IX86_BUILTIN_CVTTSD2SI64,
23213
23214 IX86_BUILTIN_CVTPS2DQ,
23215 IX86_BUILTIN_CVTPS2PD,
23216 IX86_BUILTIN_CVTTPS2DQ,
23217
23218 IX86_BUILTIN_MOVNTI,
23219 IX86_BUILTIN_MOVNTPD,
23220 IX86_BUILTIN_MOVNTDQ,
23221
23222 IX86_BUILTIN_MOVQ128,
23223
23224 /* SSE2 MMX */
23225 IX86_BUILTIN_MASKMOVDQU,
23226 IX86_BUILTIN_MOVMSKPD,
23227 IX86_BUILTIN_PMOVMSKB128,
23228
23229 IX86_BUILTIN_PACKSSWB128,
23230 IX86_BUILTIN_PACKSSDW128,
23231 IX86_BUILTIN_PACKUSWB128,
23232
23233 IX86_BUILTIN_PADDB128,
23234 IX86_BUILTIN_PADDW128,
23235 IX86_BUILTIN_PADDD128,
23236 IX86_BUILTIN_PADDQ128,
23237 IX86_BUILTIN_PADDSB128,
23238 IX86_BUILTIN_PADDSW128,
23239 IX86_BUILTIN_PADDUSB128,
23240 IX86_BUILTIN_PADDUSW128,
23241 IX86_BUILTIN_PSUBB128,
23242 IX86_BUILTIN_PSUBW128,
23243 IX86_BUILTIN_PSUBD128,
23244 IX86_BUILTIN_PSUBQ128,
23245 IX86_BUILTIN_PSUBSB128,
23246 IX86_BUILTIN_PSUBSW128,
23247 IX86_BUILTIN_PSUBUSB128,
23248 IX86_BUILTIN_PSUBUSW128,
23249
23250 IX86_BUILTIN_PAND128,
23251 IX86_BUILTIN_PANDN128,
23252 IX86_BUILTIN_POR128,
23253 IX86_BUILTIN_PXOR128,
23254
23255 IX86_BUILTIN_PAVGB128,
23256 IX86_BUILTIN_PAVGW128,
23257
23258 IX86_BUILTIN_PCMPEQB128,
23259 IX86_BUILTIN_PCMPEQW128,
23260 IX86_BUILTIN_PCMPEQD128,
23261 IX86_BUILTIN_PCMPGTB128,
23262 IX86_BUILTIN_PCMPGTW128,
23263 IX86_BUILTIN_PCMPGTD128,
23264
23265 IX86_BUILTIN_PMADDWD128,
23266
23267 IX86_BUILTIN_PMAXSW128,
23268 IX86_BUILTIN_PMAXUB128,
23269 IX86_BUILTIN_PMINSW128,
23270 IX86_BUILTIN_PMINUB128,
23271
23272 IX86_BUILTIN_PMULUDQ,
23273 IX86_BUILTIN_PMULUDQ128,
23274 IX86_BUILTIN_PMULHUW128,
23275 IX86_BUILTIN_PMULHW128,
23276 IX86_BUILTIN_PMULLW128,
23277
23278 IX86_BUILTIN_PSADBW128,
23279 IX86_BUILTIN_PSHUFHW,
23280 IX86_BUILTIN_PSHUFLW,
23281 IX86_BUILTIN_PSHUFD,
23282
23283 IX86_BUILTIN_PSLLDQI128,
23284 IX86_BUILTIN_PSLLWI128,
23285 IX86_BUILTIN_PSLLDI128,
23286 IX86_BUILTIN_PSLLQI128,
23287 IX86_BUILTIN_PSRAWI128,
23288 IX86_BUILTIN_PSRADI128,
23289 IX86_BUILTIN_PSRLDQI128,
23290 IX86_BUILTIN_PSRLWI128,
23291 IX86_BUILTIN_PSRLDI128,
23292 IX86_BUILTIN_PSRLQI128,
23293
23294 IX86_BUILTIN_PSLLDQ128,
23295 IX86_BUILTIN_PSLLW128,
23296 IX86_BUILTIN_PSLLD128,
23297 IX86_BUILTIN_PSLLQ128,
23298 IX86_BUILTIN_PSRAW128,
23299 IX86_BUILTIN_PSRAD128,
23300 IX86_BUILTIN_PSRLW128,
23301 IX86_BUILTIN_PSRLD128,
23302 IX86_BUILTIN_PSRLQ128,
23303
23304 IX86_BUILTIN_PUNPCKHBW128,
23305 IX86_BUILTIN_PUNPCKHWD128,
23306 IX86_BUILTIN_PUNPCKHDQ128,
23307 IX86_BUILTIN_PUNPCKHQDQ128,
23308 IX86_BUILTIN_PUNPCKLBW128,
23309 IX86_BUILTIN_PUNPCKLWD128,
23310 IX86_BUILTIN_PUNPCKLDQ128,
23311 IX86_BUILTIN_PUNPCKLQDQ128,
23312
23313 IX86_BUILTIN_CLFLUSH,
23314 IX86_BUILTIN_MFENCE,
23315 IX86_BUILTIN_LFENCE,
23316 IX86_BUILTIN_PAUSE,
23317
23318 IX86_BUILTIN_BSRSI,
23319 IX86_BUILTIN_BSRDI,
23320 IX86_BUILTIN_RDPMC,
23321 IX86_BUILTIN_RDTSC,
23322 IX86_BUILTIN_RDTSCP,
23323 IX86_BUILTIN_ROLQI,
23324 IX86_BUILTIN_ROLHI,
23325 IX86_BUILTIN_RORQI,
23326 IX86_BUILTIN_RORHI,
23327
23328 /* SSE3. */
23329 IX86_BUILTIN_ADDSUBPS,
23330 IX86_BUILTIN_HADDPS,
23331 IX86_BUILTIN_HSUBPS,
23332 IX86_BUILTIN_MOVSHDUP,
23333 IX86_BUILTIN_MOVSLDUP,
23334 IX86_BUILTIN_ADDSUBPD,
23335 IX86_BUILTIN_HADDPD,
23336 IX86_BUILTIN_HSUBPD,
23337 IX86_BUILTIN_LDDQU,
23338
23339 IX86_BUILTIN_MONITOR,
23340 IX86_BUILTIN_MWAIT,
23341
23342 /* SSSE3. */
23343 IX86_BUILTIN_PHADDW,
23344 IX86_BUILTIN_PHADDD,
23345 IX86_BUILTIN_PHADDSW,
23346 IX86_BUILTIN_PHSUBW,
23347 IX86_BUILTIN_PHSUBD,
23348 IX86_BUILTIN_PHSUBSW,
23349 IX86_BUILTIN_PMADDUBSW,
23350 IX86_BUILTIN_PMULHRSW,
23351 IX86_BUILTIN_PSHUFB,
23352 IX86_BUILTIN_PSIGNB,
23353 IX86_BUILTIN_PSIGNW,
23354 IX86_BUILTIN_PSIGND,
23355 IX86_BUILTIN_PALIGNR,
23356 IX86_BUILTIN_PABSB,
23357 IX86_BUILTIN_PABSW,
23358 IX86_BUILTIN_PABSD,
23359
23360 IX86_BUILTIN_PHADDW128,
23361 IX86_BUILTIN_PHADDD128,
23362 IX86_BUILTIN_PHADDSW128,
23363 IX86_BUILTIN_PHSUBW128,
23364 IX86_BUILTIN_PHSUBD128,
23365 IX86_BUILTIN_PHSUBSW128,
23366 IX86_BUILTIN_PMADDUBSW128,
23367 IX86_BUILTIN_PMULHRSW128,
23368 IX86_BUILTIN_PSHUFB128,
23369 IX86_BUILTIN_PSIGNB128,
23370 IX86_BUILTIN_PSIGNW128,
23371 IX86_BUILTIN_PSIGND128,
23372 IX86_BUILTIN_PALIGNR128,
23373 IX86_BUILTIN_PABSB128,
23374 IX86_BUILTIN_PABSW128,
23375 IX86_BUILTIN_PABSD128,
23376
23377 /* AMDFAM10 - SSE4A New Instructions. */
23378 IX86_BUILTIN_MOVNTSD,
23379 IX86_BUILTIN_MOVNTSS,
23380 IX86_BUILTIN_EXTRQI,
23381 IX86_BUILTIN_EXTRQ,
23382 IX86_BUILTIN_INSERTQI,
23383 IX86_BUILTIN_INSERTQ,
23384
23385 /* SSE4.1. */
23386 IX86_BUILTIN_BLENDPD,
23387 IX86_BUILTIN_BLENDPS,
23388 IX86_BUILTIN_BLENDVPD,
23389 IX86_BUILTIN_BLENDVPS,
23390 IX86_BUILTIN_PBLENDVB128,
23391 IX86_BUILTIN_PBLENDW128,
23392
23393 IX86_BUILTIN_DPPD,
23394 IX86_BUILTIN_DPPS,
23395
23396 IX86_BUILTIN_INSERTPS128,
23397
23398 IX86_BUILTIN_MOVNTDQA,
23399 IX86_BUILTIN_MPSADBW128,
23400 IX86_BUILTIN_PACKUSDW128,
23401 IX86_BUILTIN_PCMPEQQ,
23402 IX86_BUILTIN_PHMINPOSUW128,
23403
23404 IX86_BUILTIN_PMAXSB128,
23405 IX86_BUILTIN_PMAXSD128,
23406 IX86_BUILTIN_PMAXUD128,
23407 IX86_BUILTIN_PMAXUW128,
23408
23409 IX86_BUILTIN_PMINSB128,
23410 IX86_BUILTIN_PMINSD128,
23411 IX86_BUILTIN_PMINUD128,
23412 IX86_BUILTIN_PMINUW128,
23413
23414 IX86_BUILTIN_PMOVSXBW128,
23415 IX86_BUILTIN_PMOVSXBD128,
23416 IX86_BUILTIN_PMOVSXBQ128,
23417 IX86_BUILTIN_PMOVSXWD128,
23418 IX86_BUILTIN_PMOVSXWQ128,
23419 IX86_BUILTIN_PMOVSXDQ128,
23420
23421 IX86_BUILTIN_PMOVZXBW128,
23422 IX86_BUILTIN_PMOVZXBD128,
23423 IX86_BUILTIN_PMOVZXBQ128,
23424 IX86_BUILTIN_PMOVZXWD128,
23425 IX86_BUILTIN_PMOVZXWQ128,
23426 IX86_BUILTIN_PMOVZXDQ128,
23427
23428 IX86_BUILTIN_PMULDQ128,
23429 IX86_BUILTIN_PMULLD128,
23430
23431 IX86_BUILTIN_ROUNDPD,
23432 IX86_BUILTIN_ROUNDPS,
23433 IX86_BUILTIN_ROUNDSD,
23434 IX86_BUILTIN_ROUNDSS,
23435
23436 IX86_BUILTIN_FLOORPD,
23437 IX86_BUILTIN_CEILPD,
23438 IX86_BUILTIN_TRUNCPD,
23439 IX86_BUILTIN_RINTPD,
23440 IX86_BUILTIN_FLOORPS,
23441 IX86_BUILTIN_CEILPS,
23442 IX86_BUILTIN_TRUNCPS,
23443 IX86_BUILTIN_RINTPS,
23444
23445 IX86_BUILTIN_PTESTZ,
23446 IX86_BUILTIN_PTESTC,
23447 IX86_BUILTIN_PTESTNZC,
23448
23449 IX86_BUILTIN_VEC_INIT_V2SI,
23450 IX86_BUILTIN_VEC_INIT_V4HI,
23451 IX86_BUILTIN_VEC_INIT_V8QI,
23452 IX86_BUILTIN_VEC_EXT_V2DF,
23453 IX86_BUILTIN_VEC_EXT_V2DI,
23454 IX86_BUILTIN_VEC_EXT_V4SF,
23455 IX86_BUILTIN_VEC_EXT_V4SI,
23456 IX86_BUILTIN_VEC_EXT_V8HI,
23457 IX86_BUILTIN_VEC_EXT_V2SI,
23458 IX86_BUILTIN_VEC_EXT_V4HI,
23459 IX86_BUILTIN_VEC_EXT_V16QI,
23460 IX86_BUILTIN_VEC_SET_V2DI,
23461 IX86_BUILTIN_VEC_SET_V4SF,
23462 IX86_BUILTIN_VEC_SET_V4SI,
23463 IX86_BUILTIN_VEC_SET_V8HI,
23464 IX86_BUILTIN_VEC_SET_V4HI,
23465 IX86_BUILTIN_VEC_SET_V16QI,
23466
23467 IX86_BUILTIN_VEC_PACK_SFIX,
23468
23469 /* SSE4.2. */
23470 IX86_BUILTIN_CRC32QI,
23471 IX86_BUILTIN_CRC32HI,
23472 IX86_BUILTIN_CRC32SI,
23473 IX86_BUILTIN_CRC32DI,
23474
23475 IX86_BUILTIN_PCMPESTRI128,
23476 IX86_BUILTIN_PCMPESTRM128,
23477 IX86_BUILTIN_PCMPESTRA128,
23478 IX86_BUILTIN_PCMPESTRC128,
23479 IX86_BUILTIN_PCMPESTRO128,
23480 IX86_BUILTIN_PCMPESTRS128,
23481 IX86_BUILTIN_PCMPESTRZ128,
23482 IX86_BUILTIN_PCMPISTRI128,
23483 IX86_BUILTIN_PCMPISTRM128,
23484 IX86_BUILTIN_PCMPISTRA128,
23485 IX86_BUILTIN_PCMPISTRC128,
23486 IX86_BUILTIN_PCMPISTRO128,
23487 IX86_BUILTIN_PCMPISTRS128,
23488 IX86_BUILTIN_PCMPISTRZ128,
23489
23490 IX86_BUILTIN_PCMPGTQ,
23491
23492 /* AES instructions */
23493 IX86_BUILTIN_AESENC128,
23494 IX86_BUILTIN_AESENCLAST128,
23495 IX86_BUILTIN_AESDEC128,
23496 IX86_BUILTIN_AESDECLAST128,
23497 IX86_BUILTIN_AESIMC128,
23498 IX86_BUILTIN_AESKEYGENASSIST128,
23499
23500 /* PCLMUL instruction */
23501 IX86_BUILTIN_PCLMULQDQ128,
23502
23503 /* AVX */
23504 IX86_BUILTIN_ADDPD256,
23505 IX86_BUILTIN_ADDPS256,
23506 IX86_BUILTIN_ADDSUBPD256,
23507 IX86_BUILTIN_ADDSUBPS256,
23508 IX86_BUILTIN_ANDPD256,
23509 IX86_BUILTIN_ANDPS256,
23510 IX86_BUILTIN_ANDNPD256,
23511 IX86_BUILTIN_ANDNPS256,
23512 IX86_BUILTIN_BLENDPD256,
23513 IX86_BUILTIN_BLENDPS256,
23514 IX86_BUILTIN_BLENDVPD256,
23515 IX86_BUILTIN_BLENDVPS256,
23516 IX86_BUILTIN_DIVPD256,
23517 IX86_BUILTIN_DIVPS256,
23518 IX86_BUILTIN_DPPS256,
23519 IX86_BUILTIN_HADDPD256,
23520 IX86_BUILTIN_HADDPS256,
23521 IX86_BUILTIN_HSUBPD256,
23522 IX86_BUILTIN_HSUBPS256,
23523 IX86_BUILTIN_MAXPD256,
23524 IX86_BUILTIN_MAXPS256,
23525 IX86_BUILTIN_MINPD256,
23526 IX86_BUILTIN_MINPS256,
23527 IX86_BUILTIN_MULPD256,
23528 IX86_BUILTIN_MULPS256,
23529 IX86_BUILTIN_ORPD256,
23530 IX86_BUILTIN_ORPS256,
23531 IX86_BUILTIN_SHUFPD256,
23532 IX86_BUILTIN_SHUFPS256,
23533 IX86_BUILTIN_SUBPD256,
23534 IX86_BUILTIN_SUBPS256,
23535 IX86_BUILTIN_XORPD256,
23536 IX86_BUILTIN_XORPS256,
23537 IX86_BUILTIN_CMPSD,
23538 IX86_BUILTIN_CMPSS,
23539 IX86_BUILTIN_CMPPD,
23540 IX86_BUILTIN_CMPPS,
23541 IX86_BUILTIN_CMPPD256,
23542 IX86_BUILTIN_CMPPS256,
23543 IX86_BUILTIN_CVTDQ2PD256,
23544 IX86_BUILTIN_CVTDQ2PS256,
23545 IX86_BUILTIN_CVTPD2PS256,
23546 IX86_BUILTIN_CVTPS2DQ256,
23547 IX86_BUILTIN_CVTPS2PD256,
23548 IX86_BUILTIN_CVTTPD2DQ256,
23549 IX86_BUILTIN_CVTPD2DQ256,
23550 IX86_BUILTIN_CVTTPS2DQ256,
23551 IX86_BUILTIN_EXTRACTF128PD256,
23552 IX86_BUILTIN_EXTRACTF128PS256,
23553 IX86_BUILTIN_EXTRACTF128SI256,
23554 IX86_BUILTIN_VZEROALL,
23555 IX86_BUILTIN_VZEROUPPER,
23556 IX86_BUILTIN_VPERMILVARPD,
23557 IX86_BUILTIN_VPERMILVARPS,
23558 IX86_BUILTIN_VPERMILVARPD256,
23559 IX86_BUILTIN_VPERMILVARPS256,
23560 IX86_BUILTIN_VPERMILPD,
23561 IX86_BUILTIN_VPERMILPS,
23562 IX86_BUILTIN_VPERMILPD256,
23563 IX86_BUILTIN_VPERMILPS256,
23564 IX86_BUILTIN_VPERMIL2PD,
23565 IX86_BUILTIN_VPERMIL2PS,
23566 IX86_BUILTIN_VPERMIL2PD256,
23567 IX86_BUILTIN_VPERMIL2PS256,
23568 IX86_BUILTIN_VPERM2F128PD256,
23569 IX86_BUILTIN_VPERM2F128PS256,
23570 IX86_BUILTIN_VPERM2F128SI256,
23571 IX86_BUILTIN_VBROADCASTSS,
23572 IX86_BUILTIN_VBROADCASTSD256,
23573 IX86_BUILTIN_VBROADCASTSS256,
23574 IX86_BUILTIN_VBROADCASTPD256,
23575 IX86_BUILTIN_VBROADCASTPS256,
23576 IX86_BUILTIN_VINSERTF128PD256,
23577 IX86_BUILTIN_VINSERTF128PS256,
23578 IX86_BUILTIN_VINSERTF128SI256,
23579 IX86_BUILTIN_LOADUPD256,
23580 IX86_BUILTIN_LOADUPS256,
23581 IX86_BUILTIN_STOREUPD256,
23582 IX86_BUILTIN_STOREUPS256,
23583 IX86_BUILTIN_LDDQU256,
23584 IX86_BUILTIN_MOVNTDQ256,
23585 IX86_BUILTIN_MOVNTPD256,
23586 IX86_BUILTIN_MOVNTPS256,
23587 IX86_BUILTIN_LOADDQU256,
23588 IX86_BUILTIN_STOREDQU256,
23589 IX86_BUILTIN_MASKLOADPD,
23590 IX86_BUILTIN_MASKLOADPS,
23591 IX86_BUILTIN_MASKSTOREPD,
23592 IX86_BUILTIN_MASKSTOREPS,
23593 IX86_BUILTIN_MASKLOADPD256,
23594 IX86_BUILTIN_MASKLOADPS256,
23595 IX86_BUILTIN_MASKSTOREPD256,
23596 IX86_BUILTIN_MASKSTOREPS256,
23597 IX86_BUILTIN_MOVSHDUP256,
23598 IX86_BUILTIN_MOVSLDUP256,
23599 IX86_BUILTIN_MOVDDUP256,
23600
23601 IX86_BUILTIN_SQRTPD256,
23602 IX86_BUILTIN_SQRTPS256,
23603 IX86_BUILTIN_SQRTPS_NR256,
23604 IX86_BUILTIN_RSQRTPS256,
23605 IX86_BUILTIN_RSQRTPS_NR256,
23606
23607 IX86_BUILTIN_RCPPS256,
23608
23609 IX86_BUILTIN_ROUNDPD256,
23610 IX86_BUILTIN_ROUNDPS256,
23611
23612 IX86_BUILTIN_FLOORPD256,
23613 IX86_BUILTIN_CEILPD256,
23614 IX86_BUILTIN_TRUNCPD256,
23615 IX86_BUILTIN_RINTPD256,
23616 IX86_BUILTIN_FLOORPS256,
23617 IX86_BUILTIN_CEILPS256,
23618 IX86_BUILTIN_TRUNCPS256,
23619 IX86_BUILTIN_RINTPS256,
23620
23621 IX86_BUILTIN_UNPCKHPD256,
23622 IX86_BUILTIN_UNPCKLPD256,
23623 IX86_BUILTIN_UNPCKHPS256,
23624 IX86_BUILTIN_UNPCKLPS256,
23625
23626 IX86_BUILTIN_SI256_SI,
23627 IX86_BUILTIN_PS256_PS,
23628 IX86_BUILTIN_PD256_PD,
23629 IX86_BUILTIN_SI_SI256,
23630 IX86_BUILTIN_PS_PS256,
23631 IX86_BUILTIN_PD_PD256,
23632
23633 IX86_BUILTIN_VTESTZPD,
23634 IX86_BUILTIN_VTESTCPD,
23635 IX86_BUILTIN_VTESTNZCPD,
23636 IX86_BUILTIN_VTESTZPS,
23637 IX86_BUILTIN_VTESTCPS,
23638 IX86_BUILTIN_VTESTNZCPS,
23639 IX86_BUILTIN_VTESTZPD256,
23640 IX86_BUILTIN_VTESTCPD256,
23641 IX86_BUILTIN_VTESTNZCPD256,
23642 IX86_BUILTIN_VTESTZPS256,
23643 IX86_BUILTIN_VTESTCPS256,
23644 IX86_BUILTIN_VTESTNZCPS256,
23645 IX86_BUILTIN_PTESTZ256,
23646 IX86_BUILTIN_PTESTC256,
23647 IX86_BUILTIN_PTESTNZC256,
23648
23649 IX86_BUILTIN_MOVMSKPD256,
23650 IX86_BUILTIN_MOVMSKPS256,
23651
23652 /* TFmode support builtins. */
23653 IX86_BUILTIN_INFQ,
23654 IX86_BUILTIN_HUGE_VALQ,
23655 IX86_BUILTIN_FABSQ,
23656 IX86_BUILTIN_COPYSIGNQ,
23657
23658 /* Vectorizer support builtins. */
23659 IX86_BUILTIN_CPYSGNPS,
23660 IX86_BUILTIN_CPYSGNPD,
23661 IX86_BUILTIN_CPYSGNPS256,
23662 IX86_BUILTIN_CPYSGNPD256,
23663
23664 IX86_BUILTIN_CVTUDQ2PS,
23665
23666 IX86_BUILTIN_VEC_PERM_V2DF,
23667 IX86_BUILTIN_VEC_PERM_V4SF,
23668 IX86_BUILTIN_VEC_PERM_V2DI,
23669 IX86_BUILTIN_VEC_PERM_V4SI,
23670 IX86_BUILTIN_VEC_PERM_V8HI,
23671 IX86_BUILTIN_VEC_PERM_V16QI,
23672 IX86_BUILTIN_VEC_PERM_V2DI_U,
23673 IX86_BUILTIN_VEC_PERM_V4SI_U,
23674 IX86_BUILTIN_VEC_PERM_V8HI_U,
23675 IX86_BUILTIN_VEC_PERM_V16QI_U,
23676 IX86_BUILTIN_VEC_PERM_V4DF,
23677 IX86_BUILTIN_VEC_PERM_V8SF,
23678
23679 /* FMA4 and XOP instructions. */
23680 IX86_BUILTIN_VFMADDSS,
23681 IX86_BUILTIN_VFMADDSD,
23682 IX86_BUILTIN_VFMADDPS,
23683 IX86_BUILTIN_VFMADDPD,
23684 IX86_BUILTIN_VFMADDPS256,
23685 IX86_BUILTIN_VFMADDPD256,
23686 IX86_BUILTIN_VFMADDSUBPS,
23687 IX86_BUILTIN_VFMADDSUBPD,
23688 IX86_BUILTIN_VFMADDSUBPS256,
23689 IX86_BUILTIN_VFMADDSUBPD256,
23690
23691 IX86_BUILTIN_VPCMOV,
23692 IX86_BUILTIN_VPCMOV_V2DI,
23693 IX86_BUILTIN_VPCMOV_V4SI,
23694 IX86_BUILTIN_VPCMOV_V8HI,
23695 IX86_BUILTIN_VPCMOV_V16QI,
23696 IX86_BUILTIN_VPCMOV_V4SF,
23697 IX86_BUILTIN_VPCMOV_V2DF,
23698 IX86_BUILTIN_VPCMOV256,
23699 IX86_BUILTIN_VPCMOV_V4DI256,
23700 IX86_BUILTIN_VPCMOV_V8SI256,
23701 IX86_BUILTIN_VPCMOV_V16HI256,
23702 IX86_BUILTIN_VPCMOV_V32QI256,
23703 IX86_BUILTIN_VPCMOV_V8SF256,
23704 IX86_BUILTIN_VPCMOV_V4DF256,
23705
23706 IX86_BUILTIN_VPPERM,
23707
23708 IX86_BUILTIN_VPMACSSWW,
23709 IX86_BUILTIN_VPMACSWW,
23710 IX86_BUILTIN_VPMACSSWD,
23711 IX86_BUILTIN_VPMACSWD,
23712 IX86_BUILTIN_VPMACSSDD,
23713 IX86_BUILTIN_VPMACSDD,
23714 IX86_BUILTIN_VPMACSSDQL,
23715 IX86_BUILTIN_VPMACSSDQH,
23716 IX86_BUILTIN_VPMACSDQL,
23717 IX86_BUILTIN_VPMACSDQH,
23718 IX86_BUILTIN_VPMADCSSWD,
23719 IX86_BUILTIN_VPMADCSWD,
23720
23721 IX86_BUILTIN_VPHADDBW,
23722 IX86_BUILTIN_VPHADDBD,
23723 IX86_BUILTIN_VPHADDBQ,
23724 IX86_BUILTIN_VPHADDWD,
23725 IX86_BUILTIN_VPHADDWQ,
23726 IX86_BUILTIN_VPHADDDQ,
23727 IX86_BUILTIN_VPHADDUBW,
23728 IX86_BUILTIN_VPHADDUBD,
23729 IX86_BUILTIN_VPHADDUBQ,
23730 IX86_BUILTIN_VPHADDUWD,
23731 IX86_BUILTIN_VPHADDUWQ,
23732 IX86_BUILTIN_VPHADDUDQ,
23733 IX86_BUILTIN_VPHSUBBW,
23734 IX86_BUILTIN_VPHSUBWD,
23735 IX86_BUILTIN_VPHSUBDQ,
23736
23737 IX86_BUILTIN_VPROTB,
23738 IX86_BUILTIN_VPROTW,
23739 IX86_BUILTIN_VPROTD,
23740 IX86_BUILTIN_VPROTQ,
23741 IX86_BUILTIN_VPROTB_IMM,
23742 IX86_BUILTIN_VPROTW_IMM,
23743 IX86_BUILTIN_VPROTD_IMM,
23744 IX86_BUILTIN_VPROTQ_IMM,
23745
23746 IX86_BUILTIN_VPSHLB,
23747 IX86_BUILTIN_VPSHLW,
23748 IX86_BUILTIN_VPSHLD,
23749 IX86_BUILTIN_VPSHLQ,
23750 IX86_BUILTIN_VPSHAB,
23751 IX86_BUILTIN_VPSHAW,
23752 IX86_BUILTIN_VPSHAD,
23753 IX86_BUILTIN_VPSHAQ,
23754
23755 IX86_BUILTIN_VFRCZSS,
23756 IX86_BUILTIN_VFRCZSD,
23757 IX86_BUILTIN_VFRCZPS,
23758 IX86_BUILTIN_VFRCZPD,
23759 IX86_BUILTIN_VFRCZPS256,
23760 IX86_BUILTIN_VFRCZPD256,
23761
23762 IX86_BUILTIN_VPCOMEQUB,
23763 IX86_BUILTIN_VPCOMNEUB,
23764 IX86_BUILTIN_VPCOMLTUB,
23765 IX86_BUILTIN_VPCOMLEUB,
23766 IX86_BUILTIN_VPCOMGTUB,
23767 IX86_BUILTIN_VPCOMGEUB,
23768 IX86_BUILTIN_VPCOMFALSEUB,
23769 IX86_BUILTIN_VPCOMTRUEUB,
23770
23771 IX86_BUILTIN_VPCOMEQUW,
23772 IX86_BUILTIN_VPCOMNEUW,
23773 IX86_BUILTIN_VPCOMLTUW,
23774 IX86_BUILTIN_VPCOMLEUW,
23775 IX86_BUILTIN_VPCOMGTUW,
23776 IX86_BUILTIN_VPCOMGEUW,
23777 IX86_BUILTIN_VPCOMFALSEUW,
23778 IX86_BUILTIN_VPCOMTRUEUW,
23779
23780 IX86_BUILTIN_VPCOMEQUD,
23781 IX86_BUILTIN_VPCOMNEUD,
23782 IX86_BUILTIN_VPCOMLTUD,
23783 IX86_BUILTIN_VPCOMLEUD,
23784 IX86_BUILTIN_VPCOMGTUD,
23785 IX86_BUILTIN_VPCOMGEUD,
23786 IX86_BUILTIN_VPCOMFALSEUD,
23787 IX86_BUILTIN_VPCOMTRUEUD,
23788
23789 IX86_BUILTIN_VPCOMEQUQ,
23790 IX86_BUILTIN_VPCOMNEUQ,
23791 IX86_BUILTIN_VPCOMLTUQ,
23792 IX86_BUILTIN_VPCOMLEUQ,
23793 IX86_BUILTIN_VPCOMGTUQ,
23794 IX86_BUILTIN_VPCOMGEUQ,
23795 IX86_BUILTIN_VPCOMFALSEUQ,
23796 IX86_BUILTIN_VPCOMTRUEUQ,
23797
23798 IX86_BUILTIN_VPCOMEQB,
23799 IX86_BUILTIN_VPCOMNEB,
23800 IX86_BUILTIN_VPCOMLTB,
23801 IX86_BUILTIN_VPCOMLEB,
23802 IX86_BUILTIN_VPCOMGTB,
23803 IX86_BUILTIN_VPCOMGEB,
23804 IX86_BUILTIN_VPCOMFALSEB,
23805 IX86_BUILTIN_VPCOMTRUEB,
23806
23807 IX86_BUILTIN_VPCOMEQW,
23808 IX86_BUILTIN_VPCOMNEW,
23809 IX86_BUILTIN_VPCOMLTW,
23810 IX86_BUILTIN_VPCOMLEW,
23811 IX86_BUILTIN_VPCOMGTW,
23812 IX86_BUILTIN_VPCOMGEW,
23813 IX86_BUILTIN_VPCOMFALSEW,
23814 IX86_BUILTIN_VPCOMTRUEW,
23815
23816 IX86_BUILTIN_VPCOMEQD,
23817 IX86_BUILTIN_VPCOMNED,
23818 IX86_BUILTIN_VPCOMLTD,
23819 IX86_BUILTIN_VPCOMLED,
23820 IX86_BUILTIN_VPCOMGTD,
23821 IX86_BUILTIN_VPCOMGED,
23822 IX86_BUILTIN_VPCOMFALSED,
23823 IX86_BUILTIN_VPCOMTRUED,
23824
23825 IX86_BUILTIN_VPCOMEQQ,
23826 IX86_BUILTIN_VPCOMNEQ,
23827 IX86_BUILTIN_VPCOMLTQ,
23828 IX86_BUILTIN_VPCOMLEQ,
23829 IX86_BUILTIN_VPCOMGTQ,
23830 IX86_BUILTIN_VPCOMGEQ,
23831 IX86_BUILTIN_VPCOMFALSEQ,
23832 IX86_BUILTIN_VPCOMTRUEQ,
23833
23834 /* LWP instructions. */
23835 IX86_BUILTIN_LLWPCB,
23836 IX86_BUILTIN_SLWPCB,
23837 IX86_BUILTIN_LWPVAL32,
23838 IX86_BUILTIN_LWPVAL64,
23839 IX86_BUILTIN_LWPINS32,
23840 IX86_BUILTIN_LWPINS64,
23841
23842 IX86_BUILTIN_CLZS,
23843
23844 /* BMI instructions. */
23845 IX86_BUILTIN_BEXTR32,
23846 IX86_BUILTIN_BEXTR64,
23847 IX86_BUILTIN_CTZS,
23848
23849 /* TBM instructions. */
23850 IX86_BUILTIN_BEXTRI32,
23851 IX86_BUILTIN_BEXTRI64,
23852
23853
23854 /* FSGSBASE instructions. */
23855 IX86_BUILTIN_RDFSBASE32,
23856 IX86_BUILTIN_RDFSBASE64,
23857 IX86_BUILTIN_RDGSBASE32,
23858 IX86_BUILTIN_RDGSBASE64,
23859 IX86_BUILTIN_WRFSBASE32,
23860 IX86_BUILTIN_WRFSBASE64,
23861 IX86_BUILTIN_WRGSBASE32,
23862 IX86_BUILTIN_WRGSBASE64,
23863
23864 /* RDRND instructions. */
23865 IX86_BUILTIN_RDRAND16_STEP,
23866 IX86_BUILTIN_RDRAND32_STEP,
23867 IX86_BUILTIN_RDRAND64_STEP,
23868
23869 /* F16C instructions. */
23870 IX86_BUILTIN_CVTPH2PS,
23871 IX86_BUILTIN_CVTPH2PS256,
23872 IX86_BUILTIN_CVTPS2PH,
23873 IX86_BUILTIN_CVTPS2PH256,
23874
23875 /* CFString built-in for Darwin.  */
23876 IX86_BUILTIN_CFSTRING,
23877
23878 IX86_BUILTIN_MAX
23879 };
23880
23881 /* Table for the ix86 builtin decls. */
23882 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
23883
23884 /* Table of all of the builtin functions that are possible with different ISAs
23885    but are waiting to be built until a function is declared to use that
23886    ISA. */
23887 struct builtin_isa {
23888 const char *name; /* function name */
23889 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
23890 int isa; /* isa_flags this builtin is defined for */
23891 bool const_p; /* true if the declaration is constant */
23892 bool set_and_not_built_p;
23893 };
23894
23895 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
23896
23897
23898 /* Add an ix86 target builtin function with CODE, NAME and TCODE. Save the
23899    MASK of isa_flags to use in the ix86_builtins_isa array. Store the
23900    function decl in the ix86_builtins array. Return the function decl, or
23901    NULL_TREE if the builtin was not added.
23902
23903    If the front end has a special hook for builtin functions, delay adding
23904    builtin functions that aren't in the current ISA until the ISA is changed
23905    with function specific optimization. Doing so can save about 300K for the
23906    default compiler. When the builtin is expanded, check at that time whether
23907    it is valid.
23908
23909    If the front end doesn't have a special hook, record all builtins, even
23910    those not in the current ISA, in case the user uses function specific
23911    options for a different ISA, so that we don't get scope errors if a
23912    builtin is added in the middle of a function scope. */
23913
23914 static inline tree
23915 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
23916 enum ix86_builtins code)
23917 {
23918 tree decl = NULL_TREE;
23919
23920 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
23921 {
23922 ix86_builtins_isa[(int) code].isa = mask;
23923
23924 mask &= ~OPTION_MASK_ISA_64BIT;
23925 if (mask == 0
23926 || (mask & ix86_isa_flags) != 0
23927 || (lang_hooks.builtin_function
23928 == lang_hooks.builtin_function_ext_scope))
23929
23930 {
23931 tree type = ix86_get_builtin_func_type (tcode);
23932 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
23933 NULL, NULL_TREE);
23934 ix86_builtins[(int) code] = decl;
23935 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
23936 }
23937 else
23938 {
23939 ix86_builtins[(int) code] = NULL_TREE;
23940 ix86_builtins_isa[(int) code].tcode = tcode;
23941 ix86_builtins_isa[(int) code].name = name;
23942 ix86_builtins_isa[(int) code].const_p = false;
23943 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
23944 }
23945 }
23946
23947 return decl;
23948 }
23949
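/* Illustrative sketch, not part of the original file: how def_builtin is
   typically invoked for one of the builtins enumerated above.  The ISA mask,
   name, type code and builtin code below are taken from entries that appear
   later in this file; the wrapper function itself is hypothetical.  */

static void
example_def_builtin_usage (void)
{
  tree decl;

  /* Register __builtin_ia32_sqrtps, guarded by the SSE ISA mask.  If SSE is
     not currently enabled and the front end supports deferred registration,
     def_builtin returns NULL_TREE and only records the request in
     ix86_builtins_isa.  */
  decl = def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
		      V4SF_FTYPE_V4SF, IX86_BUILTIN_SQRTPS);

  if (decl == NULL_TREE)
    ;  /* Deferred; ix86_add_new_builtins may build it later.  */
}
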
23950 /* Like def_builtin, but also marks the function decl "const". */
23951
23952 static inline tree
23953 def_builtin_const (int mask, const char *name,
23954 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
23955 {
23956 tree decl = def_builtin (mask, name, tcode, code);
23957 if (decl)
23958 TREE_READONLY (decl) = 1;
23959 else
23960 ix86_builtins_isa[(int) code].const_p = true;
23961
23962 return decl;
23963 }
23964
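/* Illustrative sketch, not part of the original file: def_builtin_const used
   for a builtin whose result depends only on its arguments.  The entry shown
   mirrors the __builtin_ia32_addps row in bdesc_args further below; the
   wrapper function is hypothetical.  */

static void
example_def_builtin_const_usage (void)
{
  /* As def_builtin, but the decl (or the deferred record) is marked const,
     so the optimizers may CSE or hoist calls to it.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
		     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
}
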
23965 /* Add any new builtin functions for a given ISA that may not have been
23966    declared yet. This saves a bit of space compared to adding all of the
23967    declarations to the tree up front, even when they would never be used. */
23968
23969 static void
23970 ix86_add_new_builtins (int isa)
23971 {
23972 int i;
23973
23974 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
23975 {
23976 if ((ix86_builtins_isa[i].isa & isa) != 0
23977 && ix86_builtins_isa[i].set_and_not_built_p)
23978 {
23979 tree decl, type;
23980
23981 /* Don't define the builtin again. */
23982 ix86_builtins_isa[i].set_and_not_built_p = false;
23983
23984 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
23985 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23986 type, i, BUILT_IN_MD, NULL,
23987 NULL_TREE);
23988
23989 ix86_builtins[i] = decl;
23990 if (ix86_builtins_isa[i].const_p)
23991 TREE_READONLY (decl) = 1;
23992 }
23993 }
23994 }
23995
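/* Illustrative sketch, not part of the original file: a hypothetical call
   site for ix86_add_new_builtins.  After the ISA flags in effect grow (for
   example while processing a function-specific target attribute), the
   deferred builtins recorded by def_builtin are given real decls.  The
   helper name and its parameter are assumptions.  */

static void
example_after_isa_change (int new_isa_flags)
{
  /* Materialize any builtins that def_builtin recorded but skipped because
     their ISA was not enabled at the time.  */
  ix86_add_new_builtins (new_isa_flags);
}
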
23996 /* Bits for builtin_description.flag. */
23997
23998 /* Set when we don't support the comparison natively, and should
23999    swap the comparison operands in order to support it. */
24000 #define BUILTIN_DESC_SWAP_OPERANDS 1
24001
24002 struct builtin_description
24003 {
24004 const unsigned int mask;
24005 const enum insn_code icode;
24006 const char *const name;
24007 const enum ix86_builtins code;
24008 const enum rtx_code comparison;
24009 const int flag;
24010 };
24011
24012 static const struct builtin_description bdesc_comi[] =
24013 {
24014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24038 };
24039
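/* Illustrative sketch, not part of the original file: one way a
   builtin_description table such as bdesc_comi above could be walked to
   register its entries.  The real registration loop lives elsewhere in this
   file; the helper name and the two function-type codes used below
   (INT_FTYPE_V4SF_V4SF and INT_FTYPE_V2DF_V2DF) are assumed for
   illustration.  BUILTIN_DESC_SWAP_OPERANDS in the flag field is only
   consulted later, when such a builtin is expanded.  */

static void
example_register_comi_table (void)
{
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      /* Pick the function type from the ISA mask: the SSE2 rows compare
	 V2DF operands, the SSE rows compare V4SF operands.  */
      enum ix86_builtin_func_type ftype
	= (d->mask & OPTION_MASK_ISA_SSE2)
	  ? INT_FTYPE_V2DF_V2DF : INT_FTYPE_V4SF_V4SF;

      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
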
24040 static const struct builtin_description bdesc_pcmpestr[] =
24041 {
24042 /* SSE4.2 */
24043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24044 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24045 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24046 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24047 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24048 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24050 };
24051
24052 static const struct builtin_description bdesc_pcmpistr[] =
24053 {
24054 /* SSE4.2 */
24055 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24056 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24058 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24062 };
24063
24064 /* Special builtins with a variable number of arguments. */
24065 static const struct builtin_description bdesc_special_args[] =
24066 {
24067 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24068 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24069 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24070
24071 /* MMX */
24072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24073
24074 /* 3DNow! */
24075 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24076
24077 /* SSE */
24078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24081
24082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24086
24087 /* SSE or 3DNow!A */
24088 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24089 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24090
24091 /* SSE2 */
24092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24101
24102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24104
24105 /* SSE3 */
24106 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24107
24108 /* SSE4.1 */
24109 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24110
24111 /* SSE4A */
24112 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24113 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24114
24115 /* AVX */
24116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24118
24119 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24120 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24121 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24124
24125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24132
24133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24136
24137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24145
24146 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24147 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24148 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24152
24153 /* FSGSBASE */
24154 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24155 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24156 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24162 };
24163
24164 /* Builtins with a variable number of arguments. */
24165 static const struct builtin_description bdesc_args[] =
24166 {
24167 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24168 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24169 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24170 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24171 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24172 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24173 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24174
24175 /* MMX */
24176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24182
24183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24191
24192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24194
24195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24199
24200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24206
24207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24213
24214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24217
24218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24219
24220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24226
24227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24233
24234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24238
24239 /* 3DNow! */
24240 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24241 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24242 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24243 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24244
24245 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24246 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24247 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24248 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24260
24261 /* 3DNow!A */
24262 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24263 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24264 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24265 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24266 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24267 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24268
24269 /* SSE */
24270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24272 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24274 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24278 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24281 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24282
24283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24284
24285 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24286 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24287 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24293
24294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24316
24317 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24318 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24321
24322 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24324 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24325 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24326
24327 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24328
24329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24332 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24333 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24334
24335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24337 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24338
24339 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24340
24341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24344
24345 /* SSE MMX or 3DNow!A */
24346 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24347 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24348 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24349
24350 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24351 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24352 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24353 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24354
24355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24357
24358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24359
24360 /* SSE2 */
24361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24362
24363 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24364 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24365 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24366 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24367 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24368 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24369 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24370 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24371 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24372 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24373 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24374 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24375
24376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24382
24383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24388
24389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24390
24391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24393 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24394 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24395
24396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24399
24400 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24401 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24402 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24403 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24408
24409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24429
24430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24431 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24434
24435 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24437 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24438 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24439
24440 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24441
24442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24443 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24444 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24445
24446 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24447
24448 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24449 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24450 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24451 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24452 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24453 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24454 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24455 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24456
24457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24465
24466 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24467 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24468
24469 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24471 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24472 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24473
24474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24476
24477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24483
24484 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24485 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24486 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24488
24489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24493 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24494 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24495 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24496 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24497
24498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24501
24502 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24504
24505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24507
24508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24509
24510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24511 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24514
24515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24518 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24519 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24520 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24521 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24522
24523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24526 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24527 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24528 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24529 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24530
24531 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24533 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24535
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24539
24540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24541
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24543 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24544
24545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24546
24547 /* SSE2 MMX */
24548 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24550
24551 /* SSE3 */
24552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24554
24555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24559 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24561
24562 /* SSSE3 */
24563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
24564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
24565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
24567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
24568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24569
24570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
24583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
24584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24594
24595 /* SSSE3. */
24596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
24597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
24598
24599 /* SSE4.1 */
24600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
24603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
24604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
24608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
24610
24611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24624
24625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24637
24638 /* SSE4.1 */
24639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24643
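   /* The floor/ceil/trunc/rint entries below reuse the sse4_1_roundpd
      and sse4_1_roundps patterns; the comparison field, otherwise
      unused for these builtins, carries the ROUND_* constant that
      supplies the rounding-mode immediate of the round instruction.  */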
24644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
24645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
24646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
24647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
24648
24649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
24650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
24651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
24652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
24653
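   /* For the ptest builtins the comparison field selects which PTEST
      condition the builtin returns: EQ corresponds to the zero flag
      (ptestz), LTU to the carry flag (ptestc) and GTU to neither flag
      set (ptestnzc).  */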
24654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24657
24658 /* SSE4.2 */
24659 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24660 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
24661 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
24662 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24663 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24664
24665 /* SSE4A */
24666 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
24667 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
24668 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
24669 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24670
24671 /* AES */
24672 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
24673 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24674
24675 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24679
24680 /* PCLMUL */
24681 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
24682
24683 /* AVX */
24684 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24685 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24688 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24689 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24692 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24698 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24699 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24700 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24701 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24702 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24703 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24704 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24705 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24706 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24707 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24708 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24709 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24710
24711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
24712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
24713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
24714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
24715
24716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
24719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
24720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
24730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
24731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
24732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
24733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
24734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
24735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
24737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
24743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
24748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
24749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
24750
24751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24754
24755 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24757 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24759 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24760
24761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24762
24763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24765
24766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
24767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
24768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
24769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
24770
24771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
24772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
24773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
24774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
24775
24776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24780
24781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
24782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
24783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
24784 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
24785 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
24786 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
24787
24788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24803
24804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
24805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
24806
24807 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24808 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24809
24810 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24811
24812 /* BMI */
24813 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24814 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24815 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24816
24817 /* TBM */
24818 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24819 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24820
24821 /* F16C */
24822 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
24823 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
24824 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
24825 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
24826 };
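/* End of the argument-builtin table.  Each entry above pairs an ISA
   mask and insn pattern with a builtin name, its IX86_BUILTIN_* code,
   an optional comparison/rounding code and the prototype used to
   register and expand the builtin.  User code normally reaches these
   builtins through the intrinsic wrappers in the *intrin.h headers
   rather than directly; a minimal illustration, assuming GCC's
   emmintrin.h where _mm_add_epi64 expands to __builtin_ia32_paddq128:

     #include <emmintrin.h>

     __m128i
     add_two (__m128i a, __m128i b)
     {
       return _mm_add_epi64 (a, b);
     }
   */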
24827
24828 /* FMA4 and XOP. */
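/* The MULTI_ARG_* macros below are shorthand aliases for the
   V*_FTYPE_* function-prototype enumerators used by bdesc_multi_arg:
   the leading digit is the operand count, SF/DF/SI/DI/HI/QI give the
   vector element mode, a trailing 2 marks the 256-bit form, the
   IMM/CMP/TF suffixes mark the immediate, comparison and condition
   variants, and mixed suffixes (e.g. SI_DI, QI_HI) mark entries whose
   operand and result modes differ.  */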
24829 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
24830 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
24831 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
24832 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
24833 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
24834 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
24835 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
24836 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
24837 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
24838 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
24839 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
24840 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
24841 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
24842 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
24843 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
24844 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
24845 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
24846 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
24847 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
24848 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
24849 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
24850 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
24851 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
24852 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
24853 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
24854 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
24855 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
24856 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
24857 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
24858 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
24859 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
24860 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
24861 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
24862 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
24863 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
24864 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
24865 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
24866 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
24867 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
24868 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
24869 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
24870 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
24871 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
24872 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
24873 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
24874 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
24875 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
24876 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
24877 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
24878 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
24879 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
24880 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
24881
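/* Multi-operand (FMA4 and XOP) builtins.  The entry layout matches the
   tables above: ISA mask, insn pattern, builtin name, IX86_BUILTIN_*
   code, an optional comparison code (used by the vpcom* entries below)
   and the MULTI_ARG_* prototype.  */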
24882 static const struct builtin_description bdesc_multi_arg[] =
24883 {
24884 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
24885 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
24886 UNKNOWN, (int)MULTI_ARG_3_SF },
24887 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
24888 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
24889 UNKNOWN, (int)MULTI_ARG_3_DF },
24890
24891 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
24892 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
24893 UNKNOWN, (int)MULTI_ARG_3_SF },
24894 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
24895 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
24896 UNKNOWN, (int)MULTI_ARG_3_DF },
24897 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
24898 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
24899 UNKNOWN, (int)MULTI_ARG_3_SF2 },
24900 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
24901 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
24902 UNKNOWN, (int)MULTI_ARG_3_DF2 },
24903
24904 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
24905 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
24906 UNKNOWN, (int)MULTI_ARG_3_SF },
24907 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
24908 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
24909 UNKNOWN, (int)MULTI_ARG_3_DF },
24910 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
24911 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
24912 UNKNOWN, (int)MULTI_ARG_3_SF2 },
24913 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
24914 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
24915 UNKNOWN, (int)MULTI_ARG_3_DF2 },
24916
24917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
24918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
24919 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
24920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
24921 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
24922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
24923 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
24924
24925 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24927 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
24928 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
24929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
24930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
24931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
24932
24933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
24934
24935 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24943 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24947
24948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
24950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
24951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
24952 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
24953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
24954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
24955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
24956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
24958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
24959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
24960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
24962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
24963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
24964
24965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
24966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
24967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
24968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
24969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
24970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
24971
24972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24987
24988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
24989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
24992 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
24993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
24994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
24995
24996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
24997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25003
25004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25011
25012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25019
25020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25027
25028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25035
25036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25043
25044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25051
25052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25060
25061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25069
25070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25074
25075 };
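/* Worked example (annotation added for illustration, not part of the
   original table): the row above that maps "__builtin_ia32_vpcomltb" to
   CODE_FOR_xop_maskcmpv16qi3 with comparison LT and flag
   MULTI_ARG_2_QI_CMP is expanded by ix86_expand_multi_arg_builtin below:
   the two V16QI operands are expanded, a comparison rtx of the shape

     (lt:V16QI op0 op1)

   is built from the LT code, and the xop_maskcmpv16qi3 pattern is emitted
   with that comparison plus the two operands.  Note also that the
   "...neq*" spellings (e.g. "__builtin_ia32_vpcomneqb") deliberately reuse
   the same IX86_BUILTIN_VPCOMNE* codes as the "...ne*" spellings; both
   expand to the identical NE comparison.  */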
25076
25077 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25078    not in the current target ISA, so that the user can compile particular
25079    modules with target-specific options that differ from the command-line
25080    options.  */
25081 static void
25082 ix86_init_mmx_sse_builtins (void)
25083 {
25084 const struct builtin_description * d;
25085 enum ix86_builtin_func_type ftype;
25086 size_t i;
25087
25088 /* Add all special builtins with a variable number of operands.  */
25089 for (i = 0, d = bdesc_special_args;
25090 i < ARRAY_SIZE (bdesc_special_args);
25091 i++, d++)
25092 {
25093 if (d->name == 0)
25094 continue;
25095
25096 ftype = (enum ix86_builtin_func_type) d->flag;
25097 def_builtin (d->mask, d->name, ftype, d->code);
25098 }
25099
25100 /* Add all builtins with a variable number of operands.  */
25101 for (i = 0, d = bdesc_args;
25102 i < ARRAY_SIZE (bdesc_args);
25103 i++, d++)
25104 {
25105 if (d->name == 0)
25106 continue;
25107
25108 ftype = (enum ix86_builtin_func_type) d->flag;
25109 def_builtin_const (d->mask, d->name, ftype, d->code);
25110 }
25111
25112 /* pcmpestr[im] insns. */
25113 for (i = 0, d = bdesc_pcmpestr;
25114 i < ARRAY_SIZE (bdesc_pcmpestr);
25115 i++, d++)
25116 {
25117 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25118 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25119 else
25120 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25121 def_builtin_const (d->mask, d->name, ftype, d->code);
25122 }
25123
25124 /* pcmpistr[im] insns. */
25125 for (i = 0, d = bdesc_pcmpistr;
25126 i < ARRAY_SIZE (bdesc_pcmpistr);
25127 i++, d++)
25128 {
25129 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25130 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25131 else
25132 ftype = INT_FTYPE_V16QI_V16QI_INT;
25133 def_builtin_const (d->mask, d->name, ftype, d->code);
25134 }
25135
25136 /* comi/ucomi insns. */
25137 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25138 {
25139 if (d->mask == OPTION_MASK_ISA_SSE2)
25140 ftype = INT_FTYPE_V2DF_V2DF;
25141 else
25142 ftype = INT_FTYPE_V4SF_V4SF;
25143 def_builtin_const (d->mask, d->name, ftype, d->code);
25144 }
25145
25146 /* SSE */
25147 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25148 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25149 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25150 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25151
25152 /* SSE or 3DNow!A */
25153 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25154 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25155 IX86_BUILTIN_MASKMOVQ);
25156
25157 /* SSE2 */
25158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25159 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25160
25161 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25162 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25163 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25164 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25165
25166 /* SSE3. */
25167 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25168 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25169 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25170 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25171
25172 /* AES */
25173 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25174 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25175 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25176 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25177 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25178 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25179 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25180 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25181 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25182 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25183 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25184 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25185
25186 /* PCLMUL */
25187 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25188 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25189
25190 /* RDRND */
25191 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25192 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25193 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25194 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25195 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25196 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25197 IX86_BUILTIN_RDRAND64_STEP);
25198
25199 /* MMX access to the vec_init patterns. */
25200 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25201 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25202
25203 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25204 V4HI_FTYPE_HI_HI_HI_HI,
25205 IX86_BUILTIN_VEC_INIT_V4HI);
25206
25207 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25208 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25209 IX86_BUILTIN_VEC_INIT_V8QI);
25210
25211 /* Access to the vec_extract patterns. */
25212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25213 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25215 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25216 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25217 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25219 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25221 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25222
25223 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25224 "__builtin_ia32_vec_ext_v4hi",
25225 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25226
25227 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25228 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25229
25230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25231 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25232
25233 /* Access to the vec_set patterns. */
25234 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25235 "__builtin_ia32_vec_set_v2di",
25236 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25237
25238 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25239 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25240
25241 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25242 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25243
25244 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25245 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25246
25247 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25248 "__builtin_ia32_vec_set_v4hi",
25249 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25250
25251 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25252 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25253
25254 /* Add FMA4 multi-argument instructions.  */
25255 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25256 {
25257 if (d->name == 0)
25258 continue;
25259
25260 ftype = (enum ix86_builtin_func_type) d->flag;
25261 def_builtin_const (d->mask, d->name, ftype, d->code);
25262 }
25263 }
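/* Illustrative sketch (hypothetical user-level code, not part of the
   compiler): because the loops above register builtins even when the
   corresponding ISA bits are not enabled on the command line, a single
   translation unit can still provide, say, an XOP code path through the
   target attribute.  The typedef and function names below are examples.

     typedef char v16qi __attribute__ ((vector_size (16)));

     __attribute__ ((target ("xop")))
     static v16qi
     rotate_bytes (v16qi x, v16qi amount)
     {
       return __builtin_ia32_vprotb (x, amount);
     }

   Compiled without -mxop, the builtin is still known to the compiler; the
   OPTION_MASK_ISA_XOP bit recorded in its descriptor is what gates its
   use.  */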
25264
25265 /* Internal method for ix86_init_builtins. */
25266
25267 static void
25268 ix86_init_builtins_va_builtins_abi (void)
25269 {
25270 tree ms_va_ref, sysv_va_ref;
25271 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25272 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25273 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25274 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25275
25276 if (!TARGET_64BIT)
25277 return;
25278 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25279 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25280 ms_va_ref = build_reference_type (ms_va_list_type_node);
25281 sysv_va_ref =
25282 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25283
25284 fnvoid_va_end_ms =
25285 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25286 fnvoid_va_start_ms =
25287 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25288 fnvoid_va_end_sysv =
25289 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25290 fnvoid_va_start_sysv =
25291 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25292 NULL_TREE);
25293 fnvoid_va_copy_ms =
25294 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25295 NULL_TREE);
25296 fnvoid_va_copy_sysv =
25297 build_function_type_list (void_type_node, sysv_va_ref,
25298 sysv_va_ref, NULL_TREE);
25299
25300 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25301 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25302 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25303 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25304 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25305 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25306 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25307 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25308 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25309 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25310 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25311 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25312 }
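/* Illustrative sketch (hypothetical user code): the ms_abi/sysv_abi
   va builtins registered above let a 64-bit SysV compilation define
   Microsoft-ABI varargs functions, for example:

     __attribute__ ((ms_abi))
     int
     sum_ints (int count, ...)
     {
       __builtin_ms_va_list ap;
       int i, total = 0;

       __builtin_ms_va_start (ap, count);
       for (i = 0; i < count; i++)
         total += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return total;
     }

   The sysv_* variants play the mirror-image role for code whose default
   ABI is ms_abi.  */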
25313
25314 static void
25315 ix86_init_builtin_types (void)
25316 {
25317 tree float128_type_node, float80_type_node;
25318
25319 /* The __float80 type. */
25320 float80_type_node = long_double_type_node;
25321 if (TYPE_MODE (float80_type_node) != XFmode)
25322 {
25323       /* long double does not have XFmode here; build a distinct 80-bit type.  */
25324 float80_type_node = make_node (REAL_TYPE);
25325
25326 TYPE_PRECISION (float80_type_node) = 80;
25327 layout_type (float80_type_node);
25328 }
25329 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25330
25331 /* The __float128 type. */
25332 float128_type_node = make_node (REAL_TYPE);
25333 TYPE_PRECISION (float128_type_node) = 128;
25334 layout_type (float128_type_node);
25335 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25336
25337 /* This macro is built by i386-builtin-types.awk. */
25338 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25339 }
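/* Illustrative sketch (hypothetical user code): once the registration
   above has run, both types are usable directly; constants take the
   'q'/'Q' suffix for __float128 and 'w'/'W' for __float80 on this target.

     __float128
     quad_scale (__float128 x)
     {
       return x * 2.0Q;
     }

     __float80
     ext_scale (__float80 x)
     {
       return x * 2.0W;
     }
*/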
25340
25341 static void
25342 ix86_init_builtins (void)
25343 {
25344 tree t;
25345
25346 ix86_init_builtin_types ();
25347
25348 /* TFmode support builtins. */
25349 def_builtin_const (0, "__builtin_infq",
25350 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25351 def_builtin_const (0, "__builtin_huge_valq",
25352 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25353
25354   /* We will expand them into normal calls if SSE2 isn't available, since
25355      they are used by libgcc.  */
25356 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25357 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25358 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25359 TREE_READONLY (t) = 1;
25360 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25361
25362 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25363 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25364 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25365 TREE_READONLY (t) = 1;
25366 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25367
25368 ix86_init_mmx_sse_builtins ();
25369
25370 if (TARGET_64BIT)
25371 ix86_init_builtins_va_builtins_abi ();
25372
25373 #ifdef SUBTARGET_INIT_BUILTINS
25374 SUBTARGET_INIT_BUILTINS;
25375 #endif
25376 }
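/* Illustrative sketch (hypothetical user code): __builtin_infq,
   __builtin_fabsq and __builtin_copysignq defined above operate on
   __float128 values; as noted in the comment, the latter two fall back to
   the libgcc routines __fabstf2/__copysigntf3 when SSE2 is unavailable.

     __float128
     clamp_magnitude (__float128 x, __float128 limit)
     {
       if (__builtin_fabsq (x) > limit)
         return __builtin_copysignq (limit, x);
       return x;
     }
*/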
25377
25378 /* Return the ix86 builtin for CODE. */
25379
25380 static tree
25381 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25382 {
25383 if (code >= IX86_BUILTIN_MAX)
25384 return error_mark_node;
25385
25386 return ix86_builtins[code];
25387 }
25388
25389 /* Errors in the source file can cause expand_expr to return const0_rtx
25390 where we expect a vector. To avoid crashing, use one of the vector
25391 clear instructions. */
25392 static rtx
25393 safe_vector_operand (rtx x, enum machine_mode mode)
25394 {
25395 if (x == const0_rtx)
25396 x = CONST0_RTX (mode);
25397 return x;
25398 }
25399
25400 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25401
25402 static rtx
25403 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25404 {
25405 rtx pat;
25406 tree arg0 = CALL_EXPR_ARG (exp, 0);
25407 tree arg1 = CALL_EXPR_ARG (exp, 1);
25408 rtx op0 = expand_normal (arg0);
25409 rtx op1 = expand_normal (arg1);
25410 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25411 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25412 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25413
25414 if (VECTOR_MODE_P (mode0))
25415 op0 = safe_vector_operand (op0, mode0);
25416 if (VECTOR_MODE_P (mode1))
25417 op1 = safe_vector_operand (op1, mode1);
25418
25419 if (optimize || !target
25420 || GET_MODE (target) != tmode
25421 || !insn_data[icode].operand[0].predicate (target, tmode))
25422 target = gen_reg_rtx (tmode);
25423
25424 if (GET_MODE (op1) == SImode && mode1 == TImode)
25425 {
25426 rtx x = gen_reg_rtx (V4SImode);
25427 emit_insn (gen_sse2_loadd (x, op1));
25428 op1 = gen_lowpart (TImode, x);
25429 }
25430
25431 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25432 op0 = copy_to_mode_reg (mode0, op0);
25433 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25434 op1 = copy_to_mode_reg (mode1, op1);
25435
25436 pat = GEN_FCN (icode) (target, op0, op1);
25437 if (! pat)
25438 return 0;
25439
25440 emit_insn (pat);
25441
25442 return target;
25443 }
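/* Illustrative sketch (the descriptor and user code are examples, not
   quoted from this file): a typical two-operand row in bdesc_args, say one
   tying "__builtin_ia32_paddb128" to CODE_FOR_addv16qi3, reaches the
   function above, which allocates a target register of the insn's mode,
   coerces each operand until the insn predicates accept it, and emits the
   single pattern.  So hypothetical user code such as

     typedef char v16qi __attribute__ ((vector_size (16)));

     static v16qi
     add_bytes (v16qi a, v16qi b)
     {
       return __builtin_ia32_paddb128 (a, b);
     }

   expands to one addv16qi3 insn.  */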
25444
25445 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25446
25447 static rtx
25448 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25449 enum ix86_builtin_func_type m_type,
25450 enum rtx_code sub_code)
25451 {
25452 rtx pat;
25453 int i;
25454 int nargs;
25455 bool comparison_p = false;
25456 bool tf_p = false;
25457 bool last_arg_constant = false;
25458 int num_memory = 0;
25459 struct {
25460 rtx op;
25461 enum machine_mode mode;
25462 } args[4];
25463
25464 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25465
25466 switch (m_type)
25467 {
25468 case MULTI_ARG_4_DF2_DI_I:
25469 case MULTI_ARG_4_DF2_DI_I1:
25470 case MULTI_ARG_4_SF2_SI_I:
25471 case MULTI_ARG_4_SF2_SI_I1:
25472 nargs = 4;
25473 last_arg_constant = true;
25474 break;
25475
25476 case MULTI_ARG_3_SF:
25477 case MULTI_ARG_3_DF:
25478 case MULTI_ARG_3_SF2:
25479 case MULTI_ARG_3_DF2:
25480 case MULTI_ARG_3_DI:
25481 case MULTI_ARG_3_SI:
25482 case MULTI_ARG_3_SI_DI:
25483 case MULTI_ARG_3_HI:
25484 case MULTI_ARG_3_HI_SI:
25485 case MULTI_ARG_3_QI:
25486 case MULTI_ARG_3_DI2:
25487 case MULTI_ARG_3_SI2:
25488 case MULTI_ARG_3_HI2:
25489 case MULTI_ARG_3_QI2:
25490 nargs = 3;
25491 break;
25492
25493 case MULTI_ARG_2_SF:
25494 case MULTI_ARG_2_DF:
25495 case MULTI_ARG_2_DI:
25496 case MULTI_ARG_2_SI:
25497 case MULTI_ARG_2_HI:
25498 case MULTI_ARG_2_QI:
25499 nargs = 2;
25500 break;
25501
25502 case MULTI_ARG_2_DI_IMM:
25503 case MULTI_ARG_2_SI_IMM:
25504 case MULTI_ARG_2_HI_IMM:
25505 case MULTI_ARG_2_QI_IMM:
25506 nargs = 2;
25507 last_arg_constant = true;
25508 break;
25509
25510 case MULTI_ARG_1_SF:
25511 case MULTI_ARG_1_DF:
25512 case MULTI_ARG_1_SF2:
25513 case MULTI_ARG_1_DF2:
25514 case MULTI_ARG_1_DI:
25515 case MULTI_ARG_1_SI:
25516 case MULTI_ARG_1_HI:
25517 case MULTI_ARG_1_QI:
25518 case MULTI_ARG_1_SI_DI:
25519 case MULTI_ARG_1_HI_DI:
25520 case MULTI_ARG_1_HI_SI:
25521 case MULTI_ARG_1_QI_DI:
25522 case MULTI_ARG_1_QI_SI:
25523 case MULTI_ARG_1_QI_HI:
25524 nargs = 1;
25525 break;
25526
25527 case MULTI_ARG_2_DI_CMP:
25528 case MULTI_ARG_2_SI_CMP:
25529 case MULTI_ARG_2_HI_CMP:
25530 case MULTI_ARG_2_QI_CMP:
25531 nargs = 2;
25532 comparison_p = true;
25533 break;
25534
25535 case MULTI_ARG_2_SF_TF:
25536 case MULTI_ARG_2_DF_TF:
25537 case MULTI_ARG_2_DI_TF:
25538 case MULTI_ARG_2_SI_TF:
25539 case MULTI_ARG_2_HI_TF:
25540 case MULTI_ARG_2_QI_TF:
25541 nargs = 2;
25542 tf_p = true;
25543 break;
25544
25545 default:
25546 gcc_unreachable ();
25547 }
25548
25549 if (optimize || !target
25550 || GET_MODE (target) != tmode
25551 || !insn_data[icode].operand[0].predicate (target, tmode))
25552 target = gen_reg_rtx (tmode);
25553
25554 gcc_assert (nargs <= 4);
25555
25556 for (i = 0; i < nargs; i++)
25557 {
25558 tree arg = CALL_EXPR_ARG (exp, i);
25559 rtx op = expand_normal (arg);
25560 int adjust = (comparison_p) ? 1 : 0;
25561 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25562
25563 if (last_arg_constant && i == nargs-1)
25564 {
25565 if (!CONST_INT_P (op))
25566 {
25567 error ("last argument must be an immediate");
25568 return gen_reg_rtx (tmode);
25569 }
25570 }
25571 else
25572 {
25573 if (VECTOR_MODE_P (mode))
25574 op = safe_vector_operand (op, mode);
25575
25576 /* If we aren't optimizing, only allow one memory operand to be
25577 generated. */
25578 if (memory_operand (op, mode))
25579 num_memory++;
25580
25581 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25582
25583 if (optimize
25584 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25585 || num_memory > 1)
25586 op = force_reg (mode, op);
25587 }
25588
25589 args[i].op = op;
25590 args[i].mode = mode;
25591 }
25592
25593 switch (nargs)
25594 {
25595 case 1:
25596 pat = GEN_FCN (icode) (target, args[0].op);
25597 break;
25598
25599 case 2:
25600 if (tf_p)
25601 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25602 GEN_INT ((int)sub_code));
25603 else if (! comparison_p)
25604 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25605 else
25606 {
25607 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25608 args[0].op,
25609 args[1].op);
25610
25611 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25612 }
25613 break;
25614
25615 case 3:
25616 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25617 break;
25618
25619 case 4:
25620 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25621 break;
25622
25623 default:
25624 gcc_unreachable ();
25625 }
25626
25627 if (! pat)
25628 return 0;
25629
25630 emit_insn (pat);
25631 return target;
25632 }
25633
25634 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25635 insns with vec_merge. */
25636
25637 static rtx
25638 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25639 rtx target)
25640 {
25641 rtx pat;
25642 tree arg0 = CALL_EXPR_ARG (exp, 0);
25643 rtx op1, op0 = expand_normal (arg0);
25644 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25645 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25646
25647 if (optimize || !target
25648 || GET_MODE (target) != tmode
25649 || !insn_data[icode].operand[0].predicate (target, tmode))
25650 target = gen_reg_rtx (tmode);
25651
25652 if (VECTOR_MODE_P (mode0))
25653 op0 = safe_vector_operand (op0, mode0);
25654
25655 if ((optimize && !register_operand (op0, mode0))
25656 || !insn_data[icode].operand[1].predicate (op0, mode0))
25657 op0 = copy_to_mode_reg (mode0, op0);
25658
25659 op1 = op0;
25660 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25661 op1 = copy_to_mode_reg (mode0, op1);
25662
25663 pat = GEN_FCN (icode) (target, op0, op1);
25664 if (! pat)
25665 return 0;
25666 emit_insn (pat);
25667 return target;
25668 }
25669
25670 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25671
25672 static rtx
25673 ix86_expand_sse_compare (const struct builtin_description *d,
25674 tree exp, rtx target, bool swap)
25675 {
25676 rtx pat;
25677 tree arg0 = CALL_EXPR_ARG (exp, 0);
25678 tree arg1 = CALL_EXPR_ARG (exp, 1);
25679 rtx op0 = expand_normal (arg0);
25680 rtx op1 = expand_normal (arg1);
25681 rtx op2;
25682 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25683 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25684 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25685 enum rtx_code comparison = d->comparison;
25686
25687 if (VECTOR_MODE_P (mode0))
25688 op0 = safe_vector_operand (op0, mode0);
25689 if (VECTOR_MODE_P (mode1))
25690 op1 = safe_vector_operand (op1, mode1);
25691
25692 /* Swap operands if we have a comparison that isn't available in
25693 hardware. */
25694 if (swap)
25695 {
25696 rtx tmp = gen_reg_rtx (mode1);
25697 emit_move_insn (tmp, op1);
25698 op1 = op0;
25699 op0 = tmp;
25700 }
25701
25702 if (optimize || !target
25703 || GET_MODE (target) != tmode
25704 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25705 target = gen_reg_rtx (tmode);
25706
25707 if ((optimize && !register_operand (op0, mode0))
25708 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25709 op0 = copy_to_mode_reg (mode0, op0);
25710 if ((optimize && !register_operand (op1, mode1))
25711 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25712 op1 = copy_to_mode_reg (mode1, op1);
25713
25714 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25715 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25716 if (! pat)
25717 return 0;
25718 emit_insn (pat);
25719 return target;
25720 }
25721
25722 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25723
25724 static rtx
25725 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25726 rtx target)
25727 {
25728 rtx pat;
25729 tree arg0 = CALL_EXPR_ARG (exp, 0);
25730 tree arg1 = CALL_EXPR_ARG (exp, 1);
25731 rtx op0 = expand_normal (arg0);
25732 rtx op1 = expand_normal (arg1);
25733 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25734 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25735 enum rtx_code comparison = d->comparison;
25736
25737 if (VECTOR_MODE_P (mode0))
25738 op0 = safe_vector_operand (op0, mode0);
25739 if (VECTOR_MODE_P (mode1))
25740 op1 = safe_vector_operand (op1, mode1);
25741
25742 /* Swap operands if we have a comparison that isn't available in
25743 hardware. */
25744 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25745 {
25746 rtx tmp = op1;
25747 op1 = op0;
25748 op0 = tmp;
25749 }
25750
25751 target = gen_reg_rtx (SImode);
25752 emit_move_insn (target, const0_rtx);
25753 target = gen_rtx_SUBREG (QImode, target, 0);
25754
25755 if ((optimize && !register_operand (op0, mode0))
25756 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25757 op0 = copy_to_mode_reg (mode0, op0);
25758 if ((optimize && !register_operand (op1, mode1))
25759 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25760 op1 = copy_to_mode_reg (mode1, op1);
25761
25762 pat = GEN_FCN (d->icode) (op0, op1);
25763 if (! pat)
25764 return 0;
25765 emit_insn (pat);
25766 emit_insn (gen_rtx_SET (VOIDmode,
25767 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25768 gen_rtx_fmt_ee (comparison, QImode,
25769 SET_DEST (pat),
25770 const0_rtx)));
25771
25772 return SUBREG_REG (target);
25773 }
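/* Illustrative sketch (hypothetical user code): a comi descriptor such as
   the one behind __builtin_ia32_comilt reaches the function above, which
   emits the comparison insn to set the flags and then materializes the
   0/1 result by writing the flag condition into the low byte of a fresh
   SImode pseudo (the STRICT_LOW_PART set above):

     typedef float v4sf __attribute__ ((vector_size (16)));

     static int
     first_elt_less (v4sf a, v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }
*/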
25774
25775 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
25776
25777 static rtx
25778 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
25779 rtx target)
25780 {
25781 rtx pat;
25782 tree arg0 = CALL_EXPR_ARG (exp, 0);
25783 rtx op1, op0 = expand_normal (arg0);
25784 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25785 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25786
25787 if (optimize || target == 0
25788 || GET_MODE (target) != tmode
25789 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25790 target = gen_reg_rtx (tmode);
25791
25792 if (VECTOR_MODE_P (mode0))
25793 op0 = safe_vector_operand (op0, mode0);
25794
25795 if ((optimize && !register_operand (op0, mode0))
25796 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25797 op0 = copy_to_mode_reg (mode0, op0);
25798
25799 op1 = GEN_INT (d->comparison);
25800
25801 pat = GEN_FCN (d->icode) (target, op0, op1);
25802 if (! pat)
25803 return 0;
25804 emit_insn (pat);
25805 return target;
25806 }
25807
25808 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25809
25810 static rtx
25811 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25812 rtx target)
25813 {
25814 rtx pat;
25815 tree arg0 = CALL_EXPR_ARG (exp, 0);
25816 tree arg1 = CALL_EXPR_ARG (exp, 1);
25817 rtx op0 = expand_normal (arg0);
25818 rtx op1 = expand_normal (arg1);
25819 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25820 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25821 enum rtx_code comparison = d->comparison;
25822
25823 if (VECTOR_MODE_P (mode0))
25824 op0 = safe_vector_operand (op0, mode0);
25825 if (VECTOR_MODE_P (mode1))
25826 op1 = safe_vector_operand (op1, mode1);
25827
25828 target = gen_reg_rtx (SImode);
25829 emit_move_insn (target, const0_rtx);
25830 target = gen_rtx_SUBREG (QImode, target, 0);
25831
25832 if ((optimize && !register_operand (op0, mode0))
25833 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25834 op0 = copy_to_mode_reg (mode0, op0);
25835 if ((optimize && !register_operand (op1, mode1))
25836 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25837 op1 = copy_to_mode_reg (mode1, op1);
25838
25839 pat = GEN_FCN (d->icode) (op0, op1);
25840 if (! pat)
25841 return 0;
25842 emit_insn (pat);
25843 emit_insn (gen_rtx_SET (VOIDmode,
25844 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25845 gen_rtx_fmt_ee (comparison, QImode,
25846 SET_DEST (pat),
25847 const0_rtx)));
25848
25849 return SUBREG_REG (target);
25850 }
25851
25852 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
25853
25854 static rtx
25855 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
25856 tree exp, rtx target)
25857 {
25858 rtx pat;
25859 tree arg0 = CALL_EXPR_ARG (exp, 0);
25860 tree arg1 = CALL_EXPR_ARG (exp, 1);
25861 tree arg2 = CALL_EXPR_ARG (exp, 2);
25862 tree arg3 = CALL_EXPR_ARG (exp, 3);
25863 tree arg4 = CALL_EXPR_ARG (exp, 4);
25864 rtx scratch0, scratch1;
25865 rtx op0 = expand_normal (arg0);
25866 rtx op1 = expand_normal (arg1);
25867 rtx op2 = expand_normal (arg2);
25868 rtx op3 = expand_normal (arg3);
25869 rtx op4 = expand_normal (arg4);
25870 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
25871
25872 tmode0 = insn_data[d->icode].operand[0].mode;
25873 tmode1 = insn_data[d->icode].operand[1].mode;
25874 modev2 = insn_data[d->icode].operand[2].mode;
25875 modei3 = insn_data[d->icode].operand[3].mode;
25876 modev4 = insn_data[d->icode].operand[4].mode;
25877 modei5 = insn_data[d->icode].operand[5].mode;
25878 modeimm = insn_data[d->icode].operand[6].mode;
25879
25880 if (VECTOR_MODE_P (modev2))
25881 op0 = safe_vector_operand (op0, modev2);
25882 if (VECTOR_MODE_P (modev4))
25883 op2 = safe_vector_operand (op2, modev4);
25884
25885 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25886 op0 = copy_to_mode_reg (modev2, op0);
25887 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
25888 op1 = copy_to_mode_reg (modei3, op1);
25889 if ((optimize && !register_operand (op2, modev4))
25890 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
25891 op2 = copy_to_mode_reg (modev4, op2);
25892 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
25893 op3 = copy_to_mode_reg (modei5, op3);
25894
25895 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
25896 {
25897 error ("the fifth argument must be a 8-bit immediate");
25898 return const0_rtx;
25899 }
25900
25901 if (d->code == IX86_BUILTIN_PCMPESTRI128)
25902 {
25903 if (optimize || !target
25904 || GET_MODE (target) != tmode0
25905 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25906 target = gen_reg_rtx (tmode0);
25907
25908 scratch1 = gen_reg_rtx (tmode1);
25909
25910 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
25911 }
25912 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
25913 {
25914 if (optimize || !target
25915 || GET_MODE (target) != tmode1
25916 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25917 target = gen_reg_rtx (tmode1);
25918
25919 scratch0 = gen_reg_rtx (tmode0);
25920
25921 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
25922 }
25923 else
25924 {
25925 gcc_assert (d->flag);
25926
25927 scratch0 = gen_reg_rtx (tmode0);
25928 scratch1 = gen_reg_rtx (tmode1);
25929
25930 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
25931 }
25932
25933 if (! pat)
25934 return 0;
25935
25936 emit_insn (pat);
25937
25938 if (d->flag)
25939 {
25940 target = gen_reg_rtx (SImode);
25941 emit_move_insn (target, const0_rtx);
25942 target = gen_rtx_SUBREG (QImode, target, 0);
25943
25944 emit_insn
25945 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25946 gen_rtx_fmt_ee (EQ, QImode,
25947 gen_rtx_REG ((enum machine_mode) d->flag,
25948 FLAGS_REG),
25949 const0_rtx)));
25950 return SUBREG_REG (target);
25951 }
25952 else
25953 return target;
25954 }
25955
25956
25957 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
25958
25959 static rtx
25960 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
25961 tree exp, rtx target)
25962 {
25963 rtx pat;
25964 tree arg0 = CALL_EXPR_ARG (exp, 0);
25965 tree arg1 = CALL_EXPR_ARG (exp, 1);
25966 tree arg2 = CALL_EXPR_ARG (exp, 2);
25967 rtx scratch0, scratch1;
25968 rtx op0 = expand_normal (arg0);
25969 rtx op1 = expand_normal (arg1);
25970 rtx op2 = expand_normal (arg2);
25971 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
25972
25973 tmode0 = insn_data[d->icode].operand[0].mode;
25974 tmode1 = insn_data[d->icode].operand[1].mode;
25975 modev2 = insn_data[d->icode].operand[2].mode;
25976 modev3 = insn_data[d->icode].operand[3].mode;
25977 modeimm = insn_data[d->icode].operand[4].mode;
25978
25979 if (VECTOR_MODE_P (modev2))
25980 op0 = safe_vector_operand (op0, modev2);
25981 if (VECTOR_MODE_P (modev3))
25982 op1 = safe_vector_operand (op1, modev3);
25983
25984 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25985 op0 = copy_to_mode_reg (modev2, op0);
25986 if ((optimize && !register_operand (op1, modev3))
25987 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
25988 op1 = copy_to_mode_reg (modev3, op1);
25989
25990 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
25991 {
25992 error ("the third argument must be a 8-bit immediate");
25993 return const0_rtx;
25994 }
25995
25996 if (d->code == IX86_BUILTIN_PCMPISTRI128)
25997 {
25998 if (optimize || !target
25999 || GET_MODE (target) != tmode0
26000 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26001 target = gen_reg_rtx (tmode0);
26002
26003 scratch1 = gen_reg_rtx (tmode1);
26004
26005 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26006 }
26007 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26008 {
26009 if (optimize || !target
26010 || GET_MODE (target) != tmode1
26011 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26012 target = gen_reg_rtx (tmode1);
26013
26014 scratch0 = gen_reg_rtx (tmode0);
26015
26016 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26017 }
26018 else
26019 {
26020 gcc_assert (d->flag);
26021
26022 scratch0 = gen_reg_rtx (tmode0);
26023 scratch1 = gen_reg_rtx (tmode1);
26024
26025 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26026 }
26027
26028 if (! pat)
26029 return 0;
26030
26031 emit_insn (pat);
26032
26033 if (d->flag)
26034 {
26035 target = gen_reg_rtx (SImode);
26036 emit_move_insn (target, const0_rtx);
26037 target = gen_rtx_SUBREG (QImode, target, 0);
26038
26039 emit_insn
26040 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26041 gen_rtx_fmt_ee (EQ, QImode,
26042 gen_rtx_REG ((enum machine_mode) d->flag,
26043 FLAGS_REG),
26044 const0_rtx)));
26045 return SUBREG_REG (target);
26046 }
26047 else
26048 return target;
26049 }
26050
26051 /* Subroutine of ix86_expand_builtin to take care of insns with a
26052    variable number of operands.  */
26053
26054 static rtx
26055 ix86_expand_args_builtin (const struct builtin_description *d,
26056 tree exp, rtx target)
26057 {
26058 rtx pat, real_target;
26059 unsigned int i, nargs;
26060 unsigned int nargs_constant = 0;
26061 int num_memory = 0;
26062 struct
26063 {
26064 rtx op;
26065 enum machine_mode mode;
26066 } args[4];
26067 bool last_arg_count = false;
26068 enum insn_code icode = d->icode;
26069 const struct insn_data_d *insn_p = &insn_data[icode];
26070 enum machine_mode tmode = insn_p->operand[0].mode;
26071 enum machine_mode rmode = VOIDmode;
26072 bool swap = false;
26073 enum rtx_code comparison = d->comparison;
26074
26075 switch ((enum ix86_builtin_func_type) d->flag)
26076 {
26077 case V2DF_FTYPE_V2DF_ROUND:
26078 case V4DF_FTYPE_V4DF_ROUND:
26079 case V4SF_FTYPE_V4SF_ROUND:
26080 case V8SF_FTYPE_V8SF_ROUND:
26081 return ix86_expand_sse_round (d, exp, target);
26082 case INT_FTYPE_V8SF_V8SF_PTEST:
26083 case INT_FTYPE_V4DI_V4DI_PTEST:
26084 case INT_FTYPE_V4DF_V4DF_PTEST:
26085 case INT_FTYPE_V4SF_V4SF_PTEST:
26086 case INT_FTYPE_V2DI_V2DI_PTEST:
26087 case INT_FTYPE_V2DF_V2DF_PTEST:
26088 return ix86_expand_sse_ptest (d, exp, target);
26089 case FLOAT128_FTYPE_FLOAT128:
26090 case FLOAT_FTYPE_FLOAT:
26091 case INT_FTYPE_INT:
26092 case UINT64_FTYPE_INT:
26093 case UINT16_FTYPE_UINT16:
26094 case INT64_FTYPE_INT64:
26095 case INT64_FTYPE_V4SF:
26096 case INT64_FTYPE_V2DF:
26097 case INT_FTYPE_V16QI:
26098 case INT_FTYPE_V8QI:
26099 case INT_FTYPE_V8SF:
26100 case INT_FTYPE_V4DF:
26101 case INT_FTYPE_V4SF:
26102 case INT_FTYPE_V2DF:
26103 case V16QI_FTYPE_V16QI:
26104 case V8SI_FTYPE_V8SF:
26105 case V8SI_FTYPE_V4SI:
26106 case V8HI_FTYPE_V8HI:
26107 case V8HI_FTYPE_V16QI:
26108 case V8QI_FTYPE_V8QI:
26109 case V8SF_FTYPE_V8SF:
26110 case V8SF_FTYPE_V8SI:
26111 case V8SF_FTYPE_V4SF:
26112 case V8SF_FTYPE_V8HI:
26113 case V4SI_FTYPE_V4SI:
26114 case V4SI_FTYPE_V16QI:
26115 case V4SI_FTYPE_V4SF:
26116 case V4SI_FTYPE_V8SI:
26117 case V4SI_FTYPE_V8HI:
26118 case V4SI_FTYPE_V4DF:
26119 case V4SI_FTYPE_V2DF:
26120 case V4HI_FTYPE_V4HI:
26121 case V4DF_FTYPE_V4DF:
26122 case V4DF_FTYPE_V4SI:
26123 case V4DF_FTYPE_V4SF:
26124 case V4DF_FTYPE_V2DF:
26125 case V4SF_FTYPE_V4SF:
26126 case V4SF_FTYPE_V4SI:
26127 case V4SF_FTYPE_V8SF:
26128 case V4SF_FTYPE_V4DF:
26129 case V4SF_FTYPE_V8HI:
26130 case V4SF_FTYPE_V2DF:
26131 case V2DI_FTYPE_V2DI:
26132 case V2DI_FTYPE_V16QI:
26133 case V2DI_FTYPE_V8HI:
26134 case V2DI_FTYPE_V4SI:
26135 case V2DF_FTYPE_V2DF:
26136 case V2DF_FTYPE_V4SI:
26137 case V2DF_FTYPE_V4DF:
26138 case V2DF_FTYPE_V4SF:
26139 case V2DF_FTYPE_V2SI:
26140 case V2SI_FTYPE_V2SI:
26141 case V2SI_FTYPE_V4SF:
26142 case V2SI_FTYPE_V2SF:
26143 case V2SI_FTYPE_V2DF:
26144 case V2SF_FTYPE_V2SF:
26145 case V2SF_FTYPE_V2SI:
26146 nargs = 1;
26147 break;
26148 case V4SF_FTYPE_V4SF_VEC_MERGE:
26149 case V2DF_FTYPE_V2DF_VEC_MERGE:
26150 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26151 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26152 case V16QI_FTYPE_V16QI_V16QI:
26153 case V16QI_FTYPE_V8HI_V8HI:
26154 case V8QI_FTYPE_V8QI_V8QI:
26155 case V8QI_FTYPE_V4HI_V4HI:
26156 case V8HI_FTYPE_V8HI_V8HI:
26157 case V8HI_FTYPE_V16QI_V16QI:
26158 case V8HI_FTYPE_V4SI_V4SI:
26159 case V8SF_FTYPE_V8SF_V8SF:
26160 case V8SF_FTYPE_V8SF_V8SI:
26161 case V4SI_FTYPE_V4SI_V4SI:
26162 case V4SI_FTYPE_V8HI_V8HI:
26163 case V4SI_FTYPE_V4SF_V4SF:
26164 case V4SI_FTYPE_V2DF_V2DF:
26165 case V4HI_FTYPE_V4HI_V4HI:
26166 case V4HI_FTYPE_V8QI_V8QI:
26167 case V4HI_FTYPE_V2SI_V2SI:
26168 case V4DF_FTYPE_V4DF_V4DF:
26169 case V4DF_FTYPE_V4DF_V4DI:
26170 case V4SF_FTYPE_V4SF_V4SF:
26171 case V4SF_FTYPE_V4SF_V4SI:
26172 case V4SF_FTYPE_V4SF_V2SI:
26173 case V4SF_FTYPE_V4SF_V2DF:
26174 case V4SF_FTYPE_V4SF_DI:
26175 case V4SF_FTYPE_V4SF_SI:
26176 case V2DI_FTYPE_V2DI_V2DI:
26177 case V2DI_FTYPE_V16QI_V16QI:
26178 case V2DI_FTYPE_V4SI_V4SI:
26179 case V2DI_FTYPE_V2DI_V16QI:
26180 case V2DI_FTYPE_V2DF_V2DF:
26181 case V2SI_FTYPE_V2SI_V2SI:
26182 case V2SI_FTYPE_V4HI_V4HI:
26183 case V2SI_FTYPE_V2SF_V2SF:
26184 case V2DF_FTYPE_V2DF_V2DF:
26185 case V2DF_FTYPE_V2DF_V4SF:
26186 case V2DF_FTYPE_V2DF_V2DI:
26187 case V2DF_FTYPE_V2DF_DI:
26188 case V2DF_FTYPE_V2DF_SI:
26189 case V2SF_FTYPE_V2SF_V2SF:
26190 case V1DI_FTYPE_V1DI_V1DI:
26191 case V1DI_FTYPE_V8QI_V8QI:
26192 case V1DI_FTYPE_V2SI_V2SI:
26193 if (comparison == UNKNOWN)
26194 return ix86_expand_binop_builtin (icode, exp, target);
26195 nargs = 2;
26196 break;
26197 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26198 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26199 gcc_assert (comparison != UNKNOWN);
26200 nargs = 2;
26201 swap = true;
26202 break;
26203 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26204 case V8HI_FTYPE_V8HI_SI_COUNT:
26205 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26206 case V4SI_FTYPE_V4SI_SI_COUNT:
26207 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26208 case V4HI_FTYPE_V4HI_SI_COUNT:
26209 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26210 case V2DI_FTYPE_V2DI_SI_COUNT:
26211 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26212 case V2SI_FTYPE_V2SI_SI_COUNT:
26213 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26214 case V1DI_FTYPE_V1DI_SI_COUNT:
26215 nargs = 2;
26216 last_arg_count = true;
26217 break;
26218 case UINT64_FTYPE_UINT64_UINT64:
26219 case UINT_FTYPE_UINT_UINT:
26220 case UINT_FTYPE_UINT_USHORT:
26221 case UINT_FTYPE_UINT_UCHAR:
26222 case UINT16_FTYPE_UINT16_INT:
26223 case UINT8_FTYPE_UINT8_INT:
26224 nargs = 2;
26225 break;
26226 case V2DI_FTYPE_V2DI_INT_CONVERT:
26227 nargs = 2;
26228 rmode = V1TImode;
26229 nargs_constant = 1;
26230 break;
26231 case V8HI_FTYPE_V8HI_INT:
26232 case V8HI_FTYPE_V8SF_INT:
26233 case V8HI_FTYPE_V4SF_INT:
26234 case V8SF_FTYPE_V8SF_INT:
26235 case V4SI_FTYPE_V4SI_INT:
26236 case V4SI_FTYPE_V8SI_INT:
26237 case V4HI_FTYPE_V4HI_INT:
26238 case V4DF_FTYPE_V4DF_INT:
26239 case V4SF_FTYPE_V4SF_INT:
26240 case V4SF_FTYPE_V8SF_INT:
26241 case V2DI_FTYPE_V2DI_INT:
26242 case V2DF_FTYPE_V2DF_INT:
26243 case V2DF_FTYPE_V4DF_INT:
26244 nargs = 2;
26245 nargs_constant = 1;
26246 break;
26247 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26248 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26249 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26250 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26251 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26252 nargs = 3;
26253 break;
26254 case V16QI_FTYPE_V16QI_V16QI_INT:
26255 case V8HI_FTYPE_V8HI_V8HI_INT:
26256 case V8SI_FTYPE_V8SI_V8SI_INT:
26257 case V8SI_FTYPE_V8SI_V4SI_INT:
26258 case V8SF_FTYPE_V8SF_V8SF_INT:
26259 case V8SF_FTYPE_V8SF_V4SF_INT:
26260 case V4SI_FTYPE_V4SI_V4SI_INT:
26261 case V4DF_FTYPE_V4DF_V4DF_INT:
26262 case V4DF_FTYPE_V4DF_V2DF_INT:
26263 case V4SF_FTYPE_V4SF_V4SF_INT:
26264 case V2DI_FTYPE_V2DI_V2DI_INT:
26265 case V2DF_FTYPE_V2DF_V2DF_INT:
26266 nargs = 3;
26267 nargs_constant = 1;
26268 break;
26269 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26270 nargs = 3;
26271 rmode = V2DImode;
26272 nargs_constant = 1;
26273 break;
26274 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26275 nargs = 3;
26276 rmode = DImode;
26277 nargs_constant = 1;
26278 break;
26279 case V2DI_FTYPE_V2DI_UINT_UINT:
26280 nargs = 3;
26281 nargs_constant = 2;
26282 break;
26283 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26284 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26285 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26286 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26287 nargs = 4;
26288 nargs_constant = 1;
26289 break;
26290 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26291 nargs = 4;
26292 nargs_constant = 2;
26293 break;
26294 default:
26295 gcc_unreachable ();
26296 }
26297
26298 gcc_assert (nargs <= ARRAY_SIZE (args));
26299
26300 if (comparison != UNKNOWN)
26301 {
26302 gcc_assert (nargs == 2);
26303 return ix86_expand_sse_compare (d, exp, target, swap);
26304 }
26305
26306 if (rmode == VOIDmode || rmode == tmode)
26307 {
26308 if (optimize
26309 || target == 0
26310 || GET_MODE (target) != tmode
26311 || !insn_p->operand[0].predicate (target, tmode))
26312 target = gen_reg_rtx (tmode);
26313 real_target = target;
26314 }
26315 else
26316 {
26317 target = gen_reg_rtx (rmode);
26318 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26319 }
26320
26321 for (i = 0; i < nargs; i++)
26322 {
26323 tree arg = CALL_EXPR_ARG (exp, i);
26324 rtx op = expand_normal (arg);
26325 enum machine_mode mode = insn_p->operand[i + 1].mode;
26326 bool match = insn_p->operand[i + 1].predicate (op, mode);
26327
26328 if (last_arg_count && (i + 1) == nargs)
26329 {
26330 	  /* SIMD shift insns take either an 8-bit immediate or a
26331 	     register as the count, but the builtin functions take an int
26332 	     as the count.  If the count doesn't match, put it in a register.  */
26333 if (!match)
26334 {
26335 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26336 if (!insn_p->operand[i + 1].predicate (op, mode))
26337 op = copy_to_reg (op);
26338 }
26339 }
26340 else if ((nargs - i) <= nargs_constant)
26341 {
26342 if (!match)
26343 switch (icode)
26344 {
26345 case CODE_FOR_sse4_1_roundpd:
26346 case CODE_FOR_sse4_1_roundps:
26347 case CODE_FOR_sse4_1_roundsd:
26348 case CODE_FOR_sse4_1_roundss:
26349 case CODE_FOR_sse4_1_blendps:
26350 case CODE_FOR_avx_blendpd256:
26351 case CODE_FOR_avx_vpermilv4df:
26352 case CODE_FOR_avx_roundpd256:
26353 case CODE_FOR_avx_roundps256:
26354 error ("the last argument must be a 4-bit immediate");
26355 return const0_rtx;
26356
26357 case CODE_FOR_sse4_1_blendpd:
26358 case CODE_FOR_avx_vpermilv2df:
26359 case CODE_FOR_xop_vpermil2v2df3:
26360 case CODE_FOR_xop_vpermil2v4sf3:
26361 case CODE_FOR_xop_vpermil2v4df3:
26362 case CODE_FOR_xop_vpermil2v8sf3:
26363 error ("the last argument must be a 2-bit immediate");
26364 return const0_rtx;
26365
26366 case CODE_FOR_avx_vextractf128v4df:
26367 case CODE_FOR_avx_vextractf128v8sf:
26368 case CODE_FOR_avx_vextractf128v8si:
26369 case CODE_FOR_avx_vinsertf128v4df:
26370 case CODE_FOR_avx_vinsertf128v8sf:
26371 case CODE_FOR_avx_vinsertf128v8si:
26372 error ("the last argument must be a 1-bit immediate");
26373 return const0_rtx;
26374
26375 case CODE_FOR_avx_vmcmpv2df3:
26376 case CODE_FOR_avx_vmcmpv4sf3:
26377 case CODE_FOR_avx_cmpv2df3:
26378 case CODE_FOR_avx_cmpv4sf3:
26379 case CODE_FOR_avx_cmpv4df3:
26380 case CODE_FOR_avx_cmpv8sf3:
26381 error ("the last argument must be a 5-bit immediate");
26382 return const0_rtx;
26383
26384 default:
26385 switch (nargs_constant)
26386 {
26387 case 2:
26388 if ((nargs - i) == nargs_constant)
26389 {
26390 error ("the next to last argument must be an 8-bit immediate");
26391 break;
26392 }
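		  /* FALLTHRU */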
26393 case 1:
26394 error ("the last argument must be an 8-bit immediate");
26395 break;
26396 default:
26397 gcc_unreachable ();
26398 }
26399 return const0_rtx;
26400 }
26401 }
26402 else
26403 {
26404 if (VECTOR_MODE_P (mode))
26405 op = safe_vector_operand (op, mode);
26406
26407 /* If we aren't optimizing, only allow one memory operand to
26408 be generated. */
26409 if (memory_operand (op, mode))
26410 num_memory++;
26411
26412 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26413 {
26414 if (optimize || !match || num_memory > 1)
26415 op = copy_to_mode_reg (mode, op);
26416 }
26417 else
26418 {
26419 op = copy_to_reg (op);
26420 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26421 }
26422 }
26423
26424 args[i].op = op;
26425 args[i].mode = mode;
26426 }
26427
26428 switch (nargs)
26429 {
26430 case 1:
26431 pat = GEN_FCN (icode) (real_target, args[0].op);
26432 break;
26433 case 2:
26434 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26435 break;
26436 case 3:
26437 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26438 args[2].op);
26439 break;
26440 case 4:
26441 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26442 args[2].op, args[3].op);
26443 break;
26444 default:
26445 gcc_unreachable ();
26446 }
26447
26448 if (! pat)
26449 return 0;
26450
26451 emit_insn (pat);
26452 return target;
26453 }
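/* Note on the *_CONVERT cases above (explanatory remark added for
   illustration): when RMODE is set and differs from the insn's output
   mode TMODE, the pseudo this function returns is allocated in RMODE,
   and the insn itself writes through REAL_TARGET, a TMODE subreg of that
   pseudo at offset 0, roughly

     (set (subreg:TMODE (reg:RMODE result) 0) ...)

   so a single pattern (for instance one operating on the full 128-bit
   value in V1TImode) can back a builtin whose vector layout is declared
   differently.  */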
26454
26455 /* Subroutine of ix86_expand_builtin to take care of special insns
26456    with a variable number of operands.  */
26457
26458 static rtx
26459 ix86_expand_special_args_builtin (const struct builtin_description *d,
26460 tree exp, rtx target)
26461 {
26462 tree arg;
26463 rtx pat, op;
26464 unsigned int i, nargs, arg_adjust, memory;
26465 struct
26466 {
26467 rtx op;
26468 enum machine_mode mode;
26469 } args[3];
26470 enum insn_code icode = d->icode;
26471 bool last_arg_constant = false;
26472 const struct insn_data_d *insn_p = &insn_data[icode];
26473 enum machine_mode tmode = insn_p->operand[0].mode;
26474 enum { load, store } klass;
26475
26476 switch ((enum ix86_builtin_func_type) d->flag)
26477 {
26478 case VOID_FTYPE_VOID:
26479 if (icode == CODE_FOR_avx_vzeroupper)
26480 target = GEN_INT (vzeroupper_intrinsic);
26481 emit_insn (GEN_FCN (icode) (target));
26482 return 0;
26483 case VOID_FTYPE_UINT64:
26484 case VOID_FTYPE_UNSIGNED:
26485 nargs = 0;
26486 klass = store;
26487 memory = 0;
26488 break;
26490 case UINT64_FTYPE_VOID:
26491 case UNSIGNED_FTYPE_VOID:
26492 nargs = 0;
26493 klass = load;
26494 memory = 0;
26495 break;
26496 case UINT64_FTYPE_PUNSIGNED:
26497 case V2DI_FTYPE_PV2DI:
26498 case V32QI_FTYPE_PCCHAR:
26499 case V16QI_FTYPE_PCCHAR:
26500 case V8SF_FTYPE_PCV4SF:
26501 case V8SF_FTYPE_PCFLOAT:
26502 case V4SF_FTYPE_PCFLOAT:
26503 case V4DF_FTYPE_PCV2DF:
26504 case V4DF_FTYPE_PCDOUBLE:
26505 case V2DF_FTYPE_PCDOUBLE:
26506 case VOID_FTYPE_PVOID:
26507 nargs = 1;
26508 klass = load;
26509 memory = 0;
26510 break;
26511 case VOID_FTYPE_PV2SF_V4SF:
26512 case VOID_FTYPE_PV4DI_V4DI:
26513 case VOID_FTYPE_PV2DI_V2DI:
26514 case VOID_FTYPE_PCHAR_V32QI:
26515 case VOID_FTYPE_PCHAR_V16QI:
26516 case VOID_FTYPE_PFLOAT_V8SF:
26517 case VOID_FTYPE_PFLOAT_V4SF:
26518 case VOID_FTYPE_PDOUBLE_V4DF:
26519 case VOID_FTYPE_PDOUBLE_V2DF:
26520 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26521 case VOID_FTYPE_PINT_INT:
26522 nargs = 1;
26523 klass = store;
26524 /* Reserve memory operand for target. */
26525 memory = ARRAY_SIZE (args);
26526 break;
26527 case V4SF_FTYPE_V4SF_PCV2SF:
26528 case V2DF_FTYPE_V2DF_PCDOUBLE:
26529 nargs = 2;
26530 klass = load;
26531 memory = 1;
26532 break;
26533 case V8SF_FTYPE_PCV8SF_V8SI:
26534 case V4DF_FTYPE_PCV4DF_V4DI:
26535 case V4SF_FTYPE_PCV4SF_V4SI:
26536 case V2DF_FTYPE_PCV2DF_V2DI:
26537 nargs = 2;
26538 klass = load;
26539 memory = 0;
26540 break;
26541 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26542 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26543 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26544 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26545 nargs = 2;
26546 klass = store;
26547 /* Reserve memory operand for target. */
26548 memory = ARRAY_SIZE (args);
26549 break;
26550 case VOID_FTYPE_UINT_UINT_UINT:
26551 case VOID_FTYPE_UINT64_UINT_UINT:
26552 case UCHAR_FTYPE_UINT_UINT_UINT:
26553 case UCHAR_FTYPE_UINT64_UINT_UINT:
26554 nargs = 3;
26555 klass = load;
26556 memory = ARRAY_SIZE (args);
26557 last_arg_constant = true;
26558 break;
26559 default:
26560 gcc_unreachable ();
26561 }
26562
26563 gcc_assert (nargs <= ARRAY_SIZE (args));
26564
26565 if (klass == store)
26566 {
26567 arg = CALL_EXPR_ARG (exp, 0);
26568 op = expand_normal (arg);
26569 gcc_assert (target == 0);
26570 if (memory)
26571 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26572 else
26573 target = force_reg (tmode, op);
26574 arg_adjust = 1;
26575 }
26576 else
26577 {
26578 arg_adjust = 0;
26579 if (optimize
26580 || target == 0
26581 || GET_MODE (target) != tmode
26582 || !insn_p->operand[0].predicate (target, tmode))
26583 target = gen_reg_rtx (tmode);
26584 }
26585
26586 for (i = 0; i < nargs; i++)
26587 {
26588 enum machine_mode mode = insn_p->operand[i + 1].mode;
26589 bool match;
26590
26591 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26592 op = expand_normal (arg);
26593 match = insn_p->operand[i + 1].predicate (op, mode);
26594
26595 if (last_arg_constant && (i + 1) == nargs)
26596 {
26597 if (!match)
26598 {
26599 if (icode == CODE_FOR_lwp_lwpvalsi3
26600 || icode == CODE_FOR_lwp_lwpinssi3
26601 || icode == CODE_FOR_lwp_lwpvaldi3
26602 || icode == CODE_FOR_lwp_lwpinsdi3)
26603 error ("the last argument must be a 32-bit immediate");
26604 else
26605 error ("the last argument must be an 8-bit immediate");
26606 return const0_rtx;
26607 }
26608 }
26609 else
26610 {
26611 if (i == memory)
26612 {
26613 /* This must be the memory operand. */
26614 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26615 gcc_assert (GET_MODE (op) == mode
26616 || GET_MODE (op) == VOIDmode);
26617 }
26618 else
26619 {
26620 	      /* This must be a register.  */
26621 if (VECTOR_MODE_P (mode))
26622 op = safe_vector_operand (op, mode);
26623
26624 gcc_assert (GET_MODE (op) == mode
26625 || GET_MODE (op) == VOIDmode);
26626 op = copy_to_mode_reg (mode, op);
26627 }
26628 }
26629
26630 args[i].op = op;
26631 args[i].mode = mode;
26632 }
26633
26634 switch (nargs)
26635 {
26636 case 0:
26637 pat = GEN_FCN (icode) (target);
26638 break;
26639 case 1:
26640 pat = GEN_FCN (icode) (target, args[0].op);
26641 break;
26642 case 2:
26643 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26644 break;
26645 case 3:
26646 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26647 break;
26648 default:
26649 gcc_unreachable ();
26650 }
26651
26652 if (! pat)
26653 return 0;
26654 emit_insn (pat);
26655 return klass == store ? 0 : target;
26656 }
26657
26658 /* Return the integer constant in ARG. Constrain it to be in the range
26659 of the subparts of VEC_TYPE; issue an error if not. */
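/* For example, if VEC_TYPE is a 4-element vector type, the valid
   selectors are 0..3; out-of-range or non-constant selectors are
   diagnosed and 0 is returned as a fallback.  */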
26660
26661 static int
26662 get_element_number (tree vec_type, tree arg)
26663 {
26664 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26665
26666 if (!host_integerp (arg, 1)
26667 || (elt = tree_low_cst (arg, 1), elt > max))
26668 {
26669 error ("selector must be an integer constant in the range 0..%wi", max);
26670 return 0;
26671 }
26672
26673 return elt;
26674 }
26675
26676 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26677 ix86_expand_vector_init. We DO have language-level syntax for this, in
26678 the form of (type){ init-list }. Except that since we can't place emms
26679 instructions from inside the compiler, we can't allow the use of MMX
26680 registers unless the user explicitly asks for it. So we do *not* define
26681 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26682    we have builtins invoked by mmintrin.h that give us license to emit
26683 these sorts of instructions. */
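/* A rough usage sketch (hypothetical user code, not part of GCC):

     #include <mmintrin.h>
     __m64 v = _mm_set_pi16 (3, 2, 1, 0);

   mmintrin.h expands such constructors to one of the vec_init builtins,
   which is then routed to ix86_expand_vec_init_builtin below.  */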
26684
26685 static rtx
26686 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26687 {
26688 enum machine_mode tmode = TYPE_MODE (type);
26689 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26690 int i, n_elt = GET_MODE_NUNITS (tmode);
26691 rtvec v = rtvec_alloc (n_elt);
26692
26693 gcc_assert (VECTOR_MODE_P (tmode));
26694 gcc_assert (call_expr_nargs (exp) == n_elt);
26695
26696 for (i = 0; i < n_elt; ++i)
26697 {
26698 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26699 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26700 }
26701
26702 if (!target || !register_operand (target, tmode))
26703 target = gen_reg_rtx (tmode);
26704
26705 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26706 return target;
26707 }
26708
26709 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26710 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26711 had a language-level syntax for referencing vector elements. */
26712
26713 static rtx
26714 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26715 {
26716 enum machine_mode tmode, mode0;
26717 tree arg0, arg1;
26718 int elt;
26719 rtx op0;
26720
26721 arg0 = CALL_EXPR_ARG (exp, 0);
26722 arg1 = CALL_EXPR_ARG (exp, 1);
26723
26724 op0 = expand_normal (arg0);
26725 elt = get_element_number (TREE_TYPE (arg0), arg1);
26726
26727 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26728 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26729 gcc_assert (VECTOR_MODE_P (mode0));
26730
26731 op0 = force_reg (mode0, op0);
26732
26733 if (optimize || !target || !register_operand (target, tmode))
26734 target = gen_reg_rtx (tmode);
26735
26736 ix86_expand_vector_extract (true, target, op0, elt);
26737
26738 return target;
26739 }
26740
26741 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26742 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26743 a language-level syntax for referencing vector elements. */
26744
26745 static rtx
26746 ix86_expand_vec_set_builtin (tree exp)
26747 {
26748 enum machine_mode tmode, mode1;
26749 tree arg0, arg1, arg2;
26750 int elt;
26751 rtx op0, op1, target;
26752
26753 arg0 = CALL_EXPR_ARG (exp, 0);
26754 arg1 = CALL_EXPR_ARG (exp, 1);
26755 arg2 = CALL_EXPR_ARG (exp, 2);
26756
26757 tmode = TYPE_MODE (TREE_TYPE (arg0));
26758 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26759 gcc_assert (VECTOR_MODE_P (tmode));
26760
26761 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26762 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26763 elt = get_element_number (TREE_TYPE (arg0), arg2);
26764
26765 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26766 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26767
26768 op0 = force_reg (tmode, op0);
26769 op1 = force_reg (mode1, op1);
26770
26771 /* OP0 is the source of these builtin functions and shouldn't be
26772 modified. Create a copy, use it and return it as target. */
26773 target = gen_reg_rtx (tmode);
26774 emit_move_insn (target, op0);
26775 ix86_expand_vector_set (true, target, op1, elt);
26776
26777 return target;
26778 }
26779
26780 /* Expand an expression EXP that calls a built-in function,
26781 with result going to TARGET if that's convenient
26782 (and in mode MODE if that's convenient).
26783 SUBTARGET may be used as the target for computing one of EXP's operands.
26784 IGNORE is nonzero if the value is to be ignored. */
26785
26786 static rtx
26787 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26788 enum machine_mode mode ATTRIBUTE_UNUSED,
26789 int ignore ATTRIBUTE_UNUSED)
26790 {
26791 const struct builtin_description *d;
26792 size_t i;
26793 enum insn_code icode;
26794 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26795 tree arg0, arg1, arg2;
26796 rtx op0, op1, op2, pat;
26797 enum machine_mode mode0, mode1, mode2;
26798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26799
26800 /* Determine whether the builtin function is available under the current ISA.
26801 Originally the builtin was not created if it wasn't applicable to the
26802 current ISA based on the command line switches. With function specific
26803 options, we need to check in the context of the function making the call
26804 whether it is supported. */
26805 if (ix86_builtins_isa[fcode].isa
26806 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26807 {
26808 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26809 NULL, (enum fpmath_unit) 0, false);
26810
26811 if (!opts)
26812 error ("%qE needs unknown isa option", fndecl);
26813 else
26814 {
26815 gcc_assert (opts != NULL);
26816 error ("%qE needs isa option %s", fndecl, opts);
26817 free (opts);
26818 }
26819 return const0_rtx;
26820 }
26821
26822 switch (fcode)
26823 {
26824 case IX86_BUILTIN_MASKMOVQ:
26825 case IX86_BUILTIN_MASKMOVDQU:
26826 icode = (fcode == IX86_BUILTIN_MASKMOVQ
26827 ? CODE_FOR_mmx_maskmovq
26828 : CODE_FOR_sse2_maskmovdqu);
26829 /* Note the arg order is different from the operand order. */
26830 arg1 = CALL_EXPR_ARG (exp, 0);
26831 arg2 = CALL_EXPR_ARG (exp, 1);
26832 arg0 = CALL_EXPR_ARG (exp, 2);
26833 op0 = expand_normal (arg0);
26834 op1 = expand_normal (arg1);
26835 op2 = expand_normal (arg2);
26836 mode0 = insn_data[icode].operand[0].mode;
26837 mode1 = insn_data[icode].operand[1].mode;
26838 mode2 = insn_data[icode].operand[2].mode;
26839
26840 op0 = force_reg (Pmode, op0);
26841 op0 = gen_rtx_MEM (mode1, op0);
26842
26843 if (!insn_data[icode].operand[0].predicate (op0, mode0))
26844 op0 = copy_to_mode_reg (mode0, op0);
26845 if (!insn_data[icode].operand[1].predicate (op1, mode1))
26846 op1 = copy_to_mode_reg (mode1, op1);
26847 if (!insn_data[icode].operand[2].predicate (op2, mode2))
26848 op2 = copy_to_mode_reg (mode2, op2);
26849 pat = GEN_FCN (icode) (op0, op1, op2);
26850 if (! pat)
26851 return 0;
26852 emit_insn (pat);
26853 return 0;
26854
26855 case IX86_BUILTIN_LDMXCSR:
26856 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
26857 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26858 emit_move_insn (target, op0);
26859 emit_insn (gen_sse_ldmxcsr (target));
26860 return 0;
26861
26862 case IX86_BUILTIN_STMXCSR:
26863 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26864 emit_insn (gen_sse_stmxcsr (target));
26865 return copy_to_mode_reg (SImode, target);
26866
26867 case IX86_BUILTIN_CLFLUSH:
26868 arg0 = CALL_EXPR_ARG (exp, 0);
26869 op0 = expand_normal (arg0);
26870 icode = CODE_FOR_sse2_clflush;
26871 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26872 op0 = copy_to_mode_reg (Pmode, op0);
26873
26874 emit_insn (gen_sse2_clflush (op0));
26875 return 0;
26876
26877 case IX86_BUILTIN_MONITOR:
26878 arg0 = CALL_EXPR_ARG (exp, 0);
26879 arg1 = CALL_EXPR_ARG (exp, 1);
26880 arg2 = CALL_EXPR_ARG (exp, 2);
26881 op0 = expand_normal (arg0);
26882 op1 = expand_normal (arg1);
26883 op2 = expand_normal (arg2);
26884 if (!REG_P (op0))
26885 op0 = copy_to_mode_reg (Pmode, op0);
26886 if (!REG_P (op1))
26887 op1 = copy_to_mode_reg (SImode, op1);
26888 if (!REG_P (op2))
26889 op2 = copy_to_mode_reg (SImode, op2);
26890 emit_insn (ix86_gen_monitor (op0, op1, op2));
26891 return 0;
26892
26893 case IX86_BUILTIN_MWAIT:
26894 arg0 = CALL_EXPR_ARG (exp, 0);
26895 arg1 = CALL_EXPR_ARG (exp, 1);
26896 op0 = expand_normal (arg0);
26897 op1 = expand_normal (arg1);
26898 if (!REG_P (op0))
26899 op0 = copy_to_mode_reg (SImode, op0);
26900 if (!REG_P (op1))
26901 op1 = copy_to_mode_reg (SImode, op1);
26902 emit_insn (gen_sse3_mwait (op0, op1));
26903 return 0;
26904
26905 case IX86_BUILTIN_VEC_INIT_V2SI:
26906 case IX86_BUILTIN_VEC_INIT_V4HI:
26907 case IX86_BUILTIN_VEC_INIT_V8QI:
26908 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
26909
26910 case IX86_BUILTIN_VEC_EXT_V2DF:
26911 case IX86_BUILTIN_VEC_EXT_V2DI:
26912 case IX86_BUILTIN_VEC_EXT_V4SF:
26913 case IX86_BUILTIN_VEC_EXT_V4SI:
26914 case IX86_BUILTIN_VEC_EXT_V8HI:
26915 case IX86_BUILTIN_VEC_EXT_V2SI:
26916 case IX86_BUILTIN_VEC_EXT_V4HI:
26917 case IX86_BUILTIN_VEC_EXT_V16QI:
26918 return ix86_expand_vec_ext_builtin (exp, target);
26919
26920 case IX86_BUILTIN_VEC_SET_V2DI:
26921 case IX86_BUILTIN_VEC_SET_V4SF:
26922 case IX86_BUILTIN_VEC_SET_V4SI:
26923 case IX86_BUILTIN_VEC_SET_V8HI:
26924 case IX86_BUILTIN_VEC_SET_V4HI:
26925 case IX86_BUILTIN_VEC_SET_V16QI:
26926 return ix86_expand_vec_set_builtin (exp);
26927
26928 case IX86_BUILTIN_VEC_PERM_V2DF:
26929 case IX86_BUILTIN_VEC_PERM_V4SF:
26930 case IX86_BUILTIN_VEC_PERM_V2DI:
26931 case IX86_BUILTIN_VEC_PERM_V4SI:
26932 case IX86_BUILTIN_VEC_PERM_V8HI:
26933 case IX86_BUILTIN_VEC_PERM_V16QI:
26934 case IX86_BUILTIN_VEC_PERM_V2DI_U:
26935 case IX86_BUILTIN_VEC_PERM_V4SI_U:
26936 case IX86_BUILTIN_VEC_PERM_V8HI_U:
26937 case IX86_BUILTIN_VEC_PERM_V16QI_U:
26938 case IX86_BUILTIN_VEC_PERM_V4DF:
26939 case IX86_BUILTIN_VEC_PERM_V8SF:
26940 return ix86_expand_vec_perm_builtin (exp);
26941
26942 case IX86_BUILTIN_INFQ:
26943 case IX86_BUILTIN_HUGE_VALQ:
26944 {
26945 REAL_VALUE_TYPE inf;
26946 rtx tmp;
26947
26948 real_inf (&inf);
26949 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
26950
26951 tmp = validize_mem (force_const_mem (mode, tmp));
26952
26953 if (target == 0)
26954 target = gen_reg_rtx (mode);
26955
26956 emit_move_insn (target, tmp);
26957 return target;
26958 }
26959
26960 case IX86_BUILTIN_LLWPCB:
26961 arg0 = CALL_EXPR_ARG (exp, 0);
26962 op0 = expand_normal (arg0);
26963 icode = CODE_FOR_lwp_llwpcb;
26964 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26965 op0 = copy_to_mode_reg (Pmode, op0);
26966 emit_insn (gen_lwp_llwpcb (op0));
26967 return 0;
26968
26969 case IX86_BUILTIN_SLWPCB:
26970 icode = CODE_FOR_lwp_slwpcb;
26971 if (!target
26972 || !insn_data[icode].operand[0].predicate (target, Pmode))
26973 target = gen_reg_rtx (Pmode);
26974 emit_insn (gen_lwp_slwpcb (target));
26975 return target;
26976
26977 case IX86_BUILTIN_BEXTRI32:
26978 case IX86_BUILTIN_BEXTRI64:
26979 arg0 = CALL_EXPR_ARG (exp, 0);
26980 arg1 = CALL_EXPR_ARG (exp, 1);
26981 op0 = expand_normal (arg0);
26982 op1 = expand_normal (arg1);
26983 icode = (fcode == IX86_BUILTIN_BEXTRI32
26984 ? CODE_FOR_tbm_bextri_si
26985 : CODE_FOR_tbm_bextri_di);
26986 if (!CONST_INT_P (op1))
26987 {
26988 error ("last argument must be an immediate");
26989 return const0_rtx;
26990 }
26991 else
26992 {
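	  /* The immediate encodes the bit field as (length << 8) | lsb_index;
	     e.g. a (hypothetical) operand value of 0x0804 selects a field of
	     8 bits starting at bit 4.  */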
26993 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
26994 unsigned char lsb_index = INTVAL (op1) & 0xFF;
26995 op1 = GEN_INT (length);
26996 op2 = GEN_INT (lsb_index);
26997 pat = GEN_FCN (icode) (target, op0, op1, op2);
26998 if (pat)
26999 emit_insn (pat);
27000 return target;
27001 }
27002
27003 case IX86_BUILTIN_RDRAND16_STEP:
27004 icode = CODE_FOR_rdrandhi_1;
27005 mode0 = HImode;
27006 goto rdrand_step;
27007
27008 case IX86_BUILTIN_RDRAND32_STEP:
27009 icode = CODE_FOR_rdrandsi_1;
27010 mode0 = SImode;
27011 goto rdrand_step;
27012
27013 case IX86_BUILTIN_RDRAND64_STEP:
27014 icode = CODE_FOR_rdranddi_1;
27015 mode0 = DImode;
27016
27017 rdrand_step:
27018 op0 = gen_reg_rtx (mode0);
27019 emit_insn (GEN_FCN (icode) (op0));
27020
27021 arg0 = CALL_EXPR_ARG (exp, 0);
27022 op1 = expand_normal (arg0);
27023 if (!address_operand (op1, VOIDmode))
27024 op1 = copy_addr_to_reg (op1);
27025 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27026
27027 op1 = gen_reg_rtx (SImode);
27028 emit_move_insn (op1, CONST1_RTX (SImode));
27029
27030 /* Emit SImode conditional move. */
27031 if (mode0 == HImode)
27032 {
27033 op2 = gen_reg_rtx (SImode);
27034 emit_insn (gen_zero_extendhisi2 (op2, op0));
27035 }
27036 else if (mode0 == SImode)
27037 op2 = op0;
27038 else
27039 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27040
27041 if (target == 0)
27042 target = gen_reg_rtx (SImode);
27043
27044 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27045 const0_rtx);
27046 emit_insn (gen_rtx_SET (VOIDmode, target,
27047 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27048 return target;
27049
27050 default:
27051 break;
27052 }
27053
27054 for (i = 0, d = bdesc_special_args;
27055 i < ARRAY_SIZE (bdesc_special_args);
27056 i++, d++)
27057 if (d->code == fcode)
27058 return ix86_expand_special_args_builtin (d, exp, target);
27059
27060 for (i = 0, d = bdesc_args;
27061 i < ARRAY_SIZE (bdesc_args);
27062 i++, d++)
27063 if (d->code == fcode)
27064 switch (fcode)
27065 {
27066 case IX86_BUILTIN_FABSQ:
27067 case IX86_BUILTIN_COPYSIGNQ:
27068 if (!TARGET_SSE2)
27069 /* Emit a normal call if SSE2 isn't available. */
27070 return expand_call (exp, target, ignore);
27071 default:
27072 return ix86_expand_args_builtin (d, exp, target);
27073 }
27074
27075 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27076 if (d->code == fcode)
27077 return ix86_expand_sse_comi (d, exp, target);
27078
27079 for (i = 0, d = bdesc_pcmpestr;
27080 i < ARRAY_SIZE (bdesc_pcmpestr);
27081 i++, d++)
27082 if (d->code == fcode)
27083 return ix86_expand_sse_pcmpestr (d, exp, target);
27084
27085 for (i = 0, d = bdesc_pcmpistr;
27086 i < ARRAY_SIZE (bdesc_pcmpistr);
27087 i++, d++)
27088 if (d->code == fcode)
27089 return ix86_expand_sse_pcmpistr (d, exp, target);
27090
27091 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27092 if (d->code == fcode)
27093 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27094 (enum ix86_builtin_func_type)
27095 d->flag, d->comparison);
27096
27097 gcc_unreachable ();
27098 }
27099
27100 /* Returns a function decl for a vectorized version of the builtin
27101    function FNDECL, with result vector type TYPE_OUT and argument vector
27102    type TYPE_IN, or NULL_TREE if it is not available.  */
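/* For instance, BUILT_IN_SQRT over 2 DFmode elements maps to
   IX86_BUILTIN_SQRTPD and over 4 elements to IX86_BUILTIN_SQRTPD256,
   as enumerated in the switch below.  */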
27103
27104 static tree
27105 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27106 tree type_in)
27107 {
27108 enum machine_mode in_mode, out_mode;
27109 int in_n, out_n;
27110 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27111
27112 if (TREE_CODE (type_out) != VECTOR_TYPE
27113 || TREE_CODE (type_in) != VECTOR_TYPE
27114 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27115 return NULL_TREE;
27116
27117 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27118 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27119 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27120 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27121
27122 switch (fn)
27123 {
27124 case BUILT_IN_SQRT:
27125 if (out_mode == DFmode && in_mode == DFmode)
27126 {
27127 if (out_n == 2 && in_n == 2)
27128 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27129 else if (out_n == 4 && in_n == 4)
27130 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27131 }
27132 break;
27133
27134 case BUILT_IN_SQRTF:
27135 if (out_mode == SFmode && in_mode == SFmode)
27136 {
27137 if (out_n == 4 && in_n == 4)
27138 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27139 else if (out_n == 8 && in_n == 8)
27140 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27141 }
27142 break;
27143
27144 case BUILT_IN_LRINT:
27145 if (out_mode == SImode && out_n == 4
27146 && in_mode == DFmode && in_n == 2)
27147 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27148 break;
27149
27150 case BUILT_IN_LRINTF:
27151 if (out_mode == SImode && in_mode == SFmode)
27152 {
27153 if (out_n == 4 && in_n == 4)
27154 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27155 else if (out_n == 8 && in_n == 8)
27156 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27157 }
27158 break;
27159
27160 case BUILT_IN_COPYSIGN:
27161 if (out_mode == DFmode && in_mode == DFmode)
27162 {
27163 if (out_n == 2 && in_n == 2)
27164 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27165 else if (out_n == 4 && in_n == 4)
27166 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27167 }
27168 break;
27169
27170 case BUILT_IN_COPYSIGNF:
27171 if (out_mode == SFmode && in_mode == SFmode)
27172 {
27173 if (out_n == 4 && in_n == 4)
27174 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27175 else if (out_n == 8 && in_n == 8)
27176 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27177 }
27178 break;
27179
27180 case BUILT_IN_FLOOR:
27181 /* The round insn does not trap on denormals. */
27182 if (flag_trapping_math || !TARGET_ROUND)
27183 break;
27184
27185 if (out_mode == DFmode && in_mode == DFmode)
27186 {
27187 if (out_n == 2 && in_n == 2)
27188 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27189 else if (out_n == 4 && in_n == 4)
27190 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27191 }
27192 break;
27193
27194 case BUILT_IN_FLOORF:
27195 /* The round insn does not trap on denormals. */
27196 if (flag_trapping_math || !TARGET_ROUND)
27197 break;
27198
27199 if (out_mode == SFmode && in_mode == SFmode)
27200 {
27201 if (out_n == 4 && in_n == 4)
27202 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27203 else if (out_n == 8 && in_n == 8)
27204 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27205 }
27206 break;
27207
27208 case BUILT_IN_CEIL:
27209 /* The round insn does not trap on denormals. */
27210 if (flag_trapping_math || !TARGET_ROUND)
27211 break;
27212
27213 if (out_mode == DFmode && in_mode == DFmode)
27214 {
27215 if (out_n == 2 && in_n == 2)
27216 return ix86_builtins[IX86_BUILTIN_CEILPD];
27217 else if (out_n == 4 && in_n == 4)
27218 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27219 }
27220 break;
27221
27222 case BUILT_IN_CEILF:
27223 /* The round insn does not trap on denormals. */
27224 if (flag_trapping_math || !TARGET_ROUND)
27225 break;
27226
27227 if (out_mode == SFmode && in_mode == SFmode)
27228 {
27229 if (out_n == 4 && in_n == 4)
27230 return ix86_builtins[IX86_BUILTIN_CEILPS];
27231 else if (out_n == 8 && in_n == 8)
27232 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27233 }
27234 break;
27235
27236 case BUILT_IN_TRUNC:
27237 /* The round insn does not trap on denormals. */
27238 if (flag_trapping_math || !TARGET_ROUND)
27239 break;
27240
27241 if (out_mode == DFmode && in_mode == DFmode)
27242 {
27243 if (out_n == 2 && in_n == 2)
27244 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27245 else if (out_n == 4 && in_n == 4)
27246 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27247 }
27248 break;
27249
27250 case BUILT_IN_TRUNCF:
27251 /* The round insn does not trap on denormals. */
27252 if (flag_trapping_math || !TARGET_ROUND)
27253 break;
27254
27255 if (out_mode == SFmode && in_mode == SFmode)
27256 {
27257 if (out_n == 4 && in_n == 4)
27258 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27259 else if (out_n == 8 && in_n == 8)
27260 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27261 }
27262 break;
27263
27264 case BUILT_IN_RINT:
27265 /* The round insn does not trap on denormals. */
27266 if (flag_trapping_math || !TARGET_ROUND)
27267 break;
27268
27269 if (out_mode == DFmode && in_mode == DFmode)
27270 {
27271 if (out_n == 2 && in_n == 2)
27272 return ix86_builtins[IX86_BUILTIN_RINTPD];
27273 else if (out_n == 4 && in_n == 4)
27274 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27275 }
27276 break;
27277
27278 case BUILT_IN_RINTF:
27279 /* The round insn does not trap on denormals. */
27280 if (flag_trapping_math || !TARGET_ROUND)
27281 break;
27282
27283 if (out_mode == SFmode && in_mode == SFmode)
27284 {
27285 if (out_n == 4 && in_n == 4)
27286 return ix86_builtins[IX86_BUILTIN_RINTPS];
27287 else if (out_n == 8 && in_n == 8)
27288 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27289 }
27290 break;
27291
27292 case BUILT_IN_FMA:
27293 if (out_mode == DFmode && in_mode == DFmode)
27294 {
27295 if (out_n == 2 && in_n == 2)
27296 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27297 if (out_n == 4 && in_n == 4)
27298 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27299 }
27300 break;
27301
27302 case BUILT_IN_FMAF:
27303 if (out_mode == SFmode && in_mode == SFmode)
27304 {
27305 if (out_n == 4 && in_n == 4)
27306 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27307 if (out_n == 8 && in_n == 8)
27308 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27309 }
27310 break;
27311
27312 default:
27313 break;
27314 }
27315
27316 /* Dispatch to a handler for a vectorization library. */
27317 if (ix86_veclib_handler)
27318 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27319 type_in);
27320
27321 return NULL_TREE;
27322 }
27323
27324 /* Handler for an SVML-style interface to
27325 a library with vectorized intrinsics. */
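/* As an example of the name mangling implemented below, BUILT_IN_SINF
   on a 4-element SFmode vector becomes "vmlsSin4", while BUILT_IN_SIN
   on a 2-element DFmode vector becomes "vmldSin2".  */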
27326
27327 static tree
27328 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27329 {
27330 char name[20];
27331 tree fntype, new_fndecl, args;
27332 unsigned arity;
27333 const char *bname;
27334 enum machine_mode el_mode, in_mode;
27335 int n, in_n;
27336
27337 /* The SVML is suitable for unsafe math only. */
27338 if (!flag_unsafe_math_optimizations)
27339 return NULL_TREE;
27340
27341 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27342 n = TYPE_VECTOR_SUBPARTS (type_out);
27343 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27344 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27345 if (el_mode != in_mode
27346 || n != in_n)
27347 return NULL_TREE;
27348
27349 switch (fn)
27350 {
27351 case BUILT_IN_EXP:
27352 case BUILT_IN_LOG:
27353 case BUILT_IN_LOG10:
27354 case BUILT_IN_POW:
27355 case BUILT_IN_TANH:
27356 case BUILT_IN_TAN:
27357 case BUILT_IN_ATAN:
27358 case BUILT_IN_ATAN2:
27359 case BUILT_IN_ATANH:
27360 case BUILT_IN_CBRT:
27361 case BUILT_IN_SINH:
27362 case BUILT_IN_SIN:
27363 case BUILT_IN_ASINH:
27364 case BUILT_IN_ASIN:
27365 case BUILT_IN_COSH:
27366 case BUILT_IN_COS:
27367 case BUILT_IN_ACOSH:
27368 case BUILT_IN_ACOS:
27369 if (el_mode != DFmode || n != 2)
27370 return NULL_TREE;
27371 break;
27372
27373 case BUILT_IN_EXPF:
27374 case BUILT_IN_LOGF:
27375 case BUILT_IN_LOG10F:
27376 case BUILT_IN_POWF:
27377 case BUILT_IN_TANHF:
27378 case BUILT_IN_TANF:
27379 case BUILT_IN_ATANF:
27380 case BUILT_IN_ATAN2F:
27381 case BUILT_IN_ATANHF:
27382 case BUILT_IN_CBRTF:
27383 case BUILT_IN_SINHF:
27384 case BUILT_IN_SINF:
27385 case BUILT_IN_ASINHF:
27386 case BUILT_IN_ASINF:
27387 case BUILT_IN_COSHF:
27388 case BUILT_IN_COSF:
27389 case BUILT_IN_ACOSHF:
27390 case BUILT_IN_ACOSF:
27391 if (el_mode != SFmode || n != 4)
27392 return NULL_TREE;
27393 break;
27394
27395 default:
27396 return NULL_TREE;
27397 }
27398
27399 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27400
27401 if (fn == BUILT_IN_LOGF)
27402 strcpy (name, "vmlsLn4");
27403 else if (fn == BUILT_IN_LOG)
27404 strcpy (name, "vmldLn2");
27405 else if (n == 4)
27406 {
27407 sprintf (name, "vmls%s", bname+10);
27408 name[strlen (name)-1] = '4';
27409 }
27410 else
27411 sprintf (name, "vmld%s2", bname+10);
27412
27413 /* Convert to uppercase. */
27414 name[4] &= ~0x20;
27415
27416 arity = 0;
27417 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27418 args = TREE_CHAIN (args))
27419 arity++;
27420
27421 if (arity == 1)
27422 fntype = build_function_type_list (type_out, type_in, NULL);
27423 else
27424 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27425
27426 /* Build a function declaration for the vectorized function. */
27427 new_fndecl = build_decl (BUILTINS_LOCATION,
27428 FUNCTION_DECL, get_identifier (name), fntype);
27429 TREE_PUBLIC (new_fndecl) = 1;
27430 DECL_EXTERNAL (new_fndecl) = 1;
27431 DECL_IS_NOVOPS (new_fndecl) = 1;
27432 TREE_READONLY (new_fndecl) = 1;
27433
27434 return new_fndecl;
27435 }
27436
27437 /* Handler for an ACML-style interface to
27438 a library with vectorized intrinsics. */
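/* As an example of the naming scheme used below, BUILT_IN_SIN on a
   2-element DFmode vector is mapped to "__vrd2_sin" and BUILT_IN_SINF
   on a 4-element SFmode vector to "__vrs4_sinf".  */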
27439
27440 static tree
27441 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27442 {
27443 char name[20] = "__vr.._";
27444 tree fntype, new_fndecl, args;
27445 unsigned arity;
27446 const char *bname;
27447 enum machine_mode el_mode, in_mode;
27448 int n, in_n;
27449
27450   /* The ACML is 64-bit only and suitable for unsafe math only, as
27451 it does not correctly support parts of IEEE with the required
27452 precision such as denormals. */
27453 if (!TARGET_64BIT
27454 || !flag_unsafe_math_optimizations)
27455 return NULL_TREE;
27456
27457 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27458 n = TYPE_VECTOR_SUBPARTS (type_out);
27459 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27460 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27461 if (el_mode != in_mode
27462 || n != in_n)
27463 return NULL_TREE;
27464
27465 switch (fn)
27466 {
27467 case BUILT_IN_SIN:
27468 case BUILT_IN_COS:
27469 case BUILT_IN_EXP:
27470 case BUILT_IN_LOG:
27471 case BUILT_IN_LOG2:
27472 case BUILT_IN_LOG10:
27473 name[4] = 'd';
27474 name[5] = '2';
27475 if (el_mode != DFmode
27476 || n != 2)
27477 return NULL_TREE;
27478 break;
27479
27480 case BUILT_IN_SINF:
27481 case BUILT_IN_COSF:
27482 case BUILT_IN_EXPF:
27483 case BUILT_IN_POWF:
27484 case BUILT_IN_LOGF:
27485 case BUILT_IN_LOG2F:
27486 case BUILT_IN_LOG10F:
27487 name[4] = 's';
27488 name[5] = '4';
27489 if (el_mode != SFmode
27490 || n != 4)
27491 return NULL_TREE;
27492 break;
27493
27494 default:
27495 return NULL_TREE;
27496 }
27497
27498 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27499 sprintf (name + 7, "%s", bname+10);
27500
27501 arity = 0;
27502 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27503 args = TREE_CHAIN (args))
27504 arity++;
27505
27506 if (arity == 1)
27507 fntype = build_function_type_list (type_out, type_in, NULL);
27508 else
27509 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27510
27511 /* Build a function declaration for the vectorized function. */
27512 new_fndecl = build_decl (BUILTINS_LOCATION,
27513 FUNCTION_DECL, get_identifier (name), fntype);
27514 TREE_PUBLIC (new_fndecl) = 1;
27515 DECL_EXTERNAL (new_fndecl) = 1;
27516 DECL_IS_NOVOPS (new_fndecl) = 1;
27517 TREE_READONLY (new_fndecl) = 1;
27518
27519 return new_fndecl;
27520 }
27521
27522
27523 /* Returns a decl of a function that implements conversion of an integer vector
27524 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27525 are the types involved when converting according to CODE.
27526 Return NULL_TREE if it is not available. */
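/* For example, a FLOAT_EXPR converting V4SImode to V4SFmode returns the
   CVTDQ2PS builtin (CVTUDQ2PS for an unsigned source); unsupported
   combinations return NULL_TREE.  */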
27527
27528 static tree
27529 ix86_vectorize_builtin_conversion (unsigned int code,
27530 tree dest_type, tree src_type)
27531 {
27532 if (! TARGET_SSE2)
27533 return NULL_TREE;
27534
27535 switch (code)
27536 {
27537 case FLOAT_EXPR:
27538 switch (TYPE_MODE (src_type))
27539 {
27540 case V4SImode:
27541 switch (TYPE_MODE (dest_type))
27542 {
27543 case V4SFmode:
27544 return (TYPE_UNSIGNED (src_type)
27545 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27546 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27547 case V4DFmode:
27548 return (TYPE_UNSIGNED (src_type)
27549 ? NULL_TREE
27550 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27551 default:
27552 return NULL_TREE;
27553 }
27554 break;
27555 case V8SImode:
27556 switch (TYPE_MODE (dest_type))
27557 {
27558 case V8SFmode:
27559 return (TYPE_UNSIGNED (src_type)
27560 ? NULL_TREE
27561 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27562 default:
27563 return NULL_TREE;
27564 }
27565 break;
27566 default:
27567 return NULL_TREE;
27568 }
27569
27570 case FIX_TRUNC_EXPR:
27571 switch (TYPE_MODE (dest_type))
27572 {
27573 case V4SImode:
27574 switch (TYPE_MODE (src_type))
27575 {
27576 case V4SFmode:
27577 return (TYPE_UNSIGNED (dest_type)
27578 ? NULL_TREE
27579 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27580 case V4DFmode:
27581 return (TYPE_UNSIGNED (dest_type)
27582 ? NULL_TREE
27583 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27584 default:
27585 return NULL_TREE;
27586 }
27587 break;
27588
27589 case V8SImode:
27590 switch (TYPE_MODE (src_type))
27591 {
27592 case V8SFmode:
27593 return (TYPE_UNSIGNED (dest_type)
27594 ? NULL_TREE
27595 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27596 default:
27597 return NULL_TREE;
27598 }
27599 break;
27600
27601 default:
27602 return NULL_TREE;
27603 }
27604
27605 default:
27606 return NULL_TREE;
27607 }
27608
27609 return NULL_TREE;
27610 }
27611
27612 /* Returns a decl for a target-specific builtin that implements the
27613    reciprocal of the function FN, or NULL_TREE if it is not available.  */
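/* For example, IX86_BUILTIN_SQRTPS_NR is rewritten to
   IX86_BUILTIN_RSQRTPS_NR and BUILT_IN_SQRTF to IX86_BUILTIN_RSQRTF,
   but only under the fast-math conditions checked below.  */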
27614
27615 static tree
27616 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27617 bool sqrt ATTRIBUTE_UNUSED)
27618 {
27619 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27620 && flag_finite_math_only && !flag_trapping_math
27621 && flag_unsafe_math_optimizations))
27622 return NULL_TREE;
27623
27624 if (md_fn)
27625 /* Machine dependent builtins. */
27626 switch (fn)
27627 {
27628 /* Vectorized version of sqrt to rsqrt conversion. */
27629 case IX86_BUILTIN_SQRTPS_NR:
27630 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27631
27632 case IX86_BUILTIN_SQRTPS_NR256:
27633 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27634
27635 default:
27636 return NULL_TREE;
27637 }
27638 else
27639 /* Normal builtins. */
27640 switch (fn)
27641 {
27642 /* Sqrt to rsqrt conversion. */
27643 case BUILT_IN_SQRTF:
27644 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27645
27646 default:
27647 return NULL_TREE;
27648 }
27649 }
27650 \f
27651 /* Helper for avx_vpermilps256_operand et al. This is also used by
27652 the expansion functions to turn the parallel back into a mask.
27653 The return value is 0 for no match and the imm8+1 for a match. */
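/* A small worked example, derived from the code below: for V4SFmode the
   parallel (1 0 3 2) gives mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, so
   the return value is 0xb2 (imm8 0xb1 plus one).  */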
27654
27655 int
27656 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27657 {
27658 unsigned i, nelt = GET_MODE_NUNITS (mode);
27659 unsigned mask = 0;
27660 unsigned char ipar[8];
27661
27662 if (XVECLEN (par, 0) != (int) nelt)
27663 return 0;
27664
27665 /* Validate that all of the elements are constants, and not totally
27666 out of range. Copy the data into an integral array to make the
27667 subsequent checks easier. */
27668 for (i = 0; i < nelt; ++i)
27669 {
27670 rtx er = XVECEXP (par, 0, i);
27671 unsigned HOST_WIDE_INT ei;
27672
27673 if (!CONST_INT_P (er))
27674 return 0;
27675 ei = INTVAL (er);
27676 if (ei >= nelt)
27677 return 0;
27678 ipar[i] = ei;
27679 }
27680
27681 switch (mode)
27682 {
27683 case V4DFmode:
27684 /* In the 256-bit DFmode case, we can only move elements within
27685 a 128-bit lane. */
27686 for (i = 0; i < 2; ++i)
27687 {
27688 if (ipar[i] >= 2)
27689 return 0;
27690 mask |= ipar[i] << i;
27691 }
27692 for (i = 2; i < 4; ++i)
27693 {
27694 if (ipar[i] < 2)
27695 return 0;
27696 mask |= (ipar[i] - 2) << i;
27697 }
27698 break;
27699
27700 case V8SFmode:
27701 /* In the 256-bit SFmode case, we have full freedom of movement
27702 within the low 128-bit lane, but the high 128-bit lane must
27703 mirror the exact same pattern. */
27704 for (i = 0; i < 4; ++i)
27705 if (ipar[i] + 4 != ipar[i + 4])
27706 return 0;
27707 nelt = 4;
27708 /* FALLTHRU */
27709
27710 case V2DFmode:
27711 case V4SFmode:
27712       /* In the 128-bit case, we have full freedom in the placement of
27713 the elements from the source operand. */
27714 for (i = 0; i < nelt; ++i)
27715 mask |= ipar[i] << (i * (nelt / 2));
27716 break;
27717
27718 default:
27719 gcc_unreachable ();
27720 }
27721
27722 /* Make sure success has a non-zero value by adding one. */
27723 return mask + 1;
27724 }
27725
27726 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27727 the expansion functions to turn the parallel back into a mask.
27728 The return value is 0 for no match and the imm8+1 for a match. */
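/* For instance, for V8SFmode the lane-swapping parallel (4 5 6 7 0 1 2 3)
   has halves starting at elements 4 and 0, so mask = (4/4) << 0
   | (0/4) << 4 = 0x01 and the return value is 0x02.  */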
27729
27730 int
27731 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27732 {
27733 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27734 unsigned mask = 0;
27735 unsigned char ipar[8];
27736
27737 if (XVECLEN (par, 0) != (int) nelt)
27738 return 0;
27739
27740 /* Validate that all of the elements are constants, and not totally
27741 out of range. Copy the data into an integral array to make the
27742 subsequent checks easier. */
27743 for (i = 0; i < nelt; ++i)
27744 {
27745 rtx er = XVECEXP (par, 0, i);
27746 unsigned HOST_WIDE_INT ei;
27747
27748 if (!CONST_INT_P (er))
27749 return 0;
27750 ei = INTVAL (er);
27751 if (ei >= 2 * nelt)
27752 return 0;
27753 ipar[i] = ei;
27754 }
27755
27756   /* Validate that each half of the permute selects consecutive elements.  */
27757 for (i = 0; i < nelt2 - 1; ++i)
27758 if (ipar[i] + 1 != ipar[i + 1])
27759 return 0;
27760 for (i = nelt2; i < nelt - 1; ++i)
27761 if (ipar[i] + 1 != ipar[i + 1])
27762 return 0;
27763
27764 /* Reconstruct the mask. */
27765 for (i = 0; i < 2; ++i)
27766 {
27767 unsigned e = ipar[i * nelt2];
27768 if (e % nelt2)
27769 return 0;
27770 e /= nelt2;
27771 mask |= e << (i * 4);
27772 }
27773
27774 /* Make sure success has a non-zero value by adding one. */
27775 return mask + 1;
27776 }
27777 \f
27778
27779 /* Store OPERAND to memory after reload is completed.  This means
27780 that we can't easily use assign_stack_local. */
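/* Roughly: when the red zone is available the operand is stored at
   sp - RED_ZONE_SIZE; otherwise it is pushed with a pre-decrement of the
   stack pointer.  Either way the returned MEM refers to the slot just
   written.  */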
27781 rtx
27782 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27783 {
27784 rtx result;
27785
27786 gcc_assert (reload_completed);
27787 if (ix86_using_red_zone ())
27788 {
27789 result = gen_rtx_MEM (mode,
27790 gen_rtx_PLUS (Pmode,
27791 stack_pointer_rtx,
27792 GEN_INT (-RED_ZONE_SIZE)));
27793 emit_move_insn (result, operand);
27794 }
27795 else if (TARGET_64BIT)
27796 {
27797 switch (mode)
27798 {
27799 case HImode:
27800 case SImode:
27801 operand = gen_lowpart (DImode, operand);
27802 /* FALLTHRU */
27803 case DImode:
27804 emit_insn (
27805 gen_rtx_SET (VOIDmode,
27806 gen_rtx_MEM (DImode,
27807 gen_rtx_PRE_DEC (DImode,
27808 stack_pointer_rtx)),
27809 operand));
27810 break;
27811 default:
27812 gcc_unreachable ();
27813 }
27814 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27815 }
27816 else
27817 {
27818 switch (mode)
27819 {
27820 case DImode:
27821 {
27822 rtx operands[2];
27823 split_double_mode (mode, &operand, 1, operands, operands + 1);
27824 emit_insn (
27825 gen_rtx_SET (VOIDmode,
27826 gen_rtx_MEM (SImode,
27827 gen_rtx_PRE_DEC (Pmode,
27828 stack_pointer_rtx)),
27829 operands[1]));
27830 emit_insn (
27831 gen_rtx_SET (VOIDmode,
27832 gen_rtx_MEM (SImode,
27833 gen_rtx_PRE_DEC (Pmode,
27834 stack_pointer_rtx)),
27835 operands[0]));
27836 }
27837 break;
27838 case HImode:
27839 /* Store HImodes as SImodes. */
27840 operand = gen_lowpart (SImode, operand);
27841 /* FALLTHRU */
27842 case SImode:
27843 emit_insn (
27844 gen_rtx_SET (VOIDmode,
27845 gen_rtx_MEM (GET_MODE (operand),
27846 gen_rtx_PRE_DEC (SImode,
27847 stack_pointer_rtx)),
27848 operand));
27849 break;
27850 default:
27851 gcc_unreachable ();
27852 }
27853 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27854 }
27855 return result;
27856 }
27857
27858 /* Free the operand from memory.  */
27859 void
27860 ix86_free_from_memory (enum machine_mode mode)
27861 {
27862 if (!ix86_using_red_zone ())
27863 {
27864 int size;
27865
27866 if (mode == DImode || TARGET_64BIT)
27867 size = 8;
27868 else
27869 size = 4;
27870 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27871 to pop or add instruction if registers are available. */
27872 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27873 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
27874 GEN_INT (size))));
27875 }
27876 }
27877
27878 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27879
27880 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27881 QImode must go into class Q_REGS.
27882 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
27883 movdf to do mem-to-mem moves through integer regs. */
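/* For example, a nonzero CONST_DOUBLE headed for an SSE or MMX class
   yields NO_REGS below, which forces the constant into the constant
   pool instead of materializing it in a vector register.  */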
27884
27885 static reg_class_t
27886 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27887 {
27888 enum machine_mode mode = GET_MODE (x);
27889
27890 /* We're only allowed to return a subclass of CLASS. Many of the
27891 following checks fail for NO_REGS, so eliminate that early. */
27892 if (regclass == NO_REGS)
27893 return NO_REGS;
27894
27895 /* All classes can load zeros. */
27896 if (x == CONST0_RTX (mode))
27897 return regclass;
27898
27899 /* Force constants into memory if we are loading a (nonzero) constant into
27900 an MMX or SSE register. This is because there are no MMX/SSE instructions
27901 to load from a constant. */
27902 if (CONSTANT_P (x)
27903 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27904 return NO_REGS;
27905
27906 /* Prefer SSE regs only, if we can use them for math. */
27907 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27908 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27909
27910 /* Floating-point constants need more complex checks. */
27911 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27912 {
27913 /* General regs can load everything. */
27914 if (reg_class_subset_p (regclass, GENERAL_REGS))
27915 return regclass;
27916
27917 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27918 zero above. We only want to wind up preferring 80387 registers if
27919 we plan on doing computation with them. */
27920 if (TARGET_80387
27921 && standard_80387_constant_p (x) > 0)
27922 {
27923 /* Limit class to non-sse. */
27924 if (regclass == FLOAT_SSE_REGS)
27925 return FLOAT_REGS;
27926 if (regclass == FP_TOP_SSE_REGS)
27927 return FP_TOP_REG;
27928 if (regclass == FP_SECOND_SSE_REGS)
27929 return FP_SECOND_REG;
27930 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27931 return regclass;
27932 }
27933
27934 return NO_REGS;
27935 }
27936
27937 /* Generally when we see PLUS here, it's the function invariant
27938      (plus soft-fp const_int), which can only be computed into general
27939 regs. */
27940 if (GET_CODE (x) == PLUS)
27941 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27942
27943 /* QImode constants are easy to load, but non-constant QImode data
27944 must go into Q_REGS. */
27945 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27946 {
27947 if (reg_class_subset_p (regclass, Q_REGS))
27948 return regclass;
27949 if (reg_class_subset_p (Q_REGS, regclass))
27950 return Q_REGS;
27951 return NO_REGS;
27952 }
27953
27954 return regclass;
27955 }
27956
27957 /* Discourage putting floating-point values in SSE registers unless
27958 SSE math is being used, and likewise for the 387 registers. */
27959 static reg_class_t
27960 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
27961 {
27962 enum machine_mode mode = GET_MODE (x);
27963
27964 /* Restrict the output reload class to the register bank that we are doing
27965 math on. If we would like not to return a subset of CLASS, reject this
27966 alternative: if reload cannot do this, it will still use its choice. */
27968 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
27969 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
27970
27971 if (X87_FLOAT_MODE_P (mode))
27972 {
27973 if (regclass == FP_TOP_SSE_REGS)
27974 return FP_TOP_REG;
27975 else if (regclass == FP_SECOND_SSE_REGS)
27976 return FP_SECOND_REG;
27977 else
27978 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
27979 }
27980
27981 return regclass;
27982 }
27983
27984 static reg_class_t
27985 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
27986 enum machine_mode mode,
27987 secondary_reload_info *sri ATTRIBUTE_UNUSED)
27988 {
27989 /* QImode spills from non-QI registers require
27990 intermediate register on 32bit targets. */
27991 if (!TARGET_64BIT
27992 && !in_p && mode == QImode
27993 && (rclass == GENERAL_REGS
27994 || rclass == LEGACY_REGS
27995 || rclass == INDEX_REGS))
27996 {
27997 int regno;
27998
27999 if (REG_P (x))
28000 regno = REGNO (x);
28001 else
28002 regno = -1;
28003
28004 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28005 regno = true_regnum (x);
28006
28007 /* Return Q_REGS if the operand is in memory. */
28008 if (regno == -1)
28009 return Q_REGS;
28010 }
28011
28012 /* This condition handles corner case where an expression involving
28013 pointers gets vectorized. We're trying to use the address of a
28014 stack slot as a vector initializer.
28015
28016 (set (reg:V2DI 74 [ vect_cst_.2 ])
28017 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28018
28019 Eventually frame gets turned into sp+offset like this:
28020
28021 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28022 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28023 (const_int 392 [0x188]))))
28024
28025 That later gets turned into:
28026
28027 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28028 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28029 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28030
28031 We'll have the following reload recorded:
28032
28033 Reload 0: reload_in (DI) =
28034 (plus:DI (reg/f:DI 7 sp)
28035 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28036 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28037 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28038 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28039 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28040 reload_reg_rtx: (reg:V2DI 22 xmm1)
28041
28042 Which isn't going to work since SSE instructions can't handle scalar
28043 additions. Returning GENERAL_REGS forces the addition into integer
28044 register and reload can handle subsequent reloads without problems. */
28045
28046 if (in_p && GET_CODE (x) == PLUS
28047 && SSE_CLASS_P (rclass)
28048 && SCALAR_INT_MODE_P (mode))
28049 return GENERAL_REGS;
28050
28051 return NO_REGS;
28052 }
28053
28054 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28055
28056 static bool
28057 ix86_class_likely_spilled_p (reg_class_t rclass)
28058 {
28059 switch (rclass)
28060 {
28061 case AREG:
28062 case DREG:
28063 case CREG:
28064 case BREG:
28065 case AD_REGS:
28066 case SIREG:
28067 case DIREG:
28068 case SSE_FIRST_REG:
28069 case FP_TOP_REG:
28070 case FP_SECOND_REG:
28071 return true;
28072
28073 default:
28074 break;
28075 }
28076
28077 return false;
28078 }
28079
28080 /* If we are copying between general and FP registers, we need a memory
28081 location. The same is true for SSE and MMX registers.
28082
28083 To optimize register_move_cost performance, allow inline variant.
28084
28085    The macro can't work reliably when one of the CLASSES is a class containing
28086 registers from multiple units (SSE, MMX, integer). We avoid this by never
28087 combining those units in single alternative in the machine description.
28088 Ensure that this constraint holds to avoid unexpected surprises.
28089
28090 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28091 enforce these sanity checks. */
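/* For example, moving a DImode value between SSE_REGS and GENERAL_REGS
   on a 32-bit target is reported as needing secondary memory, since
   moves between the SSE and integer units are limited to word size
   there (assuming SSE2 and inter-unit moves are otherwise enabled).  */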
28092
28093 static inline bool
28094 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28095 enum machine_mode mode, int strict)
28096 {
28097 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28098 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28099 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28100 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28101 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28102 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28103 {
28104 gcc_assert (!strict);
28105 return true;
28106 }
28107
28108 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28109 return true;
28110
28111 /* ??? This is a lie. We do have moves between mmx/general, and for
28112 mmx/sse2. But by saying we need secondary memory we discourage the
28113 register allocator from using the mmx registers unless needed. */
28114 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28115 return true;
28116
28117 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28118 {
28119 /* SSE1 doesn't have any direct moves from other classes. */
28120 if (!TARGET_SSE2)
28121 return true;
28122
28123 /* If the target says that inter-unit moves are more expensive
28124 than moving through memory, then don't generate them. */
28125 if (!TARGET_INTER_UNIT_MOVES)
28126 return true;
28127
28128 /* Between SSE and general, we have moves no larger than word size. */
28129 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28130 return true;
28131 }
28132
28133 return false;
28134 }
28135
28136 bool
28137 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28138 enum machine_mode mode, int strict)
28139 {
28140 return inline_secondary_memory_needed (class1, class2, mode, strict);
28141 }
28142
28143 /* Return true if the registers in CLASS cannot represent the change from
28144 modes FROM to TO. */
28145
28146 bool
28147 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28148 enum reg_class regclass)
28149 {
28150 if (from == to)
28151 return false;
28152
28153 /* x87 registers can't do subreg at all, as all values are reformatted
28154 to extended precision. */
28155 if (MAYBE_FLOAT_CLASS_P (regclass))
28156 return true;
28157
28158 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28159 {
28160 /* Vector registers do not support QI or HImode loads. If we don't
28161 disallow a change to these modes, reload will assume it's ok to
28162 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28163 the vec_dupv4hi pattern. */
28164 if (GET_MODE_SIZE (from) < 4)
28165 return true;
28166
28167 /* Vector registers do not support subreg with nonzero offsets, which
28168 are otherwise valid for integer registers. Since we can't see
28169 whether we have a nonzero offset from here, prohibit all
28170 nonparadoxical subregs changing size. */
28171 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28172 return true;
28173 }
28174
28175 return false;
28176 }
28177
28178 /* Return the cost of moving data of mode M between a
28179 register and memory. A value of 2 is the default; this cost is
28180 relative to those in `REGISTER_MOVE_COST'.
28181
28182 This function is used extensively by register_move_cost that is used to
28183 build tables at startup. Make it inline in this case.
28184 When IN is 2, return maximum of in and out move cost.
28185
28186 If moving between registers and memory is more expensive than
28187 between two registers, you should define this macro to express the
28188 relative cost.
28189
28190    Also model the increased cost of moving QImode registers in
28191    non-Q_REGS classes.
28192 */
28193 static inline int
28194 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28195 int in)
28196 {
28197 int cost;
28198 if (FLOAT_CLASS_P (regclass))
28199 {
28200 int index;
28201 switch (mode)
28202 {
28203 case SFmode:
28204 index = 0;
28205 break;
28206 case DFmode:
28207 index = 1;
28208 break;
28209 case XFmode:
28210 index = 2;
28211 break;
28212 default:
28213 return 100;
28214 }
28215 if (in == 2)
28216 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28217 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28218 }
28219 if (SSE_CLASS_P (regclass))
28220 {
28221 int index;
28222 switch (GET_MODE_SIZE (mode))
28223 {
28224 case 4:
28225 index = 0;
28226 break;
28227 case 8:
28228 index = 1;
28229 break;
28230 case 16:
28231 index = 2;
28232 break;
28233 default:
28234 return 100;
28235 }
28236 if (in == 2)
28237 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28238 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28239 }
28240 if (MMX_CLASS_P (regclass))
28241 {
28242 int index;
28243 switch (GET_MODE_SIZE (mode))
28244 {
28245 case 4:
28246 index = 0;
28247 break;
28248 case 8:
28249 index = 1;
28250 break;
28251 default:
28252 return 100;
28253 }
28254       if (in == 2)
28255 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28256 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28257 }
28258 switch (GET_MODE_SIZE (mode))
28259 {
28260 case 1:
28261 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28262 {
28263 if (!in)
28264 return ix86_cost->int_store[0];
28265 if (TARGET_PARTIAL_REG_DEPENDENCY
28266 && optimize_function_for_speed_p (cfun))
28267 cost = ix86_cost->movzbl_load;
28268 else
28269 cost = ix86_cost->int_load[0];
28270 if (in == 2)
28271 return MAX (cost, ix86_cost->int_store[0]);
28272 return cost;
28273 }
28274 else
28275 {
28276 if (in == 2)
28277 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28278 if (in)
28279 return ix86_cost->movzbl_load;
28280 else
28281 return ix86_cost->int_store[0] + 4;
28282 }
28283 break;
28284 case 2:
28285 if (in == 2)
28286 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28287 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28288 default:
28289 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28290 if (mode == TFmode)
28291 mode = XFmode;
28292 if (in == 2)
28293 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28294 else if (in)
28295 cost = ix86_cost->int_load[2];
28296 else
28297 cost = ix86_cost->int_store[2];
28298 return (cost * (((int) GET_MODE_SIZE (mode)
28299 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28300 }
28301 }
28302
28303 static int
28304 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28305 bool in)
28306 {
28307 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28308 }
28309
28310
28311 /* Return the cost of moving data from a register in class CLASS1 to
28312 one in class CLASS2.
28313
28314 It is not required that the cost always equal 2 when FROM is the same as TO;
28315 on some machines it is expensive to move between registers if they are not
28316 general registers. */
28317
28318 static int
28319 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28320 reg_class_t class2_i)
28321 {
28322 enum reg_class class1 = (enum reg_class) class1_i;
28323 enum reg_class class2 = (enum reg_class) class2_i;
28324
28325 /* In case we require secondary memory, compute cost of the store followed
28326 by load. In order to avoid bad register allocation choices, we need
28327 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28328
28329 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28330 {
28331 int cost = 1;
28332
28333 cost += inline_memory_move_cost (mode, class1, 2);
28334 cost += inline_memory_move_cost (mode, class2, 2);
28335
28336 /* In case of copying from general_purpose_register we may emit multiple
28337 stores followed by single load causing memory size mismatch stall.
28338 Count this as arbitrarily high cost of 20. */
28339 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28340 cost += 20;
28341
28342 /* In the case of FP/MMX moves, the registers actually overlap, and we
28343 have to switch modes in order to treat them differently. */
28344 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28345 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28346 cost += 20;
28347
28348 return cost;
28349 }
28350
28351 /* Moves between SSE/MMX and integer unit are expensive. */
28352 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28353 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28354
28355 /* ??? By keeping the returned value relatively high, we limit the number
28356 of moves between integer and MMX/SSE registers for all targets.
28357 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
28358 where integer modes in MMX/SSE registers are not tieable
28359 because of missing QImode and HImode moves to, from or between
28360 MMX/SSE registers. */
28361 return MAX (8, ix86_cost->mmxsse_to_integer);
28362
28363 if (MAYBE_FLOAT_CLASS_P (class1))
28364 return ix86_cost->fp_move;
28365 if (MAYBE_SSE_CLASS_P (class1))
28366 return ix86_cost->sse_move;
28367 if (MAYBE_MMX_CLASS_P (class1))
28368 return ix86_cost->mmx_move;
28369 return 2;
28370 }
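
/* Illustrative example: copying DFmode between FLOAT_REGS and SSE_REGS has
   no direct instruction, so inline_secondary_memory_needed is true and the
   result is 1 plus the store-followed-by-load memory cost of both classes
   (plus the +20 penalties above when they apply).  A GENERAL_REGS <->
   SSE_REGS copy that does not have to go through memory instead falls
   through to the MAX (8, mmxsse_to_integer) case.  */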
28371
28372 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28373 MODE. */
28374
28375 bool
28376 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28377 {
28378 /* Flags and only flags can only hold CCmode values. */
28379 if (CC_REGNO_P (regno))
28380 return GET_MODE_CLASS (mode) == MODE_CC;
28381 if (GET_MODE_CLASS (mode) == MODE_CC
28382 || GET_MODE_CLASS (mode) == MODE_RANDOM
28383 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28384 return false;
28385 if (FP_REGNO_P (regno))
28386 return VALID_FP_MODE_P (mode);
28387 if (SSE_REGNO_P (regno))
28388 {
28389 /* We implement the move patterns for all vector modes into and
28390 out of SSE registers, even when no operation instructions
28391 are available. OImode move is available only when AVX is
28392 enabled. */
28393 return ((TARGET_AVX && mode == OImode)
28394 || VALID_AVX256_REG_MODE (mode)
28395 || VALID_SSE_REG_MODE (mode)
28396 || VALID_SSE2_REG_MODE (mode)
28397 || VALID_MMX_REG_MODE (mode)
28398 || VALID_MMX_REG_MODE_3DNOW (mode));
28399 }
28400 if (MMX_REGNO_P (regno))
28401 {
28402 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28403 so if the register is available at all, then we can move data of
28404 the given mode into or out of it. */
28405 return (VALID_MMX_REG_MODE (mode)
28406 || VALID_MMX_REG_MODE_3DNOW (mode));
28407 }
28408
28409 if (mode == QImode)
28410 {
28411 /* Take care for QImode values - they can be in non-QI regs,
28412 but then they do cause partial register stalls. */
28413 if (regno <= BX_REG || TARGET_64BIT)
28414 return true;
28415 if (!TARGET_PARTIAL_REG_STALL)
28416 return true;
28417 return !can_create_pseudo_p ();
28418 }
28419 /* We handle both integers and floats in the general purpose registers. */
28420 else if (VALID_INT_MODE_P (mode))
28421 return true;
28422 else if (VALID_FP_MODE_P (mode))
28423 return true;
28424 else if (VALID_DFP_MODE_P (mode))
28425 return true;
28426 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28427 on to use that value in smaller contexts, this can easily force a
28428 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28429 supporting DImode, allow it. */
28430 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28431 return true;
28432
28433 return false;
28434 }
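
/* A few illustrative consequences of the checks above: any SSE register can
   hold V4SFmode; DImode is allowed in the general registers even on 32-bit
   targets (as a two-word value); and QImode lands in %esi/%edi only on
   64-bit targets, when partial register stalls are not being avoided, or
   once register allocation can no longer create pseudos.  */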
28435
28436 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28437 tieable integer mode. */
28438
28439 static bool
28440 ix86_tieable_integer_mode_p (enum machine_mode mode)
28441 {
28442 switch (mode)
28443 {
28444 case HImode:
28445 case SImode:
28446 return true;
28447
28448 case QImode:
28449 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28450
28451 case DImode:
28452 return TARGET_64BIT;
28453
28454 default:
28455 return false;
28456 }
28457 }
28458
28459 /* Return true if MODE1 is accessible in a register that can hold MODE2
28460 without copying. That is, all register classes that can hold MODE2
28461 can also hold MODE1. */
28462
28463 bool
28464 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28465 {
28466 if (mode1 == mode2)
28467 return true;
28468
28469 if (ix86_tieable_integer_mode_p (mode1)
28470 && ix86_tieable_integer_mode_p (mode2))
28471 return true;
28472
28473 /* MODE2 being XFmode implies fp stack or general regs, which means we
28474 can tie any smaller floating point modes to it. Note that we do not
28475 tie this with TFmode. */
28476 if (mode2 == XFmode)
28477 return mode1 == SFmode || mode1 == DFmode;
28478
28479 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28480 that we can tie it with SFmode. */
28481 if (mode2 == DFmode)
28482 return mode1 == SFmode;
28483
28484 /* If MODE2 is only appropriate for an SSE register, then tie with
28485 any other mode acceptable to SSE registers. */
28486 if (GET_MODE_SIZE (mode2) == 16
28487 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28488 return (GET_MODE_SIZE (mode1) == 16
28489 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28490
28491 /* If MODE2 is appropriate for an MMX register, then tie
28492 with any other mode acceptable to MMX registers. */
28493 if (GET_MODE_SIZE (mode2) == 8
28494 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28495 return (GET_MODE_SIZE (mode1) == 8
28496 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28497
28498 return false;
28499 }
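
/* Illustrative pairs: HImode and SImode always tie; QImode ties with them
   unless partial register stalls are being avoided on 32-bit; SFmode ties
   with DFmode and with XFmode; and, as noted above, TFmode is deliberately
   not tied with XFmode.  */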
28500
28501 /* Compute a (partial) cost for rtx X. Return true if the complete
28502 cost has been computed, and false if subexpressions should be
28503 scanned. In either case, *TOTAL contains the cost result. */
28504
28505 static bool
28506 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28507 {
28508 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28509 enum machine_mode mode = GET_MODE (x);
28510 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28511
28512 switch (code)
28513 {
28514 case CONST_INT:
28515 case CONST:
28516 case LABEL_REF:
28517 case SYMBOL_REF:
28518 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28519 *total = 3;
28520 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28521 *total = 2;
28522 else if (flag_pic && SYMBOLIC_CONST (x)
28523 && (!TARGET_64BIT
28524 || (GET_CODE (x) != LABEL_REF
28525 && (GET_CODE (x) != SYMBOL_REF
28526 || !SYMBOL_REF_LOCAL_P (x)))))
28527 *total = 1;
28528 else
28529 *total = 0;
28530 return true;
28531
28532 case CONST_DOUBLE:
28533 if (mode == VOIDmode)
28534 *total = 0;
28535 else
28536 switch (standard_80387_constant_p (x))
28537 {
28538 case 1: /* 0.0 */
28539 *total = 1;
28540 break;
28541 default: /* Other constants */
28542 *total = 2;
28543 break;
28544 case 0:
28545 case -1:
28546 /* Start with (MEM (SYMBOL_REF)), since that's where
28547 it'll probably end up. Add a penalty for size. */
28548 *total = (COSTS_N_INSNS (1)
28549 + (flag_pic != 0 && !TARGET_64BIT)
28550 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28551 break;
28552 }
28553 return true;
28554
28555 case ZERO_EXTEND:
28556 /* The zero extension is often completely free on x86_64, so make
28557 it as cheap as possible. */
28558 if (TARGET_64BIT && mode == DImode
28559 && GET_MODE (XEXP (x, 0)) == SImode)
28560 *total = 1;
28561 else if (TARGET_ZERO_EXTEND_WITH_AND)
28562 *total = cost->add;
28563 else
28564 *total = cost->movzx;
28565 return false;
28566
28567 case SIGN_EXTEND:
28568 *total = cost->movsx;
28569 return false;
28570
28571 case ASHIFT:
28572 if (CONST_INT_P (XEXP (x, 1))
28573 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28574 {
28575 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28576 if (value == 1)
28577 {
28578 *total = cost->add;
28579 return false;
28580 }
28581 if ((value == 2 || value == 3)
28582 && cost->lea <= cost->shift_const)
28583 {
28584 *total = cost->lea;
28585 return false;
28586 }
28587 }
28588 /* FALLTHRU */
28589
28590 case ROTATE:
28591 case ASHIFTRT:
28592 case LSHIFTRT:
28593 case ROTATERT:
28594 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28595 {
28596 if (CONST_INT_P (XEXP (x, 1)))
28597 {
28598 if (INTVAL (XEXP (x, 1)) > 32)
28599 *total = cost->shift_const + COSTS_N_INSNS (2);
28600 else
28601 *total = cost->shift_const * 2;
28602 }
28603 else
28604 {
28605 if (GET_CODE (XEXP (x, 1)) == AND)
28606 *total = cost->shift_var * 2;
28607 else
28608 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28609 }
28610 }
28611 else
28612 {
28613 if (CONST_INT_P (XEXP (x, 1)))
28614 *total = cost->shift_const;
28615 else
28616 *total = cost->shift_var;
28617 }
28618 return false;
28619
28620 case FMA:
28621 {
28622 rtx sub;
28623
28624 gcc_assert (FLOAT_MODE_P (mode));
28625 gcc_assert (TARGET_FMA || TARGET_FMA4);
28626
28627 /* ??? SSE scalar/vector cost should be used here. */
28628 /* ??? Bald assumption that fma has the same cost as fmul. */
28629 *total = cost->fmul;
28630 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28631
28632 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28633 sub = XEXP (x, 0);
28634 if (GET_CODE (sub) == NEG)
28635 sub = XEXP (sub, 0);
28636 *total += rtx_cost (sub, FMA, speed);
28637
28638 sub = XEXP (x, 2);
28639 if (GET_CODE (sub) == NEG)
28640 sub = XEXP (sub, 0);
28641 *total += rtx_cost (sub, FMA, speed);
28642 return true;
28643 }
28644
28645 case MULT:
28646 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28647 {
28648 /* ??? SSE scalar cost should be used here. */
28649 *total = cost->fmul;
28650 return false;
28651 }
28652 else if (X87_FLOAT_MODE_P (mode))
28653 {
28654 *total = cost->fmul;
28655 return false;
28656 }
28657 else if (FLOAT_MODE_P (mode))
28658 {
28659 /* ??? SSE vector cost should be used here. */
28660 *total = cost->fmul;
28661 return false;
28662 }
28663 else
28664 {
28665 rtx op0 = XEXP (x, 0);
28666 rtx op1 = XEXP (x, 1);
28667 int nbits;
28668 if (CONST_INT_P (XEXP (x, 1)))
28669 {
28670 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28671 for (nbits = 0; value != 0; value &= value - 1)
28672 nbits++;
28673 }
28674 else
28675 /* This is arbitrary. */
28676 nbits = 7;
28677
28678 /* Compute costs correctly for widening multiplication. */
28679 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28680 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28681 == GET_MODE_SIZE (mode))
28682 {
28683 int is_mulwiden = 0;
28684 enum machine_mode inner_mode = GET_MODE (op0);
28685
28686 if (GET_CODE (op0) == GET_CODE (op1))
28687 is_mulwiden = 1, op1 = XEXP (op1, 0);
28688 else if (CONST_INT_P (op1))
28689 {
28690 if (GET_CODE (op0) == SIGN_EXTEND)
28691 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28692 == INTVAL (op1);
28693 else
28694 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28695 }
28696
28697 if (is_mulwiden)
28698 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28699 }
28700
28701 *total = (cost->mult_init[MODE_INDEX (mode)]
28702 + nbits * cost->mult_bit
28703 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28704
28705 return true;
28706 }
28707
28708 case DIV:
28709 case UDIV:
28710 case MOD:
28711 case UMOD:
28712 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28713 /* ??? SSE cost should be used here. */
28714 *total = cost->fdiv;
28715 else if (X87_FLOAT_MODE_P (mode))
28716 *total = cost->fdiv;
28717 else if (FLOAT_MODE_P (mode))
28718 /* ??? SSE vector cost should be used here. */
28719 *total = cost->fdiv;
28720 else
28721 *total = cost->divide[MODE_INDEX (mode)];
28722 return false;
28723
28724 case PLUS:
28725 if (GET_MODE_CLASS (mode) == MODE_INT
28726 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28727 {
28728 if (GET_CODE (XEXP (x, 0)) == PLUS
28729 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28730 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28731 && CONSTANT_P (XEXP (x, 1)))
28732 {
28733 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28734 if (val == 2 || val == 4 || val == 8)
28735 {
28736 *total = cost->lea;
28737 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28738 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28739 outer_code, speed);
28740 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28741 return true;
28742 }
28743 }
28744 else if (GET_CODE (XEXP (x, 0)) == MULT
28745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28746 {
28747 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28748 if (val == 2 || val == 4 || val == 8)
28749 {
28750 *total = cost->lea;
28751 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28752 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28753 return true;
28754 }
28755 }
28756 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28757 {
28758 *total = cost->lea;
28759 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28760 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28761 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28762 return true;
28763 }
28764 }
28765 /* FALLTHRU */
28766
28767 case MINUS:
28768 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28769 {
28770 /* ??? SSE cost should be used here. */
28771 *total = cost->fadd;
28772 return false;
28773 }
28774 else if (X87_FLOAT_MODE_P (mode))
28775 {
28776 *total = cost->fadd;
28777 return false;
28778 }
28779 else if (FLOAT_MODE_P (mode))
28780 {
28781 /* ??? SSE vector cost should be used here. */
28782 *total = cost->fadd;
28783 return false;
28784 }
28785 /* FALLTHRU */
28786
28787 case AND:
28788 case IOR:
28789 case XOR:
28790 if (!TARGET_64BIT && mode == DImode)
28791 {
28792 *total = (cost->add * 2
28793 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28794 << (GET_MODE (XEXP (x, 0)) != DImode))
28795 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28796 << (GET_MODE (XEXP (x, 1)) != DImode)));
28797 return true;
28798 }
28799 /* FALLTHRU */
28800
28801 case NEG:
28802 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28803 {
28804 /* ??? SSE cost should be used here. */
28805 *total = cost->fchs;
28806 return false;
28807 }
28808 else if (X87_FLOAT_MODE_P (mode))
28809 {
28810 *total = cost->fchs;
28811 return false;
28812 }
28813 else if (FLOAT_MODE_P (mode))
28814 {
28815 /* ??? SSE vector cost should be used here. */
28816 *total = cost->fchs;
28817 return false;
28818 }
28819 /* FALLTHRU */
28820
28821 case NOT:
28822 if (!TARGET_64BIT && mode == DImode)
28823 *total = cost->add * 2;
28824 else
28825 *total = cost->add;
28826 return false;
28827
28828 case COMPARE:
28829 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28830 && XEXP (XEXP (x, 0), 1) == const1_rtx
28831 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28832 && XEXP (x, 1) == const0_rtx)
28833 {
28834 /* This kind of construct is implemented using test[bwl].
28835 Treat it as if we had an AND. */
28836 *total = (cost->add
28837 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28838 + rtx_cost (const1_rtx, outer_code, speed));
28839 return true;
28840 }
28841 return false;
28842
28843 case FLOAT_EXTEND:
28844 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28845 *total = 0;
28846 return false;
28847
28848 case ABS:
28849 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28850 /* ??? SSE cost should be used here. */
28851 *total = cost->fabs;
28852 else if (X87_FLOAT_MODE_P (mode))
28853 *total = cost->fabs;
28854 else if (FLOAT_MODE_P (mode))
28855 /* ??? SSE vector cost should be used here. */
28856 *total = cost->fabs;
28857 return false;
28858
28859 case SQRT:
28860 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28861 /* ??? SSE cost should be used here. */
28862 *total = cost->fsqrt;
28863 else if (X87_FLOAT_MODE_P (mode))
28864 *total = cost->fsqrt;
28865 else if (FLOAT_MODE_P (mode))
28866 /* ??? SSE vector cost should be used here. */
28867 *total = cost->fsqrt;
28868 return false;
28869
28870 case UNSPEC:
28871 if (XINT (x, 1) == UNSPEC_TP)
28872 *total = 0;
28873 return false;
28874
28875 case VEC_SELECT:
28876 case VEC_CONCAT:
28877 case VEC_MERGE:
28878 case VEC_DUPLICATE:
28879 /* ??? Assume all of these vector manipulation patterns are
28880 recognizable, in which case they all pretty much have the
28881 same cost. */
28882 *total = COSTS_N_INSNS (1);
28883 return true;
28884
28885 default:
28886 return false;
28887 }
28888 }
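
/* Illustrative example of the PLUS handling above: an address-like
   expression such as (plus (plus (mult reg 4) reg) const) is costed as a
   single lea plus the costs of its operands, rather than as a shift and
   two separate additions.  */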
28889
28890 #if TARGET_MACHO
28891
28892 static int current_machopic_label_num;
28893
28894 /* Given a symbol name and its associated stub, write out the
28895 definition of the stub. */
28896
28897 void
28898 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28899 {
28900 unsigned int length;
28901 char *binder_name, *symbol_name, lazy_ptr_name[32];
28902 int label = ++current_machopic_label_num;
28903
28904 /* For 64-bit we shouldn't get here. */
28905 gcc_assert (!TARGET_64BIT);
28906
28907 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28908 symb = targetm.strip_name_encoding (symb);
28909
28910 length = strlen (stub);
28911 binder_name = XALLOCAVEC (char, length + 32);
28912 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28913
28914 length = strlen (symb);
28915 symbol_name = XALLOCAVEC (char, length + 32);
28916 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28917
28918 sprintf (lazy_ptr_name, "L%d$lz", label);
28919
28920 if (MACHOPIC_ATT_STUB)
28921 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
28922 else if (MACHOPIC_PURE)
28923 {
28924 if (TARGET_DEEP_BRANCH_PREDICTION)
28925 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
28926 else
28927 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
28928 }
28929 else
28930 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28931
28932 fprintf (file, "%s:\n", stub);
28933 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28934
28935 if (MACHOPIC_ATT_STUB)
28936 {
28937 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
28938 }
28939 else if (MACHOPIC_PURE)
28940 {
28941 /* PIC stub. */
28942 if (TARGET_DEEP_BRANCH_PREDICTION)
28943 {
28944 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28945 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
28946 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
28947 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
28948 }
28949 else
28950 {
28951 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
28952 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
28953 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
28954 }
28955 fprintf (file, "\tjmp\t*%%ecx\n");
28956 }
28957 else
28958 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28959
28960 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
28961 it needs no stub-binding-helper. */
28962 if (MACHOPIC_ATT_STUB)
28963 return;
28964
28965 fprintf (file, "%s:\n", binder_name);
28966
28967 if (MACHOPIC_PURE)
28968 {
28969 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
28970 fprintf (file, "\tpushl\t%%ecx\n");
28971 }
28972 else
28973 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
28974
28975 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
28976
28977 /* N.B. Keep the correspondence of these
28978 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
28979 old-pic/new-pic/non-pic stubs; altering this will break
28980 compatibility with existing dylibs. */
28981 if (MACHOPIC_PURE)
28982 {
28983 /* PIC stubs. */
28984 if (TARGET_DEEP_BRANCH_PREDICTION)
28985 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28986 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
28987 else
28988 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
28989 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
28990 }
28991 else
28992 /* 16-byte -mdynamic-no-pic stub. */
28993 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
28994
28995 fprintf (file, "%s:\n", lazy_ptr_name);
28996 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28997 fprintf (file, ASM_LONG "%s\n", binder_name);
28998 }
28999 #endif /* TARGET_MACHO */
29000
29001 /* Order the registers for register allocator. */
29002
29003 void
29004 x86_order_regs_for_local_alloc (void)
29005 {
29006 int pos = 0;
29007 int i;
29008
29009 /* First allocate the local general purpose registers. */
29010 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29011 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29012 reg_alloc_order [pos++] = i;
29013
29014 /* Global general purpose registers. */
29015 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29016 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29017 reg_alloc_order [pos++] = i;
29018
29019 /* x87 registers come first in case we are doing FP math
29020 using them. */
29021 if (!TARGET_SSE_MATH)
29022 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29023 reg_alloc_order [pos++] = i;
29024
29025 /* SSE registers. */
29026 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29027 reg_alloc_order [pos++] = i;
29028 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29029 reg_alloc_order [pos++] = i;
29030
29031 /* x87 registers. */
29032 if (TARGET_SSE_MATH)
29033 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29034 reg_alloc_order [pos++] = i;
29035
29036 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29037 reg_alloc_order [pos++] = i;
29038
29039 /* Initialize the rest of the array, as we do not allocate some registers
29040 at all. */
29041 while (pos < FIRST_PSEUDO_REGISTER)
29042 reg_alloc_order [pos++] = 0;
29043 }
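
/* The resulting order is therefore: call-clobbered general registers,
   call-saved general registers, the x87 stack (before the SSE registers
   when not using SSE math, after them otherwise), the SSE and REX SSE
   registers, the MMX registers, and finally zero-filled slots for the
   registers that are never allocated.  */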
29044
29045 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29046 in struct attribute_spec.handler. */
29047 static tree
29048 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29049 tree args,
29050 int flags ATTRIBUTE_UNUSED,
29051 bool *no_add_attrs)
29052 {
29053 if (TREE_CODE (*node) != FUNCTION_TYPE
29054 && TREE_CODE (*node) != METHOD_TYPE
29055 && TREE_CODE (*node) != FIELD_DECL
29056 && TREE_CODE (*node) != TYPE_DECL)
29057 {
29058 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29059 name);
29060 *no_add_attrs = true;
29061 return NULL_TREE;
29062 }
29063 if (TARGET_64BIT)
29064 {
29065 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29066 name);
29067 *no_add_attrs = true;
29068 return NULL_TREE;
29069 }
29070 if (is_attribute_p ("callee_pop_aggregate_return", name))
29071 {
29072 tree cst;
29073
29074 cst = TREE_VALUE (args);
29075 if (TREE_CODE (cst) != INTEGER_CST)
29076 {
29077 warning (OPT_Wattributes,
29078 "%qE attribute requires an integer constant argument",
29079 name);
29080 *no_add_attrs = true;
29081 }
29082 else if (compare_tree_int (cst, 0) != 0
29083 && compare_tree_int (cst, 1) != 0)
29084 {
29085 warning (OPT_Wattributes,
29086 "argument to %qE attribute is neither zero, nor one",
29087 name);
29088 *no_add_attrs = true;
29089 }
29090
29091 return NULL_TREE;
29092 }
29093
29094 return NULL_TREE;
29095 }
29096
29097 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29098 struct attribute_spec.handler. */
29099 static tree
29100 ix86_handle_abi_attribute (tree *node, tree name,
29101 tree args ATTRIBUTE_UNUSED,
29102 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29103 {
29104 if (TREE_CODE (*node) != FUNCTION_TYPE
29105 && TREE_CODE (*node) != METHOD_TYPE
29106 && TREE_CODE (*node) != FIELD_DECL
29107 && TREE_CODE (*node) != TYPE_DECL)
29108 {
29109 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29110 name);
29111 *no_add_attrs = true;
29112 return NULL_TREE;
29113 }
29114 if (!TARGET_64BIT)
29115 {
29116 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29117 name);
29118 *no_add_attrs = true;
29119 return NULL_TREE;
29120 }
29121
29122 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
29123 if (is_attribute_p ("ms_abi", name))
29124 {
29125 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29126 {
29127 error ("ms_abi and sysv_abi attributes are not compatible");
29128 }
29129
29130 return NULL_TREE;
29131 }
29132 else if (is_attribute_p ("sysv_abi", name))
29133 {
29134 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29135 {
29136 error ("ms_abi and sysv_abi attributes are not compatible");
29137 }
29138
29139 return NULL_TREE;
29140 }
29141
29142 return NULL_TREE;
29143 }
29144
29145 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29146 struct attribute_spec.handler. */
29147 static tree
29148 ix86_handle_struct_attribute (tree *node, tree name,
29149 tree args ATTRIBUTE_UNUSED,
29150 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29151 {
29152 tree *type = NULL;
29153 if (DECL_P (*node))
29154 {
29155 if (TREE_CODE (*node) == TYPE_DECL)
29156 type = &TREE_TYPE (*node);
29157 }
29158 else
29159 type = node;
29160
29161 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29162 || TREE_CODE (*type) == UNION_TYPE)))
29163 {
29164 warning (OPT_Wattributes, "%qE attribute ignored",
29165 name);
29166 *no_add_attrs = true;
29167 }
29168
29169 else if ((is_attribute_p ("ms_struct", name)
29170 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29171 || ((is_attribute_p ("gcc_struct", name)
29172 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29173 {
29174 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29175 name);
29176 *no_add_attrs = true;
29177 }
29178
29179 return NULL_TREE;
29180 }
29181
29182 static tree
29183 ix86_handle_fndecl_attribute (tree *node, tree name,
29184 tree args ATTRIBUTE_UNUSED,
29185 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29186 {
29187 if (TREE_CODE (*node) != FUNCTION_DECL)
29188 {
29189 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29190 name);
29191 *no_add_attrs = true;
29192 }
29193 return NULL_TREE;
29194 }
29195
29196 static bool
29197 ix86_ms_bitfield_layout_p (const_tree record_type)
29198 {
29199 return ((TARGET_MS_BITFIELD_LAYOUT
29200 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29201 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29202 }
29203
29204 /* Returns an expression indicating where the this parameter is
29205 located on entry to the FUNCTION. */
29206
29207 static rtx
29208 x86_this_parameter (tree function)
29209 {
29210 tree type = TREE_TYPE (function);
29211 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29212 int nregs;
29213
29214 if (TARGET_64BIT)
29215 {
29216 const int *parm_regs;
29217
29218 if (ix86_function_type_abi (type) == MS_ABI)
29219 parm_regs = x86_64_ms_abi_int_parameter_registers;
29220 else
29221 parm_regs = x86_64_int_parameter_registers;
29222 return gen_rtx_REG (DImode, parm_regs[aggr]);
29223 }
29224
29225 nregs = ix86_function_regparm (type, function);
29226
29227 if (nregs > 0 && !stdarg_p (type))
29228 {
29229 int regno;
29230 unsigned int ccvt = ix86_get_callcvt (type);
29231
29232 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29233 regno = aggr ? DX_REG : CX_REG;
29234 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29235 {
29236 regno = CX_REG;
29237 if (aggr)
29238 return gen_rtx_MEM (SImode,
29239 plus_constant (stack_pointer_rtx, 4));
29240 }
29241 else
29242 {
29243 regno = AX_REG;
29244 if (aggr)
29245 {
29246 regno = DX_REG;
29247 if (nregs == 1)
29248 return gen_rtx_MEM (SImode,
29249 plus_constant (stack_pointer_rtx, 4));
29250 }
29251 }
29252 return gen_rtx_REG (SImode, regno);
29253 }
29254
29255 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29256 }
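
/* Illustrative 32-bit cases of the above: with fastcall THIS arrives in
   %ecx (%edx when the return value is an aggregate returned in memory);
   with thiscall it is in %ecx unless an aggregate is returned, in which
   case it sits at 4(%esp); and with no register parameters it is found at
   4(%esp), or at 8(%esp) when the hidden aggregate-return pointer is
   passed first.  */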
29257
29258 /* Determine whether x86_output_mi_thunk can succeed. */
29259
29260 static bool
29261 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29262 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29263 HOST_WIDE_INT vcall_offset, const_tree function)
29264 {
29265 /* 64-bit can handle anything. */
29266 if (TARGET_64BIT)
29267 return true;
29268
29269 /* For 32-bit, everything's fine if we have one free register. */
29270 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29271 return true;
29272
29273 /* Need a free register for vcall_offset. */
29274 if (vcall_offset)
29275 return false;
29276
29277 /* Need a free register for GOT references. */
29278 if (flag_pic && !targetm.binds_local_p (function))
29279 return false;
29280
29281 /* Otherwise ok. */
29282 return true;
29283 }
29284
29285 /* Output the assembler code for a thunk function. THUNK_DECL is the
29286 declaration for the thunk function itself, FUNCTION is the decl for
29287 the target function. DELTA is an immediate constant offset to be
29288 added to THIS. If VCALL_OFFSET is nonzero, the word at
29289 *(*this + vcall_offset) should be added to THIS. */
29290
29291 static void
29292 x86_output_mi_thunk (FILE *file,
29293 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29294 HOST_WIDE_INT vcall_offset, tree function)
29295 {
29296 rtx xops[3];
29297 rtx this_param = x86_this_parameter (function);
29298 rtx this_reg, tmp;
29299
29300 /* Make sure unwind info is emitted for the thunk if needed. */
29301 final_start_function (emit_barrier (), file, 1);
29302
29303 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29304 pull it in now and let DELTA benefit. */
29305 if (REG_P (this_param))
29306 this_reg = this_param;
29307 else if (vcall_offset)
29308 {
29309 /* Put the this parameter into %eax. */
29310 xops[0] = this_param;
29311 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29312 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29313 }
29314 else
29315 this_reg = NULL_RTX;
29316
29317 /* Adjust the this parameter by a fixed constant. */
29318 if (delta)
29319 {
29320 xops[0] = GEN_INT (delta);
29321 xops[1] = this_reg ? this_reg : this_param;
29322 if (TARGET_64BIT)
29323 {
29324 if (!x86_64_general_operand (xops[0], DImode))
29325 {
29326 tmp = gen_rtx_REG (DImode, R10_REG);
29327 xops[1] = tmp;
29328 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29329 xops[0] = tmp;
29330 xops[1] = this_param;
29331 }
29332 if (x86_maybe_negate_const_int (&xops[0], DImode))
29333 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29334 else
29335 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29336 }
29337 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29338 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29339 else
29340 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29341 }
29342
29343 /* Adjust the this parameter by a value stored in the vtable. */
29344 if (vcall_offset)
29345 {
29346 if (TARGET_64BIT)
29347 tmp = gen_rtx_REG (DImode, R10_REG);
29348 else
29349 {
29350 int tmp_regno = CX_REG;
29351 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29352 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29353 tmp_regno = AX_REG;
29354 tmp = gen_rtx_REG (SImode, tmp_regno);
29355 }
29356
29357 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29358 xops[1] = tmp;
29359 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29360
29361 /* Adjust the this parameter. */
29362 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29363 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29364 {
29365 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29366 xops[0] = GEN_INT (vcall_offset);
29367 xops[1] = tmp2;
29368 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29369 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29370 }
29371 xops[1] = this_reg;
29372 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29373 }
29374
29375 /* If necessary, drop THIS back to its stack slot. */
29376 if (this_reg && this_reg != this_param)
29377 {
29378 xops[0] = this_reg;
29379 xops[1] = this_param;
29380 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29381 }
29382
29383 xops[0] = XEXP (DECL_RTL (function), 0);
29384 if (TARGET_64BIT)
29385 {
29386 if (!flag_pic || targetm.binds_local_p (function)
29387 || DEFAULT_ABI == MS_ABI)
29388 output_asm_insn ("jmp\t%P0", xops);
29389 /* All thunks should be in the same object as their target,
29390 and thus binds_local_p should be true. */
29391 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29392 gcc_unreachable ();
29393 else
29394 {
29395 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29396 tmp = gen_rtx_CONST (Pmode, tmp);
29397 tmp = gen_rtx_MEM (QImode, tmp);
29398 xops[0] = tmp;
29399 output_asm_insn ("jmp\t%A0", xops);
29400 }
29401 }
29402 else
29403 {
29404 if (!flag_pic || targetm.binds_local_p (function))
29405 output_asm_insn ("jmp\t%P0", xops);
29406 else
29407 #if TARGET_MACHO
29408 if (TARGET_MACHO)
29409 {
29410 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29411 if (TARGET_MACHO_BRANCH_ISLANDS)
29412 sym_ref = (gen_rtx_SYMBOL_REF
29413 (Pmode,
29414 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29415 tmp = gen_rtx_MEM (QImode, sym_ref);
29416 xops[0] = tmp;
29417 output_asm_insn ("jmp\t%0", xops);
29418 }
29419 else
29420 #endif /* TARGET_MACHO */
29421 {
29422 tmp = gen_rtx_REG (SImode, CX_REG);
29423 output_set_got (tmp, NULL_RTX);
29424
29425 xops[1] = tmp;
29426 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29427 output_asm_insn ("jmp\t{*}%1", xops);
29428 }
29429 }
29430 final_end_function ();
29431 }
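
/* Rough sketch of the output (assuming a 32-bit target, no PIC, no
   register parameters, DELTA == 8 and no VCALL_OFFSET): the thunk body
   reduces to

	addl	$8, 4(%esp)
	jmp	target_function

   i.e. THIS is adjusted in its stack slot and control transfers directly
   to the real function.  */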
29432
29433 static void
29434 x86_file_start (void)
29435 {
29436 default_file_start ();
29437 #if TARGET_MACHO
29438 darwin_file_start ();
29439 #endif
29440 if (X86_FILE_START_VERSION_DIRECTIVE)
29441 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29442 if (X86_FILE_START_FLTUSED)
29443 fputs ("\t.global\t__fltused\n", asm_out_file);
29444 if (ix86_asm_dialect == ASM_INTEL)
29445 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29446 }
29447
29448 int
29449 x86_field_alignment (tree field, int computed)
29450 {
29451 enum machine_mode mode;
29452 tree type = TREE_TYPE (field);
29453
29454 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29455 return computed;
29456 mode = TYPE_MODE (strip_array_types (type));
29457 if (mode == DFmode || mode == DCmode
29458 || GET_MODE_CLASS (mode) == MODE_INT
29459 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29460 return MIN (32, computed);
29461 return computed;
29462 }
29463
29464 /* Output assembler code to FILE to increment profiler label # LABELNO
29465 for profiling a function entry. */
29466 void
29467 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29468 {
29469 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29470 : MCOUNT_NAME);
29471
29472 if (TARGET_64BIT)
29473 {
29474 #ifndef NO_PROFILE_COUNTERS
29475 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29476 #endif
29477
29478 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29479 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29480 else
29481 fprintf (file, "\tcall\t%s\n", mcount_name);
29482 }
29483 else if (flag_pic)
29484 {
29485 #ifndef NO_PROFILE_COUNTERS
29486 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29487 LPREFIX, labelno);
29488 #endif
29489 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29490 }
29491 else
29492 {
29493 #ifndef NO_PROFILE_COUNTERS
29494 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29495 LPREFIX, labelno);
29496 #endif
29497 fprintf (file, "\tcall\t%s\n", mcount_name);
29498 }
29499 }
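
/* For instance, on x86_64 without PIC the code above emits (with profile
   counters enabled) roughly:

	leaq	<LPREFIX>P0(%rip), %r11
	call	<mcount>

   where <mcount> is MCOUNT_NAME, or MCOUNT_NAME_BEFORE_PROLOGUE when
   -mfentry is in effect.  */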
29500
29501 /* We don't have exact information about the insn sizes, but we may assume
29502 quite safely that we are informed about all 1 byte insns and memory
29503 address sizes. This is enough to eliminate unnecessary padding in
29504 99% of cases. */
29505
29506 static int
29507 min_insn_size (rtx insn)
29508 {
29509 int l = 0, len;
29510
29511 if (!INSN_P (insn) || !active_insn_p (insn))
29512 return 0;
29513
29514 /* Discard the alignments we've emitted, and jump table data. */
29515 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29516 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29517 return 0;
29518 if (JUMP_TABLE_DATA_P (insn))
29519 return 0;
29520
29521 /* Important case - calls are always 5 bytes.
29522 It is common to have many calls in a row. */
29523 if (CALL_P (insn)
29524 && symbolic_reference_mentioned_p (PATTERN (insn))
29525 && !SIBLING_CALL_P (insn))
29526 return 5;
29527 len = get_attr_length (insn);
29528 if (len <= 1)
29529 return 1;
29530
29531 /* For normal instructions we rely on get_attr_length being exact,
29532 with a few exceptions. */
29533 if (!JUMP_P (insn))
29534 {
29535 enum attr_type type = get_attr_type (insn);
29536
29537 switch (type)
29538 {
29539 case TYPE_MULTI:
29540 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29541 || asm_noperands (PATTERN (insn)) >= 0)
29542 return 0;
29543 break;
29544 case TYPE_OTHER:
29545 case TYPE_FCMP:
29546 break;
29547 default:
29548 /* Otherwise trust get_attr_length. */
29549 return len;
29550 }
29551
29552 l = get_attr_length_address (insn);
29553 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29554 l = 4;
29555 }
29556 if (l)
29557 return 1+l;
29558 else
29559 return 2;
29560 }
29561
29562 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29563
29564 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
29565 window. */
29566
29567 static void
29568 ix86_avoid_jump_mispredicts (void)
29569 {
29570 rtx insn, start = get_insns ();
29571 int nbytes = 0, njumps = 0;
29572 int isjump = 0;
29573
29574 /* Look for all minimal intervals of instructions containing 4 jumps.
29575 The intervals are bounded by START and INSN. NBYTES is the total
29576 size of the instructions in the interval, including INSN and not including
29577 START. When NBYTES is smaller than 16 bytes, it is possible
29578 that the ends of START and INSN land in the same 16 byte page.
29579
29580 The smallest offset in the page at which INSN can start is the case where
29581 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29582 We add a p2align to the 16 byte window with max skip 15 - NBYTES + sizeof (INSN).
29583 */
29584 for (insn = start; insn; insn = NEXT_INSN (insn))
29585 {
29586 int min_size;
29587
29588 if (LABEL_P (insn))
29589 {
29590 int align = label_to_alignment (insn);
29591 int max_skip = label_to_max_skip (insn);
29592
29593 if (max_skip > 15)
29594 max_skip = 15;
29595 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29596 already in the current 16 byte page, because otherwise
29597 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29598 bytes to reach 16 byte boundary. */
29599 if (align <= 0
29600 || (align <= 3 && max_skip != (1 << align) - 1))
29601 max_skip = 0;
29602 if (dump_file)
29603 fprintf (dump_file, "Label %i with max_skip %i\n",
29604 INSN_UID (insn), max_skip);
29605 if (max_skip)
29606 {
29607 while (nbytes + max_skip >= 16)
29608 {
29609 start = NEXT_INSN (start);
29610 if ((JUMP_P (start)
29611 && GET_CODE (PATTERN (start)) != ADDR_VEC
29612 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29613 || CALL_P (start))
29614 njumps--, isjump = 1;
29615 else
29616 isjump = 0;
29617 nbytes -= min_insn_size (start);
29618 }
29619 }
29620 continue;
29621 }
29622
29623 min_size = min_insn_size (insn);
29624 nbytes += min_size;
29625 if (dump_file)
29626 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29627 INSN_UID (insn), min_size);
29628 if ((JUMP_P (insn)
29629 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29630 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29631 || CALL_P (insn))
29632 njumps++;
29633 else
29634 continue;
29635
29636 while (njumps > 3)
29637 {
29638 start = NEXT_INSN (start);
29639 if ((JUMP_P (start)
29640 && GET_CODE (PATTERN (start)) != ADDR_VEC
29641 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29642 || CALL_P (start))
29643 njumps--, isjump = 1;
29644 else
29645 isjump = 0;
29646 nbytes -= min_insn_size (start);
29647 }
29648 gcc_assert (njumps >= 0);
29649 if (dump_file)
29650 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29651 INSN_UID (start), INSN_UID (insn), nbytes);
29652
29653 if (njumps == 3 && isjump && nbytes < 16)
29654 {
29655 int padsize = 15 - nbytes + min_insn_size (insn);
29656
29657 if (dump_file)
29658 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29659 INSN_UID (insn), padsize);
29660 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29661 }
29662 }
29663 }
29664 #endif
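
/* Worked example of the padding above: if the interval START..INSN would
   put a fourth jump into the same window, NBYTES is 12 and INSN itself is
   a 2 byte jump, then padsize = 15 - 12 + 2 = 5 bytes of padding are
   emitted before INSN, guaranteeing it cannot share a 16 byte page with
   START.  */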
29665
29666 /* AMD Athlon works faster
29667 when RET is not the destination of a conditional jump or directly preceded
29668 by another jump instruction. We avoid the penalty by inserting a NOP just
29669 before the RET instruction in such cases. */
29670 static void
29671 ix86_pad_returns (void)
29672 {
29673 edge e;
29674 edge_iterator ei;
29675
29676 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29677 {
29678 basic_block bb = e->src;
29679 rtx ret = BB_END (bb);
29680 rtx prev;
29681 bool replace = false;
29682
29683 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29684 || optimize_bb_for_size_p (bb))
29685 continue;
29686 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29687 if (active_insn_p (prev) || LABEL_P (prev))
29688 break;
29689 if (prev && LABEL_P (prev))
29690 {
29691 edge e;
29692 edge_iterator ei;
29693
29694 FOR_EACH_EDGE (e, ei, bb->preds)
29695 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29696 && !(e->flags & EDGE_FALLTHRU))
29697 replace = true;
29698 }
29699 if (!replace)
29700 {
29701 prev = prev_active_insn (ret);
29702 if (prev
29703 && ((JUMP_P (prev) && any_condjump_p (prev))
29704 || CALL_P (prev)))
29705 replace = true;
29706 /* Empty functions get a branch mispredict even when
29707 the jump destination is not visible to us. */
29708 if (!prev && !optimize_function_for_size_p (cfun))
29709 replace = true;
29710 }
29711 if (replace)
29712 {
29713 emit_jump_insn_before (gen_return_internal_long (), ret);
29714 delete_insn (ret);
29715 }
29716 }
29717 }
29718
29719 /* Count the minimum number of instructions in BB. Return 4 if the
29720 number of instructions >= 4. */
29721
29722 static int
29723 ix86_count_insn_bb (basic_block bb)
29724 {
29725 rtx insn;
29726 int insn_count = 0;
29727
29728 /* Count number of instructions in this block. Return 4 if the number
29729 of instructions >= 4. */
29730 FOR_BB_INSNS (bb, insn)
29731 {
29732 /* Return jumps only happen in exit blocks. */
29733 if (JUMP_P (insn)
29734 && GET_CODE (PATTERN (insn)) == RETURN)
29735 break;
29736
29737 if (NONDEBUG_INSN_P (insn)
29738 && GET_CODE (PATTERN (insn)) != USE
29739 && GET_CODE (PATTERN (insn)) != CLOBBER)
29740 {
29741 insn_count++;
29742 if (insn_count >= 4)
29743 return insn_count;
29744 }
29745 }
29746
29747 return insn_count;
29748 }
29749
29750
29751 /* Count the minimum number of instructions in a code path through BB.
29752 Return 4 if the number of instructions >= 4. */
29753
29754 static int
29755 ix86_count_insn (basic_block bb)
29756 {
29757 edge e;
29758 edge_iterator ei;
29759 int min_prev_count;
29760
29761 /* Only bother counting instructions along paths with no
29762 more than 2 basic blocks between entry and exit. Given
29763 that BB has an edge to exit, determine if a predecessor
29764 of BB has an edge from entry. If so, compute the number
29765 of instructions in the predecessor block. If there
29766 happen to be multiple such blocks, compute the minimum. */
29767 min_prev_count = 4;
29768 FOR_EACH_EDGE (e, ei, bb->preds)
29769 {
29770 edge prev_e;
29771 edge_iterator prev_ei;
29772
29773 if (e->src == ENTRY_BLOCK_PTR)
29774 {
29775 min_prev_count = 0;
29776 break;
29777 }
29778 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29779 {
29780 if (prev_e->src == ENTRY_BLOCK_PTR)
29781 {
29782 int count = ix86_count_insn_bb (e->src);
29783 if (count < min_prev_count)
29784 min_prev_count = count;
29785 break;
29786 }
29787 }
29788 }
29789
29790 if (min_prev_count < 4)
29791 min_prev_count += ix86_count_insn_bb (bb);
29792
29793 return min_prev_count;
29794 }
29795
29796 /* Pad short functions to 4 instructions. */
29797
29798 static void
29799 ix86_pad_short_function (void)
29800 {
29801 edge e;
29802 edge_iterator ei;
29803
29804 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29805 {
29806 rtx ret = BB_END (e->src);
29807 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29808 {
29809 int insn_count = ix86_count_insn (e->src);
29810
29811 /* Pad short function. */
29812 if (insn_count < 4)
29813 {
29814 rtx insn = ret;
29815
29816 /* Find epilogue. */
29817 while (insn
29818 && (!NOTE_P (insn)
29819 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29820 insn = PREV_INSN (insn);
29821
29822 if (!insn)
29823 insn = ret;
29824
29825 /* Two NOPs count as one instruction. */
29826 insn_count = 2 * (4 - insn_count);
29827 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
29828 }
29829 }
29830 }
29831 }
29832
29833 /* Implement machine specific optimizations. We implement padding of returns
29834 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
29835 static void
29836 ix86_reorg (void)
29837 {
29838 /* We are freeing block_for_insn in the toplev to keep compatibility
29839 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29840 compute_bb_for_insn ();
29841
29842 /* Run the vzeroupper optimization if needed. */
29843 if (TARGET_VZEROUPPER)
29844 move_or_delete_vzeroupper ();
29845
29846 if (optimize && optimize_function_for_speed_p (cfun))
29847 {
29848 if (TARGET_PAD_SHORT_FUNCTION)
29849 ix86_pad_short_function ();
29850 else if (TARGET_PAD_RETURNS)
29851 ix86_pad_returns ();
29852 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29853 if (TARGET_FOUR_JUMP_LIMIT)
29854 ix86_avoid_jump_mispredicts ();
29855 #endif
29856 }
29857 }
29858
29859 /* Return nonzero when a QImode register that must be represented via a REX
29860 prefix is used. */
29861 bool
29862 x86_extended_QIreg_mentioned_p (rtx insn)
29863 {
29864 int i;
29865 extract_insn_cached (insn);
29866 for (i = 0; i < recog_data.n_operands; i++)
29867 if (REG_P (recog_data.operand[i])
29868 && REGNO (recog_data.operand[i]) > BX_REG)
29869 return true;
29870 return false;
29871 }
29872
29873 /* Return nonzero when P points to a register encoded via a REX prefix.
29874 Called via for_each_rtx. */
29875 static int
29876 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29877 {
29878 unsigned int regno;
29879 if (!REG_P (*p))
29880 return 0;
29881 regno = REGNO (*p);
29882 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29883 }
29884
29885 /* Return true when INSN mentions a register that must be encoded using a REX
29886 prefix. */
29887 bool
29888 x86_extended_reg_mentioned_p (rtx insn)
29889 {
29890 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29891 extended_reg_mentioned_1, NULL);
29892 }
29893
29894 /* If profitable, negate (without causing overflow) integer constant
29895 of mode MODE at location LOC. Return true in this case. */
29896 bool
29897 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29898 {
29899 HOST_WIDE_INT val;
29900
29901 if (!CONST_INT_P (*loc))
29902 return false;
29903
29904 switch (mode)
29905 {
29906 case DImode:
29907 /* DImode x86_64 constants must fit in 32 bits. */
29908 gcc_assert (x86_64_immediate_operand (*loc, mode));
29909
29910 mode = SImode;
29911 break;
29912
29913 case SImode:
29914 case HImode:
29915 case QImode:
29916 break;
29917
29918 default:
29919 gcc_unreachable ();
29920 }
29921
29922 /* Avoid overflows. */
29923 if (mode_signbit_p (mode, *loc))
29924 return false;
29925
29926 val = INTVAL (*loc);
29927
29928 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
29929 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
29930 if ((val < 0 && val != -128)
29931 || val == 128)
29932 {
29933 *loc = GEN_INT (-val);
29934 return true;
29935 }
29936
29937 return false;
29938 }
29939
29940 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29941 optabs would emit if we didn't have TFmode patterns. */
29942
29943 void
29944 x86_emit_floatuns (rtx operands[2])
29945 {
29946 rtx neglab, donelab, i0, i1, f0, in, out;
29947 enum machine_mode mode, inmode;
29948
29949 inmode = GET_MODE (operands[1]);
29950 gcc_assert (inmode == SImode || inmode == DImode);
29951
29952 out = operands[0];
29953 in = force_reg (inmode, operands[1]);
29954 mode = GET_MODE (out);
29955 neglab = gen_label_rtx ();
29956 donelab = gen_label_rtx ();
29957 f0 = gen_reg_rtx (mode);
29958
29959 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29960
29961 expand_float (out, in, 0);
29962
29963 emit_jump_insn (gen_jump (donelab));
29964 emit_barrier ();
29965
29966 emit_label (neglab);
29967
29968 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29969 1, OPTAB_DIRECT);
29970 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29971 1, OPTAB_DIRECT);
29972 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29973
29974 expand_float (f0, i0, 0);
29975
29976 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
29977
29978 emit_label (donelab);
29979 }
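
/* Note on the negative path above: when the sign bit of the input is set,
   the value is shifted right by one with the discarded low bit OR-ed back
   in, converted as a signed value, and then doubled via f0 + f0, so the
   final rounding still accounts for the low-order bit.  */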
29980 \f
29981 /* AVX does not support 32-byte integer vector operations,
29982 thus the longest vector we are faced with is V16QImode. */
29983 #define MAX_VECT_LEN 16
29984
29985 struct expand_vec_perm_d
29986 {
29987 rtx target, op0, op1;
29988 unsigned char perm[MAX_VECT_LEN];
29989 enum machine_mode vmode;
29990 unsigned char nelt;
29991 bool testing_p;
29992 };
29993
29994 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
29995 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
29996
29997 /* Get a vector mode of the same size as the original but with elements
29998 twice as wide. This is only guaranteed to apply to integral vectors. */
29999
30000 static inline enum machine_mode
30001 get_mode_wider_vector (enum machine_mode o)
30002 {
30003 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30004 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30005 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30006 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30007 return n;
30008 }
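
/* E.g. V16QImode yields V8HImode and V8HImode yields V4SImode: the 16 byte
   total size is kept while the element count is halved.  */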
30009
30010 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30011 with all elements equal to VAR. Return true if successful. */
30012
30013 static bool
30014 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30015 rtx target, rtx val)
30016 {
30017 bool ok;
30018
30019 switch (mode)
30020 {
30021 case V2SImode:
30022 case V2SFmode:
30023 if (!mmx_ok)
30024 return false;
30025 /* FALLTHRU */
30026
30027 case V4DFmode:
30028 case V4DImode:
30029 case V8SFmode:
30030 case V8SImode:
30031 case V2DFmode:
30032 case V2DImode:
30033 case V4SFmode:
30034 case V4SImode:
30035 {
30036 rtx insn, dup;
30037
30038 /* First attempt to recognize VAL as-is. */
30039 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30040 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30041 if (recog_memoized (insn) < 0)
30042 {
30043 rtx seq;
30044 /* If that fails, force VAL into a register. */
30045
30046 start_sequence ();
30047 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30048 seq = get_insns ();
30049 end_sequence ();
30050 if (seq)
30051 emit_insn_before (seq, insn);
30052
30053 ok = recog_memoized (insn) >= 0;
30054 gcc_assert (ok);
30055 }
30056 }
30057 return true;
30058
30059 case V4HImode:
30060 if (!mmx_ok)
30061 return false;
30062 if (TARGET_SSE || TARGET_3DNOW_A)
30063 {
30064 rtx x;
30065
30066 val = gen_lowpart (SImode, val);
30067 x = gen_rtx_TRUNCATE (HImode, val);
30068 x = gen_rtx_VEC_DUPLICATE (mode, x);
30069 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30070 return true;
30071 }
30072 goto widen;
30073
30074 case V8QImode:
30075 if (!mmx_ok)
30076 return false;
30077 goto widen;
30078
30079 case V8HImode:
30080 if (TARGET_SSE2)
30081 {
30082 struct expand_vec_perm_d dperm;
30083 rtx tmp1, tmp2;
30084
30085 permute:
30086 memset (&dperm, 0, sizeof (dperm));
30087 dperm.target = target;
30088 dperm.vmode = mode;
30089 dperm.nelt = GET_MODE_NUNITS (mode);
30090 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30091
30092 /* Extend to SImode using a paradoxical SUBREG. */
30093 tmp1 = gen_reg_rtx (SImode);
30094 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30095
30096 /* Insert the SImode value as low element of a V4SImode vector. */
30097 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30098 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30099
30100 ok = (expand_vec_perm_1 (&dperm)
30101 || expand_vec_perm_broadcast_1 (&dperm));
30102 gcc_assert (ok);
30103 return ok;
30104 }
30105 goto widen;
30106
30107 case V16QImode:
30108 if (TARGET_SSE2)
30109 goto permute;
30110 goto widen;
30111
30112 widen:
30113 /* Replicate the value once into the next wider mode and recurse. */
30114 {
30115 enum machine_mode smode, wsmode, wvmode;
30116 rtx x;
30117
30118 smode = GET_MODE_INNER (mode);
30119 wvmode = get_mode_wider_vector (mode);
30120 wsmode = GET_MODE_INNER (wvmode);
30121
30122 val = convert_modes (wsmode, smode, val, true);
30123 x = expand_simple_binop (wsmode, ASHIFT, val,
30124 GEN_INT (GET_MODE_BITSIZE (smode)),
30125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30126 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30127
30128 x = gen_lowpart (wvmode, target);
30129 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30130 gcc_assert (ok);
30131 return ok;
30132 }
30133
30134 case V16HImode:
30135 case V32QImode:
30136 {
30137 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30138 rtx x = gen_reg_rtx (hvmode);
30139
30140 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30141 gcc_assert (ok);
30142
30143 x = gen_rtx_VEC_CONCAT (mode, x, x);
30144 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30145 }
30146 return true;
30147
30148 default:
30149 return false;
30150 }
30151 }
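
/* For instance, broadcasting a QImode value into V16QImode with SSE2 takes
   the "permute" path above (insert the value as the low element of a
   V4SImode vector, then apply a constant permutation), while without SSE2
   the "widen" path doubles the element width step by step instead.  */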
30152
30153 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30154 whose ONE_VAR element is VAR, and other elements are zero. Return true
30155 if successful. */
30156
30157 static bool
30158 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30159 rtx target, rtx var, int one_var)
30160 {
30161 enum machine_mode vsimode;
30162 rtx new_target;
30163 rtx x, tmp;
30164 bool use_vector_set = false;
30165
30166 switch (mode)
30167 {
30168 case V2DImode:
30169 /* For SSE4.1, we normally use vector set. But if the second
30170 element is zero and inter-unit moves are OK, we use movq
30171 instead. */
30172 use_vector_set = (TARGET_64BIT
30173 && TARGET_SSE4_1
30174 && !(TARGET_INTER_UNIT_MOVES
30175 && one_var == 0));
30176 break;
30177 case V16QImode:
30178 case V4SImode:
30179 case V4SFmode:
30180 use_vector_set = TARGET_SSE4_1;
30181 break;
30182 case V8HImode:
30183 use_vector_set = TARGET_SSE2;
30184 break;
30185 case V4HImode:
30186 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30187 break;
30188 case V32QImode:
30189 case V16HImode:
30190 case V8SImode:
30191 case V8SFmode:
30192 case V4DFmode:
30193 use_vector_set = TARGET_AVX;
30194 break;
30195 case V4DImode:
30196 /* Use ix86_expand_vector_set in 64bit mode only. */
30197 use_vector_set = TARGET_AVX && TARGET_64BIT;
30198 break;
30199 default:
30200 break;
30201 }
30202
30203 if (use_vector_set)
30204 {
30205 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30206 var = force_reg (GET_MODE_INNER (mode), var);
30207 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30208 return true;
30209 }
30210
30211 switch (mode)
30212 {
30213 case V2SFmode:
30214 case V2SImode:
30215 if (!mmx_ok)
30216 return false;
30217 /* FALLTHRU */
30218
30219 case V2DFmode:
30220 case V2DImode:
30221 if (one_var != 0)
30222 return false;
30223 var = force_reg (GET_MODE_INNER (mode), var);
30224 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30225 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30226 return true;
30227
30228 case V4SFmode:
30229 case V4SImode:
30230 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30231 new_target = gen_reg_rtx (mode);
30232 else
30233 new_target = target;
30234 var = force_reg (GET_MODE_INNER (mode), var);
30235 x = gen_rtx_VEC_DUPLICATE (mode, var);
30236 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30237 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30238 if (one_var != 0)
30239 {
30240 /* We need to shuffle the value to the correct position, so
30241 create a new pseudo to store the intermediate result. */
30242
30243 /* With SSE2, we can use the integer shuffle insns. */
30244 if (mode != V4SFmode && TARGET_SSE2)
30245 {
30246 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30247 const1_rtx,
30248 GEN_INT (one_var == 1 ? 0 : 1),
30249 GEN_INT (one_var == 2 ? 0 : 1),
30250 GEN_INT (one_var == 3 ? 0 : 1)));
30251 if (target != new_target)
30252 emit_move_insn (target, new_target);
30253 return true;
30254 }
30255
30256 /* Otherwise convert the intermediate result to V4SFmode and
30257 use the SSE1 shuffle instructions. */
30258 if (mode != V4SFmode)
30259 {
30260 tmp = gen_reg_rtx (V4SFmode);
30261 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30262 }
30263 else
30264 tmp = new_target;
30265
30266 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30267 const1_rtx,
30268 GEN_INT (one_var == 1 ? 0 : 1),
30269 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30270 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30271
30272 if (mode != V4SFmode)
30273 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30274 else if (tmp != target)
30275 emit_move_insn (target, tmp);
30276 }
30277 else if (target != new_target)
30278 emit_move_insn (target, new_target);
30279 return true;
30280
30281 case V8HImode:
30282 case V16QImode:
30283 vsimode = V4SImode;
30284 goto widen;
30285 case V4HImode:
30286 case V8QImode:
30287 if (!mmx_ok)
30288 return false;
30289 vsimode = V2SImode;
30290 goto widen;
30291 widen:
30292 if (one_var != 0)
30293 return false;
30294
30295 /* Zero extend the variable element to SImode and recurse. */
30296 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30297
30298 x = gen_reg_rtx (vsimode);
30299 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30300 var, one_var))
30301 gcc_unreachable ();
30302
30303 emit_move_insn (target, gen_lowpart (mode, x));
30304 return true;
30305
30306 default:
30307 return false;
30308 }
30309 }
30310
30311 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30312 consisting of the values in VALS. It is known that all elements
30313 except ONE_VAR are constants. Return true if successful. */
30314
30315 static bool
30316 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30317 rtx target, rtx vals, int one_var)
30318 {
30319 rtx var = XVECEXP (vals, 0, one_var);
30320 enum machine_mode wmode;
30321 rtx const_vec, x;
30322
30323 const_vec = copy_rtx (vals);
30324 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30325 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30326
30327 switch (mode)
30328 {
30329 case V2DFmode:
30330 case V2DImode:
30331 case V2SFmode:
30332 case V2SImode:
30333 /* For the two element vectors, it's just as easy to use
30334 the general case. */
30335 return false;
30336
30337 case V4DImode:
30338 /* Use ix86_expand_vector_set in 64bit mode only. */
30339 if (!TARGET_64BIT)
30340 return false;
30341 case V4DFmode:
30342 case V8SFmode:
30343 case V8SImode:
30344 case V16HImode:
30345 case V32QImode:
30346 case V4SFmode:
30347 case V4SImode:
30348 case V8HImode:
30349 case V4HImode:
30350 break;
30351
30352 case V16QImode:
30353 if (TARGET_SSE4_1)
30354 break;
30355 wmode = V8HImode;
30356 goto widen;
30357 case V8QImode:
30358 wmode = V4HImode;
30359 goto widen;
30360 widen:
30361 /* There's no way to set one QImode entry easily. Combine
30362 the variable value with its adjacent constant value, and
30363 promote to an HImode set. */
30364 x = XVECEXP (vals, 0, one_var ^ 1);
30365 if (one_var & 1)
30366 {
30367 var = convert_modes (HImode, QImode, var, true);
30368 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30369 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30370 x = GEN_INT (INTVAL (x) & 0xff);
30371 }
30372 else
30373 {
30374 var = convert_modes (HImode, QImode, var, true);
30375 x = gen_int_mode (INTVAL (x) << 8, HImode);
30376 }
30377 if (x != const0_rtx)
30378 var = expand_simple_binop (HImode, IOR, var, x, var,
30379 1, OPTAB_LIB_WIDEN);
30380
30381 x = gen_reg_rtx (wmode);
30382 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30383 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30384
30385 emit_move_insn (target, gen_lowpart (mode, x));
30386 return true;
30387
30388 default:
30389 return false;
30390 }
30391
30392 emit_move_insn (target, const_vec);
30393 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30394 return true;
30395 }
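
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the HImode value built by the QImode widening path above, written
   as plain host C.  The helper name and parameters are made up for
   the example; VAR is the variable byte, NEIGHBOUR its adjacent
   constant byte, and ONE_VAR the index of the variable element.  */
#if 0
#include <stdint.h>

static uint16_t
pack_byte_pair (uint8_t var, uint8_t neighbour, int one_var)
{
  if (one_var & 1)
    /* Variable byte is the high half of the pair.  */
    return (uint16_t) (((uint16_t) var << 8) | neighbour);
  /* Variable byte is the low half of the pair.  */
  return (uint16_t) (((uint16_t) neighbour << 8) | var);
}
#endif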
30396
30397 /* A subroutine of ix86_expand_vector_init_general. Use vector
30398 concatenate to handle the most general case: all values variable,
30399 and none identical. */
30400
30401 static void
30402 ix86_expand_vector_init_concat (enum machine_mode mode,
30403 rtx target, rtx *ops, int n)
30404 {
30405 enum machine_mode cmode, hmode = VOIDmode;
30406 rtx first[8], second[4];
30407 rtvec v;
30408 int i, j;
30409
30410 switch (n)
30411 {
30412 case 2:
30413 switch (mode)
30414 {
30415 case V8SImode:
30416 cmode = V4SImode;
30417 break;
30418 case V8SFmode:
30419 cmode = V4SFmode;
30420 break;
30421 case V4DImode:
30422 cmode = V2DImode;
30423 break;
30424 case V4DFmode:
30425 cmode = V2DFmode;
30426 break;
30427 case V4SImode:
30428 cmode = V2SImode;
30429 break;
30430 case V4SFmode:
30431 cmode = V2SFmode;
30432 break;
30433 case V2DImode:
30434 cmode = DImode;
30435 break;
30436 case V2SImode:
30437 cmode = SImode;
30438 break;
30439 case V2DFmode:
30440 cmode = DFmode;
30441 break;
30442 case V2SFmode:
30443 cmode = SFmode;
30444 break;
30445 default:
30446 gcc_unreachable ();
30447 }
30448
30449 if (!register_operand (ops[1], cmode))
30450 ops[1] = force_reg (cmode, ops[1]);
30451 if (!register_operand (ops[0], cmode))
30452 ops[0] = force_reg (cmode, ops[0]);
30453 emit_insn (gen_rtx_SET (VOIDmode, target,
30454 gen_rtx_VEC_CONCAT (mode, ops[0],
30455 ops[1])));
30456 break;
30457
30458 case 4:
30459 switch (mode)
30460 {
30461 case V4DImode:
30462 cmode = V2DImode;
30463 break;
30464 case V4DFmode:
30465 cmode = V2DFmode;
30466 break;
30467 case V4SImode:
30468 cmode = V2SImode;
30469 break;
30470 case V4SFmode:
30471 cmode = V2SFmode;
30472 break;
30473 default:
30474 gcc_unreachable ();
30475 }
30476 goto half;
30477
30478 case 8:
30479 switch (mode)
30480 {
30481 case V8SImode:
30482 cmode = V2SImode;
30483 hmode = V4SImode;
30484 break;
30485 case V8SFmode:
30486 cmode = V2SFmode;
30487 hmode = V4SFmode;
30488 break;
30489 default:
30490 gcc_unreachable ();
30491 }
30492 goto half;
30493
30494 half:
30495 /* FIXME: We process inputs backward to help RA. PR 36222. */
30496 i = n - 1;
30497 j = (n >> 1) - 1;
30498 for (; i > 0; i -= 2, j--)
30499 {
30500 first[j] = gen_reg_rtx (cmode);
30501 v = gen_rtvec (2, ops[i - 1], ops[i]);
30502 ix86_expand_vector_init (false, first[j],
30503 gen_rtx_PARALLEL (cmode, v));
30504 }
30505
30506 n >>= 1;
30507 if (n > 2)
30508 {
30509 gcc_assert (hmode != VOIDmode);
30510 for (i = j = 0; i < n; i += 2, j++)
30511 {
30512 second[j] = gen_reg_rtx (hmode);
30513 ix86_expand_vector_init_concat (hmode, second [j],
30514 &first [i], 2);
30515 }
30516 n >>= 1;
30517 ix86_expand_vector_init_concat (mode, target, second, n);
30518 }
30519 else
30520 ix86_expand_vector_init_concat (mode, target, first, n);
30521 break;
30522
30523 default:
30524 gcc_unreachable ();
30525 }
30526 }
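
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the n == 2 case above for V8SFmode, expressed with AVX intrinsics
   instead of VEC_CONCAT rtl.  The helper name is made up for the
   example.  */
#if 0
#include <immintrin.h>

static __m256
concat_v4sf (__m128 lo, __m128 hi)
{
  __m256 r = _mm256_castps128_ps256 (lo);   /* low 128 bits from LO  */
  return _mm256_insertf128_ps (r, hi, 1);   /* high 128 bits from HI */
}
#endif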
30527
30528 /* A subroutine of ix86_expand_vector_init_general. Use vector
30529 interleave to handle the most general case: all values variable,
30530 and none identical. */
30531
30532 static void
30533 ix86_expand_vector_init_interleave (enum machine_mode mode,
30534 rtx target, rtx *ops, int n)
30535 {
30536 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30537 int i, j;
30538 rtx op0, op1;
30539 rtx (*gen_load_even) (rtx, rtx, rtx);
30540 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30541 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30542
30543 switch (mode)
30544 {
30545 case V8HImode:
30546 gen_load_even = gen_vec_setv8hi;
30547 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30548 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30549 inner_mode = HImode;
30550 first_imode = V4SImode;
30551 second_imode = V2DImode;
30552 third_imode = VOIDmode;
30553 break;
30554 case V16QImode:
30555 gen_load_even = gen_vec_setv16qi;
30556 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30557 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30558 inner_mode = QImode;
30559 first_imode = V8HImode;
30560 second_imode = V4SImode;
30561 third_imode = V2DImode;
30562 break;
30563 default:
30564 gcc_unreachable ();
30565 }
30566
30567 for (i = 0; i < n; i++)
30568 {
30569 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30570 op0 = gen_reg_rtx (SImode);
30571 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30572
30573 /* Insert the SImode value as low element of V4SImode vector. */
30574 op1 = gen_reg_rtx (V4SImode);
30575 op0 = gen_rtx_VEC_MERGE (V4SImode,
30576 gen_rtx_VEC_DUPLICATE (V4SImode,
30577 op0),
30578 CONST0_RTX (V4SImode),
30579 const1_rtx);
30580 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30581
30582 /* Cast the V4SImode vector back to a vector in the original mode. */
30583 op0 = gen_reg_rtx (mode);
30584 emit_move_insn (op0, gen_lowpart (mode, op1));
30585
30586 /* Load even elements into the second position. */
30587 emit_insn (gen_load_even (op0,
30588 force_reg (inner_mode,
30589 ops [i + i + 1]),
30590 const1_rtx));
30591
30592 /* Cast vector to FIRST_IMODE vector. */
30593 ops[i] = gen_reg_rtx (first_imode);
30594 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30595 }
30596
30597 /* Interleave low FIRST_IMODE vectors. */
30598 for (i = j = 0; i < n; i += 2, j++)
30599 {
30600 op0 = gen_reg_rtx (first_imode);
30601 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30602
30603 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30604 ops[j] = gen_reg_rtx (second_imode);
30605 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30606 }
30607
30608 /* Interleave low SECOND_IMODE vectors. */
30609 switch (second_imode)
30610 {
30611 case V4SImode:
30612 for (i = j = 0; i < n / 2; i += 2, j++)
30613 {
30614 op0 = gen_reg_rtx (second_imode);
30615 emit_insn (gen_interleave_second_low (op0, ops[i],
30616 ops[i + 1]));
30617
30618 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30619 vector. */
30620 ops[j] = gen_reg_rtx (third_imode);
30621 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30622 }
30623 second_imode = V2DImode;
30624 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30625 /* FALLTHRU */
30626
30627 case V2DImode:
30628 op0 = gen_reg_rtx (second_imode);
30629 emit_insn (gen_interleave_second_low (op0, ops[0],
30630 ops[1]));
30631
30632 /* Cast the SECOND_IMODE vector back to a vector in the original
30633 mode. */
30634 emit_insn (gen_rtx_SET (VOIDmode, target,
30635 gen_lowpart (mode, op0)));
30636 break;
30637
30638 default:
30639 gcc_unreachable ();
30640 }
30641 }
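
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   one "interleave low" step of the kind chained above, shown with
   SSE2 intrinsics on 16-bit lanes.  The expander applies the same
   idea on the narrow lanes first and then on progressively wider
   ones (punpcklwd, punpckldq, punpcklqdq).  The helper name is made
   up for the example.  */
#if 0
#include <emmintrin.h>

static __m128i
interleave_low_words (__m128i a, __m128i b)
{
  /* Result lanes: a0 b0 a1 b1 a2 b2 a3 b3.  */
  return _mm_unpacklo_epi16 (a, b);
}
#endif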
30642
30643 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30644 all values variable, and none identical. */
30645
30646 static void
30647 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30648 rtx target, rtx vals)
30649 {
30650 rtx ops[32], op0, op1;
30651 enum machine_mode half_mode = VOIDmode;
30652 int n, i;
30653
30654 switch (mode)
30655 {
30656 case V2SFmode:
30657 case V2SImode:
30658 if (!mmx_ok && !TARGET_SSE)
30659 break;
30660 /* FALLTHRU */
30661
30662 case V8SFmode:
30663 case V8SImode:
30664 case V4DFmode:
30665 case V4DImode:
30666 case V4SFmode:
30667 case V4SImode:
30668 case V2DFmode:
30669 case V2DImode:
30670 n = GET_MODE_NUNITS (mode);
30671 for (i = 0; i < n; i++)
30672 ops[i] = XVECEXP (vals, 0, i);
30673 ix86_expand_vector_init_concat (mode, target, ops, n);
30674 return;
30675
30676 case V32QImode:
30677 half_mode = V16QImode;
30678 goto half;
30679
30680 case V16HImode:
30681 half_mode = V8HImode;
30682 goto half;
30683
30684 half:
30685 n = GET_MODE_NUNITS (mode);
30686 for (i = 0; i < n; i++)
30687 ops[i] = XVECEXP (vals, 0, i);
30688 op0 = gen_reg_rtx (half_mode);
30689 op1 = gen_reg_rtx (half_mode);
30690 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30691 n >> 2);
30692 ix86_expand_vector_init_interleave (half_mode, op1,
30693 &ops [n >> 1], n >> 2);
30694 emit_insn (gen_rtx_SET (VOIDmode, target,
30695 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30696 return;
30697
30698 case V16QImode:
30699 if (!TARGET_SSE4_1)
30700 break;
30701 /* FALLTHRU */
30702
30703 case V8HImode:
30704 if (!TARGET_SSE2)
30705 break;
30706
30707 /* Don't use ix86_expand_vector_init_interleave if we can't
30708 move from GPR to SSE register directly. */
30709 if (!TARGET_INTER_UNIT_MOVES)
30710 break;
30711
30712 n = GET_MODE_NUNITS (mode);
30713 for (i = 0; i < n; i++)
30714 ops[i] = XVECEXP (vals, 0, i);
30715 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30716 return;
30717
30718 case V4HImode:
30719 case V8QImode:
30720 break;
30721
30722 default:
30723 gcc_unreachable ();
30724 }
30725
30726 {
30727 int i, j, n_elts, n_words, n_elt_per_word;
30728 enum machine_mode inner_mode;
30729 rtx words[4], shift;
30730
30731 inner_mode = GET_MODE_INNER (mode);
30732 n_elts = GET_MODE_NUNITS (mode);
30733 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30734 n_elt_per_word = n_elts / n_words;
30735 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30736
30737 for (i = 0; i < n_words; ++i)
30738 {
30739 rtx word = NULL_RTX;
30740
30741 for (j = 0; j < n_elt_per_word; ++j)
30742 {
30743 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30744 elt = convert_modes (word_mode, inner_mode, elt, true);
30745
30746 if (j == 0)
30747 word = elt;
30748 else
30749 {
30750 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30751 word, 1, OPTAB_LIB_WIDEN);
30752 word = expand_simple_binop (word_mode, IOR, word, elt,
30753 word, 1, OPTAB_LIB_WIDEN);
30754 }
30755 }
30756
30757 words[i] = word;
30758 }
30759
30760 if (n_words == 1)
30761 emit_move_insn (target, gen_lowpart (mode, words[0]));
30762 else if (n_words == 2)
30763 {
30764 rtx tmp = gen_reg_rtx (mode);
30765 emit_clobber (tmp);
30766 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30767 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30768 emit_move_insn (target, tmp);
30769 }
30770 else if (n_words == 4)
30771 {
30772 rtx tmp = gen_reg_rtx (V4SImode);
30773 gcc_assert (word_mode == SImode);
30774 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30775 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30776 emit_move_insn (target, gen_lowpart (mode, tmp));
30777 }
30778 else
30779 gcc_unreachable ();
30780 }
30781 }
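
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the word-packing fallback above, for four 16-bit elements packed
   into one 64-bit word on a little-endian target.  The element with
   the highest index is taken first and each step shifts the
   accumulated word left by the element width, so element 0 ends up
   in the low bits.  The helper name is made up for the example.  */
#if 0
#include <stdint.h>

static uint64_t
pack_word (const uint16_t elt[4])
{
  uint64_t word = elt[3];
  int j;

  for (j = 2; j >= 0; j--)
    word = (word << 16) | elt[j];
  return word;
}
#endif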
30782
30783 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30784 instructions unless MMX_OK is true. */
30785
30786 void
30787 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30788 {
30789 enum machine_mode mode = GET_MODE (target);
30790 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30791 int n_elts = GET_MODE_NUNITS (mode);
30792 int n_var = 0, one_var = -1;
30793 bool all_same = true, all_const_zero = true;
30794 int i;
30795 rtx x;
30796
30797 for (i = 0; i < n_elts; ++i)
30798 {
30799 x = XVECEXP (vals, 0, i);
30800 if (!(CONST_INT_P (x)
30801 || GET_CODE (x) == CONST_DOUBLE
30802 || GET_CODE (x) == CONST_FIXED))
30803 n_var++, one_var = i;
30804 else if (x != CONST0_RTX (inner_mode))
30805 all_const_zero = false;
30806 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30807 all_same = false;
30808 }
30809
30810 /* Constants are best loaded from the constant pool. */
30811 if (n_var == 0)
30812 {
30813 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30814 return;
30815 }
30816
30817 /* If all values are identical, broadcast the value. */
30818 if (all_same
30819 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30820 XVECEXP (vals, 0, 0)))
30821 return;
30822
30823 /* Values where only one field is non-constant are best loaded from
30824 the pool and overwritten via move later. */
30825 if (n_var == 1)
30826 {
30827 if (all_const_zero
30828 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30829 XVECEXP (vals, 0, one_var),
30830 one_var))
30831 return;
30832
30833 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30834 return;
30835 }
30836
30837 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30838 }
30839
30840 void
30841 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30842 {
30843 enum machine_mode mode = GET_MODE (target);
30844 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30845 enum machine_mode half_mode;
30846 bool use_vec_merge = false;
30847 rtx tmp;
30848 static rtx (*gen_extract[6][2]) (rtx, rtx)
30849 = {
30850 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30851 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30852 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30853 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30854 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30855 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30856 };
30857 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30858 = {
30859 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30860 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30861 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30862 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30863 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30864 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30865 };
30866 int i, j, n;
30867
30868 switch (mode)
30869 {
30870 case V2SFmode:
30871 case V2SImode:
30872 if (mmx_ok)
30873 {
30874 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30875 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30876 if (elt == 0)
30877 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30878 else
30879 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30880 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30881 return;
30882 }
30883 break;
30884
30885 case V2DImode:
30886 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
30887 if (use_vec_merge)
30888 break;
30889
30890 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30891 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
30892 if (elt == 0)
30893 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30894 else
30895 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30896 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30897 return;
30898
30899 case V2DFmode:
30900 {
30901 rtx op0, op1;
30902
30903 /* For the two element vectors, we implement a VEC_CONCAT with
30904 the extraction of the other element. */
30905
30906 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30907 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30908
30909 if (elt == 0)
30910 op0 = val, op1 = tmp;
30911 else
30912 op0 = tmp, op1 = val;
30913
30914 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30915 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30916 }
30917 return;
30918
30919 case V4SFmode:
30920 use_vec_merge = TARGET_SSE4_1;
30921 if (use_vec_merge)
30922 break;
30923
30924 switch (elt)
30925 {
30926 case 0:
30927 use_vec_merge = true;
30928 break;
30929
30930 case 1:
30931 /* tmp = target = A B C D */
30932 tmp = copy_to_reg (target);
30933 /* target = A A B B */
30934 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30935 /* target = X A B B */
30936 ix86_expand_vector_set (false, target, val, 0);
30937 /* target = A X C D */
30938 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30939 const1_rtx, const0_rtx,
30940 GEN_INT (2+4), GEN_INT (3+4)));
30941 return;
30942
30943 case 2:
30944 /* tmp = target = A B C D */
30945 tmp = copy_to_reg (target);
30946 /* tmp = X B C D */
30947 ix86_expand_vector_set (false, tmp, val, 0);
30948 /* target = A B X D */
30949 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30950 const0_rtx, const1_rtx,
30951 GEN_INT (0+4), GEN_INT (3+4)));
30952 return;
30953
30954 case 3:
30955 /* tmp = target = A B C D */
30956 tmp = copy_to_reg (target);
30957 /* tmp = X B C D */
30958 ix86_expand_vector_set (false, tmp, val, 0);
30959 /* target = A B C X */
30960 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30961 const0_rtx, const1_rtx,
30962 GEN_INT (2+4), GEN_INT (0+4)));
30963 return;
30964
30965 default:
30966 gcc_unreachable ();
30967 }
30968 break;
30969
30970 case V4SImode:
30971 use_vec_merge = TARGET_SSE4_1;
30972 if (use_vec_merge)
30973 break;
30974
30975 /* Element 0 handled by vec_merge below. */
30976 if (elt == 0)
30977 {
30978 use_vec_merge = true;
30979 break;
30980 }
30981
30982 if (TARGET_SSE2)
30983 {
30984 /* With SSE2, use integer shuffles to swap element 0 and ELT,
30985 store into element 0, then shuffle them back. */
30986
30987 rtx order[4];
30988
30989 order[0] = GEN_INT (elt);
30990 order[1] = const1_rtx;
30991 order[2] = const2_rtx;
30992 order[3] = GEN_INT (3);
30993 order[elt] = const0_rtx;
30994
30995 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30996 order[1], order[2], order[3]));
30997
30998 ix86_expand_vector_set (false, target, val, 0);
30999
31000 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31001 order[1], order[2], order[3]));
31002 }
31003 else
31004 {
31005 /* For SSE1, we have to reuse the V4SF code. */
31006 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31007 gen_lowpart (SFmode, val), elt);
31008 }
31009 return;
31010
31011 case V8HImode:
31012 use_vec_merge = TARGET_SSE2;
31013 break;
31014 case V4HImode:
31015 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31016 break;
31017
31018 case V16QImode:
31019 use_vec_merge = TARGET_SSE4_1;
31020 break;
31021
31022 case V8QImode:
31023 break;
31024
31025 case V32QImode:
31026 half_mode = V16QImode;
31027 j = 0;
31028 n = 16;
31029 goto half;
31030
31031 case V16HImode:
31032 half_mode = V8HImode;
31033 j = 1;
31034 n = 8;
31035 goto half;
31036
31037 case V8SImode:
31038 half_mode = V4SImode;
31039 j = 2;
31040 n = 4;
31041 goto half;
31042
31043 case V4DImode:
31044 half_mode = V2DImode;
31045 j = 3;
31046 n = 2;
31047 goto half;
31048
31049 case V8SFmode:
31050 half_mode = V4SFmode;
31051 j = 4;
31052 n = 4;
31053 goto half;
31054
31055 case V4DFmode:
31056 half_mode = V2DFmode;
31057 j = 5;
31058 n = 2;
31059 goto half;
31060
31061 half:
31062 /* Compute offset. */
31063 i = elt / n;
31064 elt %= n;
31065
31066 gcc_assert (i <= 1);
31067
31068 /* Extract the half. */
31069 tmp = gen_reg_rtx (half_mode);
31070 emit_insn (gen_extract[j][i] (tmp, target));
31071
31072 /* Put val in tmp at elt. */
31073 ix86_expand_vector_set (false, tmp, val, elt);
31074
31075 /* Put it back. */
31076 emit_insn (gen_insert[j][i] (target, target, tmp));
31077 return;
31078
31079 default:
31080 break;
31081 }
31082
31083 if (use_vec_merge)
31084 {
31085 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31086 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31087 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31088 }
31089 else
31090 {
31091 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31092
31093 emit_move_insn (mem, target);
31094
31095 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31096 emit_move_insn (tmp, val);
31097
31098 emit_move_insn (target, mem);
31099 }
31100 }
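
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the vec_merge path above corresponds to a single insert
   instruction when SSE4.1 is available, shown here for a 32-bit
   lane.  Without it the expander falls back to shuffles or to the
   store/modify/reload-through-memory sequence at the end of the
   function.  The helper name is made up for the example.  */
#if 0
#include <smmintrin.h>

static __m128i
set_lane_2 (__m128i v, int val)
{
  return _mm_insert_epi32 (v, val, 2);   /* pinsrd $2 */
}
#endif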
31101
31102 void
31103 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31104 {
31105 enum machine_mode mode = GET_MODE (vec);
31106 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31107 bool use_vec_extr = false;
31108 rtx tmp;
31109
31110 switch (mode)
31111 {
31112 case V2SImode:
31113 case V2SFmode:
31114 if (!mmx_ok)
31115 break;
31116 /* FALLTHRU */
31117
31118 case V2DFmode:
31119 case V2DImode:
31120 use_vec_extr = true;
31121 break;
31122
31123 case V4SFmode:
31124 use_vec_extr = TARGET_SSE4_1;
31125 if (use_vec_extr)
31126 break;
31127
31128 switch (elt)
31129 {
31130 case 0:
31131 tmp = vec;
31132 break;
31133
31134 case 1:
31135 case 3:
31136 tmp = gen_reg_rtx (mode);
31137 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31138 GEN_INT (elt), GEN_INT (elt),
31139 GEN_INT (elt+4), GEN_INT (elt+4)));
31140 break;
31141
31142 case 2:
31143 tmp = gen_reg_rtx (mode);
31144 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31145 break;
31146
31147 default:
31148 gcc_unreachable ();
31149 }
31150 vec = tmp;
31151 use_vec_extr = true;
31152 elt = 0;
31153 break;
31154
31155 case V4SImode:
31156 use_vec_extr = TARGET_SSE4_1;
31157 if (use_vec_extr)
31158 break;
31159
31160 if (TARGET_SSE2)
31161 {
31162 switch (elt)
31163 {
31164 case 0:
31165 tmp = vec;
31166 break;
31167
31168 case 1:
31169 case 3:
31170 tmp = gen_reg_rtx (mode);
31171 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31172 GEN_INT (elt), GEN_INT (elt),
31173 GEN_INT (elt), GEN_INT (elt)));
31174 break;
31175
31176 case 2:
31177 tmp = gen_reg_rtx (mode);
31178 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31179 break;
31180
31181 default:
31182 gcc_unreachable ();
31183 }
31184 vec = tmp;
31185 use_vec_extr = true;
31186 elt = 0;
31187 }
31188 else
31189 {
31190 /* For SSE1, we have to reuse the V4SF code. */
31191 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31192 gen_lowpart (V4SFmode, vec), elt);
31193 return;
31194 }
31195 break;
31196
31197 case V8HImode:
31198 use_vec_extr = TARGET_SSE2;
31199 break;
31200 case V4HImode:
31201 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31202 break;
31203
31204 case V16QImode:
31205 use_vec_extr = TARGET_SSE4_1;
31206 break;
31207
31208 case V8QImode:
31209 /* ??? Could extract the appropriate HImode element and shift. */
31210 default:
31211 break;
31212 }
31213
31214 if (use_vec_extr)
31215 {
31216 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31217 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31218
31219 /* Let the rtl optimizers know about the zero extension performed. */
31220 if (inner_mode == QImode || inner_mode == HImode)
31221 {
31222 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31223 target = gen_lowpart (SImode, target);
31224 }
31225
31226 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31227 }
31228 else
31229 {
31230 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31231
31232 emit_move_insn (mem, vec);
31233
31234 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31235 emit_move_insn (target, tmp);
31236 }
31237 }
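
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   extracting lane 2 of a V4SImode vector without SSE4.1.  The idea
   matches the SSE2 path above: move the wanted lane into position 0
   with a shuffle, then read element 0 (the expander itself uses
   punpckhdq for lane 2).  The helper name is made up.  */
#if 0
#include <emmintrin.h>

static int
extract_lane_2 (__m128i v)
{
  __m128i t = _mm_shuffle_epi32 (v, _MM_SHUFFLE (2, 2, 2, 2));
  return _mm_cvtsi128_si32 (t);
}
#endif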
31238
31239 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31240 pattern to reduce; DEST is the destination; IN is the input vector. */
31241
31242 void
31243 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31244 {
31245 rtx tmp1, tmp2, tmp3;
31246
31247 tmp1 = gen_reg_rtx (V4SFmode);
31248 tmp2 = gen_reg_rtx (V4SFmode);
31249 tmp3 = gen_reg_rtx (V4SFmode);
31250
31251 emit_insn (gen_sse_movhlps (tmp1, in, in));
31252 emit_insn (fn (tmp2, tmp1, in));
31253
31254 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31255 const1_rtx, const1_rtx,
31256 GEN_INT (1+4), GEN_INT (1+4)));
31257 emit_insn (fn (dest, tmp2, tmp3));
31258 }
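
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the movhlps/shufps reduction above, instantiated for a horizontal
   add with SSE1 intrinsics.  _mm_add_ps stands in for the FN
   callback; the helper name is made up for the example.  */
#if 0
#include <xmmintrin.h>

static float
reduce_add_v4sf (__m128 in)
{
  __m128 tmp1 = _mm_movehl_ps (in, in);     /* in2 in3 in2 in3          */
  __m128 tmp2 = _mm_add_ps (tmp1, in);      /* lanes 0/1 hold pair sums */
  __m128 tmp3 = _mm_shuffle_ps (tmp2, tmp2,
                                _MM_SHUFFLE (1, 1, 1, 1));
  return _mm_cvtss_f32 (_mm_add_ps (tmp2, tmp3));   /* lane 0 = total   */
}
#endif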
31259 \f
31260 /* Target hook for scalar_mode_supported_p. */
31261 static bool
31262 ix86_scalar_mode_supported_p (enum machine_mode mode)
31263 {
31264 if (DECIMAL_FLOAT_MODE_P (mode))
31265 return default_decimal_float_supported_p ();
31266 else if (mode == TFmode)
31267 return true;
31268 else
31269 return default_scalar_mode_supported_p (mode);
31270 }
31271
31272 /* Implements target hook vector_mode_supported_p. */
31273 static bool
31274 ix86_vector_mode_supported_p (enum machine_mode mode)
31275 {
31276 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31277 return true;
31278 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31279 return true;
31280 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31281 return true;
31282 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31283 return true;
31284 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31285 return true;
31286 return false;
31287 }
31288
31289 /* Target hook for c_mode_for_suffix. */
31290 static enum machine_mode
31291 ix86_c_mode_for_suffix (char suffix)
31292 {
31293 if (suffix == 'q')
31294 return TFmode;
31295 if (suffix == 'w')
31296 return XFmode;
31297
31298 return VOIDmode;
31299 }
31300
31301 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31302
31303 We do this in the new i386 backend to maintain source compatibility
31304 with the old cc0-based compiler. */
31305
31306 static tree
31307 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31308 tree inputs ATTRIBUTE_UNUSED,
31309 tree clobbers)
31310 {
31311 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31312 clobbers);
31313 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31314 clobbers);
31315 return clobbers;
31316 }
31317
31318 /* Implements the target hook targetm.asm.encode_section_info. This
31319 is not used by NetWare. */
31320
31321 static void ATTRIBUTE_UNUSED
31322 ix86_encode_section_info (tree decl, rtx rtl, int first)
31323 {
31324 default_encode_section_info (decl, rtl, first);
31325
31326 if (TREE_CODE (decl) == VAR_DECL
31327 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31328 && ix86_in_large_data_p (decl))
31329 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31330 }
31331
31332 /* Worker function for REVERSE_CONDITION. */
31333
31334 enum rtx_code
31335 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31336 {
31337 return (mode != CCFPmode && mode != CCFPUmode
31338 ? reverse_condition (code)
31339 : reverse_condition_maybe_unordered (code));
31340 }
31341
31342 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31343 to OPERANDS[0]. */
31344
31345 const char *
31346 output_387_reg_move (rtx insn, rtx *operands)
31347 {
31348 if (REG_P (operands[0]))
31349 {
31350 if (REG_P (operands[1])
31351 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31352 {
31353 if (REGNO (operands[0]) == FIRST_STACK_REG)
31354 return output_387_ffreep (operands, 0);
31355 return "fstp\t%y0";
31356 }
31357 if (STACK_TOP_P (operands[0]))
31358 return "fld%Z1\t%y1";
31359 return "fst\t%y0";
31360 }
31361 else if (MEM_P (operands[0]))
31362 {
31363 gcc_assert (REG_P (operands[1]));
31364 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31365 return "fstp%Z0\t%y0";
31366 else
31367 {
31368 /* There is no non-popping store to memory for XFmode.
31369 So if we need one, follow the store with a load. */
31370 if (GET_MODE (operands[0]) == XFmode)
31371 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31372 else
31373 return "fst%Z0\t%y0";
31374 }
31375 }
31376 else
31377 gcc_unreachable();
31378 }
31379
31380 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31381 FP status register is set. */
31382
31383 void
31384 ix86_emit_fp_unordered_jump (rtx label)
31385 {
31386 rtx reg = gen_reg_rtx (HImode);
31387 rtx temp;
31388
31389 emit_insn (gen_x86_fnstsw_1 (reg));
31390
31391 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31392 {
31393 emit_insn (gen_x86_sahf_1 (reg));
31394
31395 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31396 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31397 }
31398 else
31399 {
31400 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31401
31402 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31403 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31404 }
31405
31406 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31407 gen_rtx_LABEL_REF (VOIDmode, label),
31408 pc_rtx);
31409 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31410
31411 emit_jump_insn (temp);
31412 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31413 }
31414
31415 /* Output code to perform a log1p XFmode calculation. */
31416
31417 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31418 {
31419 rtx label1 = gen_label_rtx ();
31420 rtx label2 = gen_label_rtx ();
31421
31422 rtx tmp = gen_reg_rtx (XFmode);
31423 rtx tmp2 = gen_reg_rtx (XFmode);
31424 rtx test;
31425
31426 emit_insn (gen_absxf2 (tmp, op1));
31427 test = gen_rtx_GE (VOIDmode, tmp,
31428 CONST_DOUBLE_FROM_REAL_VALUE (
31429 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31430 XFmode));
31431 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31432
31433 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31434 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31435 emit_jump (label2);
31436
31437 emit_label (label1);
31438 emit_move_insn (tmp, CONST1_RTX (XFmode));
31439 emit_insn (gen_addxf3 (tmp, op1, tmp));
31440 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31441 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31442
31443 emit_label (label2);
31444 }
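
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the branch structure of the i387 log1p expansion above, in plain
   host C.  fyl2xp1 is only usable for small arguments (roughly
   |x| < 1 - sqrt(2)/2), so larger inputs fall back to computing
   log (1 + x) directly; the libm calls below are stand-ins for the
   fyl2xp1/fyl2x instructions, not the real expansion.  */
#if 0
#include <math.h>

static double
log1p_sketch (double x)
{
  const double ln2 = 0.69314718055994531;         /* matches fldln2 */
  const double threshold = 0.29289321881345247;   /* 1 - sqrt(2)/2  */

  if (fabs (x) < threshold)
    return ln2 * log2 (x + 1.0);   /* stands in for fyl2xp1 */
  return ln2 * log2 (1.0 + x);     /* stands in for fyl2x   */
}
#endif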
31445
31446 /* Output code to perform a Newton-Raphson approximation of a single precision
31447 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31448
31449 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31450 {
31451 rtx x0, x1, e0, e1;
31452
31453 x0 = gen_reg_rtx (mode);
31454 e0 = gen_reg_rtx (mode);
31455 e1 = gen_reg_rtx (mode);
31456 x1 = gen_reg_rtx (mode);
31457
31458 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
31459
31460 /* x0 = rcp(b) estimate */
31461 emit_insn (gen_rtx_SET (VOIDmode, x0,
31462 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31463 UNSPEC_RCP)));
31464 /* e0 = x0 * b */
31465 emit_insn (gen_rtx_SET (VOIDmode, e0,
31466 gen_rtx_MULT (mode, x0, b)));
31467
31468 /* e0 = x0 * e0 */
31469 emit_insn (gen_rtx_SET (VOIDmode, e0,
31470 gen_rtx_MULT (mode, x0, e0)));
31471
31472 /* e1 = x0 + x0 */
31473 emit_insn (gen_rtx_SET (VOIDmode, e1,
31474 gen_rtx_PLUS (mode, x0, x0)));
31475
31476 /* x1 = e1 - e0 */
31477 emit_insn (gen_rtx_SET (VOIDmode, x1,
31478 gen_rtx_MINUS (mode, e1, e0)));
31479
31480 /* res = a * x1 */
31481 emit_insn (gen_rtx_SET (VOIDmode, res,
31482 gen_rtx_MULT (mode, a, x1)));
31483 }
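
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the same refinement as above for a scalar float divide, using SSE1
   intrinsics.  rcpss gives roughly 12 bits of the reciprocal; one
   Newton-Raphson step, written exactly as in the comment above as
   a * ((x0 + x0) - b * x0 * x0), roughly doubles that.  The helper
   name is made up for the example.  */
#if 0
#include <xmmintrin.h>

static float
swdiv_sketch (float a, float b)
{
  __m128 va = _mm_set_ss (a);
  __m128 vb = _mm_set_ss (b);
  __m128 x0 = _mm_rcp_ss (vb);                  /* x0 = rcp(b) estimate */
  __m128 e0 = _mm_mul_ss (x0, vb);              /* e0 = x0 * b          */
  __m128 e1;

  e0 = _mm_mul_ss (x0, e0);                     /* e0 = x0 * e0         */
  e1 = _mm_add_ss (x0, x0);                     /* e1 = x0 + x0         */
  return _mm_cvtss_f32 (_mm_mul_ss (va, _mm_sub_ss (e1, e0)));  /* a * (e1 - e0) */
}
#endif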
31484
31485 /* Output code to perform a Newton-Raphson approximation of a
31486 single precision floating point [reciprocal] square root. */
31487
31488 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31489 bool recip)
31490 {
31491 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31492 REAL_VALUE_TYPE r;
31493
31494 x0 = gen_reg_rtx (mode);
31495 e0 = gen_reg_rtx (mode);
31496 e1 = gen_reg_rtx (mode);
31497 e2 = gen_reg_rtx (mode);
31498 e3 = gen_reg_rtx (mode);
31499
31500 real_from_integer (&r, VOIDmode, -3, -1, 0);
31501 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31502
31503 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31504 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31505
31506 if (VECTOR_MODE_P (mode))
31507 {
31508 mthree = ix86_build_const_vector (mode, true, mthree);
31509 mhalf = ix86_build_const_vector (mode, true, mhalf);
31510 }
31511
31512 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31513 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31514
31515 /* x0 = rsqrt(a) estimate */
31516 emit_insn (gen_rtx_SET (VOIDmode, x0,
31517 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31518 UNSPEC_RSQRT)));
31519
31520 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
31521 if (!recip)
31522 {
31523 rtx zero, mask;
31524
31525 zero = gen_reg_rtx (mode);
31526 mask = gen_reg_rtx (mode);
31527
31528 zero = force_reg (mode, CONST0_RTX(mode));
31529 emit_insn (gen_rtx_SET (VOIDmode, mask,
31530 gen_rtx_NE (mode, zero, a)));
31531
31532 emit_insn (gen_rtx_SET (VOIDmode, x0,
31533 gen_rtx_AND (mode, x0, mask)));
31534 }
31535
31536 /* e0 = x0 * a */
31537 emit_insn (gen_rtx_SET (VOIDmode, e0,
31538 gen_rtx_MULT (mode, x0, a)));
31539 /* e1 = e0 * x0 */
31540 emit_insn (gen_rtx_SET (VOIDmode, e1,
31541 gen_rtx_MULT (mode, e0, x0)));
31542
31543 /* e2 = e1 - 3. */
31544 mthree = force_reg (mode, mthree);
31545 emit_insn (gen_rtx_SET (VOIDmode, e2,
31546 gen_rtx_PLUS (mode, e1, mthree)));
31547
31548 mhalf = force_reg (mode, mhalf);
31549 if (recip)
31550 /* e3 = -.5 * x0 */
31551 emit_insn (gen_rtx_SET (VOIDmode, e3,
31552 gen_rtx_MULT (mode, x0, mhalf)));
31553 else
31554 /* e3 = -.5 * e0 */
31555 emit_insn (gen_rtx_SET (VOIDmode, e3,
31556 gen_rtx_MULT (mode, e0, mhalf)));
31557 /* ret = e2 * e3 */
31558 emit_insn (gen_rtx_SET (VOIDmode, res,
31559 gen_rtx_MULT (mode, e2, e3)));
31560 }
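
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the rsqrt refinement above in scalar form with SSE1 intrinsics:
   rsqrt(a) ~= -0.5 * x0 * (a * x0 * x0 - 3.0) with x0 = rsqrtss(a).
   The a == 0.0 masking the expander performs for the non-reciprocal
   (sqrt) case is omitted here.  The helper name is made up.  */
#if 0
#include <xmmintrin.h>

static float
swrsqrt_sketch (float a)
{
  __m128 va = _mm_set_ss (a);
  __m128 x0 = _mm_rsqrt_ss (va);                      /* estimate      */
  __m128 e0 = _mm_mul_ss (x0, va);                    /* e0 = x0 * a   */
  __m128 e1 = _mm_mul_ss (e0, x0);                    /* e1 = e0 * x0  */
  __m128 e2 = _mm_add_ss (e1, _mm_set_ss (-3.0f));    /* e2 = e1 - 3   */
  __m128 e3 = _mm_mul_ss (x0, _mm_set_ss (-0.5f));    /* e3 = -.5 * x0 */

  return _mm_cvtss_f32 (_mm_mul_ss (e2, e3));         /* e2 * e3       */
}
#endif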
31561
31562 #ifdef TARGET_SOLARIS
31563 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31564
31565 static void
31566 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31567 tree decl)
31568 {
31569 /* With Binutils 2.15, the "@unwind" marker must be specified on
31570 every occurrence of the ".eh_frame" section, not just the first
31571 one. */
31572 if (TARGET_64BIT
31573 && strcmp (name, ".eh_frame") == 0)
31574 {
31575 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31576 flags & SECTION_WRITE ? "aw" : "a");
31577 return;
31578 }
31579
31580 #ifndef USE_GAS
31581 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31582 {
31583 solaris_elf_asm_comdat_section (name, flags, decl);
31584 return;
31585 }
31586 #endif
31587
31588 default_elf_asm_named_section (name, flags, decl);
31589 }
31590 #endif /* TARGET_SOLARIS */
31591
31592 /* Return the mangling of TYPE if it is an extended fundamental type. */
31593
31594 static const char *
31595 ix86_mangle_type (const_tree type)
31596 {
31597 type = TYPE_MAIN_VARIANT (type);
31598
31599 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31600 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31601 return NULL;
31602
31603 switch (TYPE_MODE (type))
31604 {
31605 case TFmode:
31606 /* __float128 is "g". */
31607 return "g";
31608 case XFmode:
31609 /* "long double" or __float80 is "e". */
31610 return "e";
31611 default:
31612 return NULL;
31613 }
31614 }
31615
31616 /* For 32-bit code we can save PIC register setup by using
31617 __stack_chk_fail_local hidden function instead of calling
31618 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31619 register, so it is better to call __stack_chk_fail directly. */
31620
31621 static tree ATTRIBUTE_UNUSED
31622 ix86_stack_protect_fail (void)
31623 {
31624 return TARGET_64BIT
31625 ? default_external_stack_protect_fail ()
31626 : default_hidden_stack_protect_fail ();
31627 }
31628
31629 /* Select a format to encode pointers in exception handling data. CODE
31630 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31631 true if the symbol may be affected by dynamic relocations.
31632
31633 ??? All x86 object file formats are capable of representing this.
31634 After all, the relocation needed is the same as for the call insn.
31635 Whether or not a particular assembler allows us to enter such, I
31636 guess we'll have to see. */
31637 int
31638 asm_preferred_eh_data_format (int code, int global)
31639 {
31640 if (flag_pic)
31641 {
31642 int type = DW_EH_PE_sdata8;
31643 if (!TARGET_64BIT
31644 || ix86_cmodel == CM_SMALL_PIC
31645 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31646 type = DW_EH_PE_sdata4;
31647 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31648 }
31649 if (ix86_cmodel == CM_SMALL
31650 || (ix86_cmodel == CM_MEDIUM && code))
31651 return DW_EH_PE_udata4;
31652 return DW_EH_PE_absptr;
31653 }
31654 \f
31655 /* Expand copysign from SIGN to the positive value ABS_VALUE
31656 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31657 the sign-bit. */
31658 static void
31659 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31660 {
31661 enum machine_mode mode = GET_MODE (sign);
31662 rtx sgn = gen_reg_rtx (mode);
31663 if (mask == NULL_RTX)
31664 {
31665 enum machine_mode vmode;
31666
31667 if (mode == SFmode)
31668 vmode = V4SFmode;
31669 else if (mode == DFmode)
31670 vmode = V2DFmode;
31671 else
31672 vmode = mode;
31673
31674 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31675 if (!VECTOR_MODE_P (mode))
31676 {
31677 /* We need to generate a scalar mode mask in this case. */
31678 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31679 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31680 mask = gen_reg_rtx (mode);
31681 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31682 }
31683 }
31684 else
31685 mask = gen_rtx_NOT (mode, mask);
31686 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31687 gen_rtx_AND (mode, mask, sign)));
31688 emit_insn (gen_rtx_SET (VOIDmode, result,
31689 gen_rtx_IOR (mode, abs_value, sgn)));
31690 }
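
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the bit-level effect of the AND/IOR pair above, for a scalar
   float.  ABS_VALUE is assumed to already have a clear sign bit, so
   OR-ing in the masked sign bit of SIGN yields copysign.  The helper
   name is made up for the example.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
copysign_to_positive_sketch (float abs_value, float sign)
{
  uint32_t a, s;

  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x80000000u;             /* sgn = mask & sign; a = abs | sgn */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif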
31691
31692 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31693 mask for masking out the sign-bit is stored in *SMASK, if that is
31694 non-null. */
31695 static rtx
31696 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31697 {
31698 enum machine_mode vmode, mode = GET_MODE (op0);
31699 rtx xa, mask;
31700
31701 xa = gen_reg_rtx (mode);
31702 if (mode == SFmode)
31703 vmode = V4SFmode;
31704 else if (mode == DFmode)
31705 vmode = V2DFmode;
31706 else
31707 vmode = mode;
31708 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31709 if (!VECTOR_MODE_P (mode))
31710 {
31711 /* We need to generate a scalar mode mask in this case. */
31712 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31713 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31714 mask = gen_reg_rtx (mode);
31715 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31716 }
31717 emit_insn (gen_rtx_SET (VOIDmode, xa,
31718 gen_rtx_AND (mode, op0, mask)));
31719
31720 if (smask)
31721 *smask = mask;
31722
31723 return xa;
31724 }
31725
31726 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31727 swapping the operands if SWAP_OPERANDS is true. The expanded
31728 code is a forward jump to a newly created label in case the
31729 comparison is true. The generated label rtx is returned. */
31730 static rtx
31731 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31732 bool swap_operands)
31733 {
31734 rtx label, tmp;
31735
31736 if (swap_operands)
31737 {
31738 tmp = op0;
31739 op0 = op1;
31740 op1 = tmp;
31741 }
31742
31743 label = gen_label_rtx ();
31744 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31745 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31746 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31747 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31748 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31749 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31750 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31751 JUMP_LABEL (tmp) = label;
31752
31753 return label;
31754 }
31755
31756 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31757 using comparison code CODE. Operands are swapped for the comparison if
31758 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31759 static rtx
31760 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31761 bool swap_operands)
31762 {
31763 rtx (*insn)(rtx, rtx, rtx, rtx);
31764 enum machine_mode mode = GET_MODE (op0);
31765 rtx mask = gen_reg_rtx (mode);
31766
31767 if (swap_operands)
31768 {
31769 rtx tmp = op0;
31770 op0 = op1;
31771 op1 = tmp;
31772 }
31773
31774 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
31775
31776 emit_insn (insn (mask, op0, op1,
31777 gen_rtx_fmt_ee (code, mode, op0, op1)));
31778 return mask;
31779 }
31780
31781 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31782 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31783 static rtx
31784 ix86_gen_TWO52 (enum machine_mode mode)
31785 {
31786 REAL_VALUE_TYPE TWO52r;
31787 rtx TWO52;
31788
31789 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31790 TWO52 = const_double_from_real_value (TWO52r, mode);
31791 TWO52 = force_reg (mode, TWO52);
31792
31793 return TWO52;
31794 }
31795
31796 /* Expand SSE sequence for computing lround from OP1 storing
31797 into OP0. */
31798 void
31799 ix86_expand_lround (rtx op0, rtx op1)
31800 {
31801 /* C code for the stuff we're doing below:
31802 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31803 return (long)tmp;
31804 */
31805 enum machine_mode mode = GET_MODE (op1);
31806 const struct real_format *fmt;
31807 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31808 rtx adj;
31809
31810 /* load nextafter (0.5, 0.0) */
31811 fmt = REAL_MODE_FORMAT (mode);
31812 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31813 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31814
31815 /* adj = copysign (0.5, op1) */
31816 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31817 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31818
31819 /* adj = op1 + adj */
31820 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31821
31822 /* op0 = (imode)adj */
31823 expand_fix (op0, adj, 0);
31824 }
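
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the lround expansion above in plain host C.  The addend is the
   predecessor of 0.5 with the sign of the input, so the subsequent
   truncating conversion rounds halfway cases away from zero without
   the addition itself pushing values just below .5 across the
   boundary.  */
#if 0
#include <math.h>

static long
lround_sketch (double op1)
{
  double adj = copysign (nextafter (0.5, 0.0), op1);   /* +/- pred(0.5) */

  return (long) (op1 + adj);     /* conversion truncates toward zero */
}
#endif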
31825
31826 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
31827 into OPERAND0. */
31828 void
31829 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31830 {
31831 /* C code for the stuff we're doing below (for do_floor):
31832 xi = (long)op1;
31833 xi -= (double)xi > op1 ? 1 : 0;
31834 return xi;
31835 */
31836 enum machine_mode fmode = GET_MODE (op1);
31837 enum machine_mode imode = GET_MODE (op0);
31838 rtx ireg, freg, label, tmp;
31839
31840 /* reg = (long)op1 */
31841 ireg = gen_reg_rtx (imode);
31842 expand_fix (ireg, op1, 0);
31843
31844 /* freg = (double)reg */
31845 freg = gen_reg_rtx (fmode);
31846 expand_float (freg, ireg, 0);
31847
31848 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31849 label = ix86_expand_sse_compare_and_jump (UNLE,
31850 freg, op1, !do_floor);
31851 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31852 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31853 emit_move_insn (ireg, tmp);
31854
31855 emit_label (label);
31856 LABEL_NUSES (label) = 1;
31857
31858 emit_move_insn (op0, ireg);
31859 }
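
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the do_floor case of the expansion above in plain host C; lceil is
   the same with the comparison and the adjustment direction
   reversed.  */
#if 0
static long
lfloor_sketch (double op1)
{
  long xi = (long) op1;       /* truncate toward zero                  */

  if ((double) xi > op1)      /* truncation moved a negative value up, */
    xi -= 1;                  /* so step back down                     */
  return xi;
}
#endif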
31860
31861 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31862 result in OPERAND0. */
31863 void
31864 ix86_expand_rint (rtx operand0, rtx operand1)
31865 {
31866 /* C code for the stuff we're doing below:
31867 xa = fabs (operand1);
31868 if (!isless (xa, 2**52))
31869 return operand1;
31870 xa = xa + 2**52 - 2**52;
31871 return copysign (xa, operand1);
31872 */
31873 enum machine_mode mode = GET_MODE (operand0);
31874 rtx res, xa, label, TWO52, mask;
31875
31876 res = gen_reg_rtx (mode);
31877 emit_move_insn (res, operand1);
31878
31879 /* xa = abs (operand1) */
31880 xa = ix86_expand_sse_fabs (res, &mask);
31881
31882 /* if (!isless (xa, TWO52)) goto label; */
31883 TWO52 = ix86_gen_TWO52 (mode);
31884 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31885
31886 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31887 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31888
31889 ix86_sse_copysign_to_positive (res, xa, res, mask);
31890
31891 emit_label (label);
31892 LABEL_NUSES (label) = 1;
31893
31894 emit_move_insn (operand0, res);
31895 }
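
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the rint expansion above in plain host C.  Every double with
   magnitude >= 2**52 is already an integer, so adding and then
   subtracting 2**52 rounds the value to an integer in the current
   rounding mode (assuming the compiler does not re-associate the
   expression, e.g. no -ffast-math).  */
#if 0
#include <math.h>

static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = fabs (x);

  if (!(xa < two52))          /* !isless; also true for NaN */
    return x;
  xa = xa + two52 - two52;
  return copysign (xa, x);    /* restore the sign, including -0.0 */
}
#endif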
31896
31897 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31898 into OPERAND0. */
31899 void
31900 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31901 {
31902 /* C code for the stuff we expand below.
31903 double xa = fabs (x), x2;
31904 if (!isless (xa, TWO52))
31905 return x;
31906 xa = xa + TWO52 - TWO52;
31907 x2 = copysign (xa, x);
31908 Compensate. Floor:
31909 if (x2 > x)
31910 x2 -= 1;
31911 Compensate. Ceil:
31912 if (x2 < x)
31913 x2 -= -1;
31914 return x2;
31915 */
31916 enum machine_mode mode = GET_MODE (operand0);
31917 rtx xa, TWO52, tmp, label, one, res, mask;
31918
31919 TWO52 = ix86_gen_TWO52 (mode);
31920
31921 /* Temporary for holding the result, initialized to the input
31922 operand to ease control flow. */
31923 res = gen_reg_rtx (mode);
31924 emit_move_insn (res, operand1);
31925
31926 /* xa = abs (operand1) */
31927 xa = ix86_expand_sse_fabs (res, &mask);
31928
31929 /* if (!isless (xa, TWO52)) goto label; */
31930 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31931
31932 /* xa = xa + TWO52 - TWO52; */
31933 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31934 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31935
31936 /* xa = copysign (xa, operand1) */
31937 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31938
31939 /* generate 1.0 or -1.0 */
31940 one = force_reg (mode,
31941 const_double_from_real_value (do_floor
31942 ? dconst1 : dconstm1, mode));
31943
31944 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31945 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31946 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31947 gen_rtx_AND (mode, one, tmp)));
31948 /* We always need to subtract here to preserve signed zero. */
31949 tmp = expand_simple_binop (mode, MINUS,
31950 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31951 emit_move_insn (res, tmp);
31952
31953 emit_label (label);
31954 LABEL_NUSES (label) = 1;
31955
31956 emit_move_insn (operand0, res);
31957 }
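
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the expansion above in plain host C.  The value is rounded to an
   integer with the 2**52 trick and then compensated by subtracting
   1.0 (floor) or -1.0 (ceil); always subtracting, instead of adding
   for ceil, is what keeps -0.0 intact.  */
#if 0
#include <math.h>

static double
floorceil_df32_sketch (double x, int do_floor)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double one = do_floor ? 1.0 : -1.0;
  double xa = fabs (x), x2;

  if (!(xa < two52))
    return x;
  xa = xa + two52 - two52;            /* nearest integer, current mode */
  x2 = copysign (xa, x);
  if (do_floor ? x2 > x : x2 < x)     /* compensation test             */
    x2 -= one;
  return x2;
}
#endif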
31958
31959 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31960 into OPERAND0. */
31961 void
31962 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31963 {
31964 /* C code for the stuff we expand below.
31965 double xa = fabs (x), x2;
31966 if (!isless (xa, TWO52))
31967 return x;
31968 x2 = (double)(long)x;
31969 Compensate. Floor:
31970 if (x2 > x)
31971 x2 -= 1;
31972 Compensate. Ceil:
31973 if (x2 < x)
31974 x2 += 1;
31975 if (HONOR_SIGNED_ZEROS (mode))
31976 return copysign (x2, x);
31977 return x2;
31978 */
31979 enum machine_mode mode = GET_MODE (operand0);
31980 rtx xa, xi, TWO52, tmp, label, one, res, mask;
31981
31982 TWO52 = ix86_gen_TWO52 (mode);
31983
31984 /* Temporary for holding the result, initialized to the input
31985 operand to ease control flow. */
31986 res = gen_reg_rtx (mode);
31987 emit_move_insn (res, operand1);
31988
31989 /* xa = abs (operand1) */
31990 xa = ix86_expand_sse_fabs (res, &mask);
31991
31992 /* if (!isless (xa, TWO52)) goto label; */
31993 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31994
31995 /* xa = (double)(long)x */
31996 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31997 expand_fix (xi, res, 0);
31998 expand_float (xa, xi, 0);
31999
32000 /* generate 1.0 */
32001 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32002
32003 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32004 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32005 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32006 gen_rtx_AND (mode, one, tmp)));
32007 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32008 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32009 emit_move_insn (res, tmp);
32010
32011 if (HONOR_SIGNED_ZEROS (mode))
32012 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32013
32014 emit_label (label);
32015 LABEL_NUSES (label) = 1;
32016
32017 emit_move_insn (operand0, res);
32018 }
32019
32020 /* Expand SSE sequence for computing round from OPERAND1 storing
32021 into OPERAND0. Sequence that works without relying on DImode truncation
32022 via cvttsd2siq, which is only available on 64bit targets. */
32023 void
32024 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32025 {
32026 /* C code for the stuff we expand below.
32027 double xa = fabs (x), xa2, x2;
32028 if (!isless (xa, TWO52))
32029 return x;
32030 Using the absolute value and copying back sign makes
32031 -0.0 -> -0.0 correct.
32032 xa2 = xa + TWO52 - TWO52;
32033 Compensate.
32034 dxa = xa2 - xa;
32035 if (dxa <= -0.5)
32036 xa2 += 1;
32037 else if (dxa > 0.5)
32038 xa2 -= 1;
32039 x2 = copysign (xa2, x);
32040 return x2;
32041 */
32042 enum machine_mode mode = GET_MODE (operand0);
32043 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32044
32045 TWO52 = ix86_gen_TWO52 (mode);
32046
32047 /* Temporary for holding the result, initialized to the input
32048 operand to ease control flow. */
32049 res = gen_reg_rtx (mode);
32050 emit_move_insn (res, operand1);
32051
32052 /* xa = abs (operand1) */
32053 xa = ix86_expand_sse_fabs (res, &mask);
32054
32055 /* if (!isless (xa, TWO52)) goto label; */
32056 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32057
32058 /* xa2 = xa + TWO52 - TWO52; */
32059 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32060 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32061
32062 /* dxa = xa2 - xa; */
32063 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32064
32065 /* generate 0.5, 1.0 and -0.5 */
32066 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32067 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32068 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32069 0, OPTAB_DIRECT);
32070
32071 /* Compensate. */
32072 tmp = gen_reg_rtx (mode);
32073 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32074 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32075 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32076 gen_rtx_AND (mode, one, tmp)));
32077 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32078 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32079 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32080 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32081 gen_rtx_AND (mode, one, tmp)));
32082 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32083
32084 /* res = copysign (xa2, operand1) */
32085 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32086
32087 emit_label (label);
32088 LABEL_NUSES (label) = 1;
32089
32090 emit_move_insn (operand0, res);
32091 }
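
/* Illustrative sketch (not part of the backend, kept under "#if 0"):
   the expansion above in plain host C.  The 2**52 trick rounds
   halfway cases to even, while round() must round them away from
   zero, so the difference dxa between the rounded and the original
   magnitude is used to fix up the ties.  */
#if 0
#include <math.h>

static double
round_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = fabs (x), xa2, dxa;

  if (!(xa < two52))
    return x;
  xa2 = xa + two52 - two52;   /* round to nearest even */
  dxa = xa2 - xa;
  if (dxa <= -0.5)            /* tie rounded toward even: push away from zero */
    xa2 += 1.0;
  else if (dxa > 0.5)
    xa2 -= 1.0;
  return copysign (xa2, x);
}
#endif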
32092
32093 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32094 into OPERAND0. */
32095 void
32096 ix86_expand_trunc (rtx operand0, rtx operand1)
32097 {
32098 /* C code for SSE variant we expand below.
32099 double xa = fabs (x), x2;
32100 if (!isless (xa, TWO52))
32101 return x;
32102 x2 = (double)(long)x;
32103 if (HONOR_SIGNED_ZEROS (mode))
32104 return copysign (x2, x);
32105 return x2;
32106 */
32107 enum machine_mode mode = GET_MODE (operand0);
32108 rtx xa, xi, TWO52, label, res, mask;
32109
32110 TWO52 = ix86_gen_TWO52 (mode);
32111
32112 /* Temporary for holding the result, initialized to the input
32113 operand to ease control flow. */
32114 res = gen_reg_rtx (mode);
32115 emit_move_insn (res, operand1);
32116
32117 /* xa = abs (operand1) */
32118 xa = ix86_expand_sse_fabs (res, &mask);
32119
32120 /* if (!isless (xa, TWO52)) goto label; */
32121 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32122
32123 /* x = (double)(long)x */
32124 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32125 expand_fix (xi, res, 0);
32126 expand_float (res, xi, 0);
32127
32128 if (HONOR_SIGNED_ZEROS (mode))
32129 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32130
32131 emit_label (label);
32132 LABEL_NUSES (label) = 1;
32133
32134 emit_move_insn (operand0, res);
32135 }
32136
32137 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32138 into OPERAND0. */
32139 void
32140 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32141 {
32142 enum machine_mode mode = GET_MODE (operand0);
32143 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32144
32145 /* C code for SSE variant we expand below.
32146 double xa = fabs (x), x2;
32147 if (!isless (xa, TWO52))
32148 return x;
32149 xa2 = xa + TWO52 - TWO52;
32150 Compensate:
32151 if (xa2 > xa)
32152 xa2 -= 1.0;
32153 x2 = copysign (xa2, x);
32154 return x2;
32155 */
32156
32157 TWO52 = ix86_gen_TWO52 (mode);
32158
32159 /* Temporary for holding the result, initialized to the input
32160 operand to ease control flow. */
32161 res = gen_reg_rtx (mode);
32162 emit_move_insn (res, operand1);
32163
32164 /* xa = abs (operand1) */
32165 xa = ix86_expand_sse_fabs (res, &smask);
32166
32167 /* if (!isless (xa, TWO52)) goto label; */
32168 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32169
32170 /* res = xa + TWO52 - TWO52; */
32171 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32172 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32173 emit_move_insn (res, tmp);
32174
32175 /* generate 1.0 */
32176 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32177
32178 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32179 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32180 emit_insn (gen_rtx_SET (VOIDmode, mask,
32181 gen_rtx_AND (mode, mask, one)));
32182 tmp = expand_simple_binop (mode, MINUS,
32183 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32184 emit_move_insn (res, tmp);
32185
32186 /* res = copysign (res, operand1) */
32187 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32188
32189 emit_label (label);
32190 LABEL_NUSES (label) = 1;
32191
32192 emit_move_insn (operand0, res);
32193 }
32194
32195 /* Expand SSE sequence for computing round from OPERAND1 storing
32196 into OPERAND0. */
32197 void
32198 ix86_expand_round (rtx operand0, rtx operand1)
32199 {
32200 /* C code for the stuff we're doing below:
32201 double xa = fabs (x);
32202 if (!isless (xa, TWO52))
32203 return x;
32204 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32205 return copysign (xa, x);
32206 */
32207 enum machine_mode mode = GET_MODE (operand0);
32208 rtx res, TWO52, xa, label, xi, half, mask;
32209 const struct real_format *fmt;
32210 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32211
32212 /* Temporary for holding the result, initialized to the input
32213 operand to ease control flow. */
32214 res = gen_reg_rtx (mode);
32215 emit_move_insn (res, operand1);
32216
32217 TWO52 = ix86_gen_TWO52 (mode);
32218 xa = ix86_expand_sse_fabs (res, &mask);
32219 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32220
32221 /* load nextafter (0.5, 0.0) */
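/* pred_half computed below is nextafter (0.5, 0.0), i.e. 0.5 - 2**(-p-1),
   the largest representable value strictly below 0.5.  Adding it rather
   than 0.5 keeps inputs just under 0.5 from being rounded up to 1.0 by
   the addition itself.  */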
32222 fmt = REAL_MODE_FORMAT (mode);
32223 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32224 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32225
32226 /* xa = xa + 0.5 */
32227 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32228 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32229
32230 /* xa = (double)(int64_t)xa */
32231 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32232 expand_fix (xi, xa, 0);
32233 expand_float (xa, xi, 0);
32234
32235 /* res = copysign (xa, operand1) */
32236 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32237
32238 emit_label (label);
32239 LABEL_NUSES (label) = 1;
32240
32241 emit_move_insn (operand0, res);
32242 }
32243 \f
32244
32245 /* Table of valid machine attributes. */
32246 static const struct attribute_spec ix86_attribute_table[] =
32247 {
32248 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32249 affects_type_identity } */
32250 /* Stdcall attribute says callee is responsible for popping arguments
32251 if they are not variable. */
32252 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32253 true },
32254 /* Fastcall attribute says callee is responsible for popping arguments
32255 if they are not variable. */
32256 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32257 true },
32258 /* Thiscall attribute says callee is responsible for popping arguments
32259 if they are not variable. */
32260 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32261 true },
32262 /* Cdecl attribute says the callee is a normal C declaration */
32263 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32264 true },
32265 /* Regparm attribute specifies how many integer arguments are to be
32266 passed in registers. */
32267 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32268 true },
32269 /* Sseregparm attribute says we are using x86_64 calling conventions
32270 for FP arguments. */
32271 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32272 true },
32273 /* force_align_arg_pointer says this function realigns the stack at entry. */
32274 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32275 false, true, true, ix86_handle_cconv_attribute, false },
32276 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32277 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32278 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32279 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32280 false },
32281 #endif
32282 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32283 false },
32284 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32285 false },
32286 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32287 SUBTARGET_ATTRIBUTE_TABLE,
32288 #endif
32289 /* ms_abi and sysv_abi calling convention function attributes. */
32290 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32291 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32292 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32293 false },
32294 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32295 ix86_handle_callee_pop_aggregate_return, true },
32296 /* End element. */
32297 { NULL, 0, 0, false, false, false, NULL, false }
32298 };
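/* Illustrative usage of the attributes above in user code, e.g.:
     int __attribute__ ((fastcall)) f (int);
     int __attribute__ ((regparm (3))) g (int, int, int);  */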
32299
32300 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32301 static int
32302 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32303 tree vectype ATTRIBUTE_UNUSED,
32304 int misalign ATTRIBUTE_UNUSED)
32305 {
32306 switch (type_of_cost)
32307 {
32308 case scalar_stmt:
32309 return ix86_cost->scalar_stmt_cost;
32310
32311 case scalar_load:
32312 return ix86_cost->scalar_load_cost;
32313
32314 case scalar_store:
32315 return ix86_cost->scalar_store_cost;
32316
32317 case vector_stmt:
32318 return ix86_cost->vec_stmt_cost;
32319
32320 case vector_load:
32321 return ix86_cost->vec_align_load_cost;
32322
32323 case vector_store:
32324 return ix86_cost->vec_store_cost;
32325
32326 case vec_to_scalar:
32327 return ix86_cost->vec_to_scalar_cost;
32328
32329 case scalar_to_vec:
32330 return ix86_cost->scalar_to_vec_cost;
32331
32332 case unaligned_load:
32333 case unaligned_store:
32334 return ix86_cost->vec_unalign_load_cost;
32335
32336 case cond_branch_taken:
32337 return ix86_cost->cond_taken_branch_cost;
32338
32339 case cond_branch_not_taken:
32340 return ix86_cost->cond_not_taken_branch_cost;
32341
32342 case vec_perm:
32343 return 1;
32344
32345 default:
32346 gcc_unreachable ();
32347 }
32348 }
32349
32350
32351 /* Implement targetm.vectorize.builtin_vec_perm. */
32352
32353 static tree
32354 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32355 {
32356 tree itype = TREE_TYPE (vec_type);
32357 bool u = TYPE_UNSIGNED (itype);
32358 enum machine_mode vmode = TYPE_MODE (vec_type);
32359 enum ix86_builtins fcode;
32360 bool ok = TARGET_SSE2;
32361
32362 switch (vmode)
32363 {
32364 case V4DFmode:
32365 ok = TARGET_AVX;
32366 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32367 goto get_di;
32368 case V2DFmode:
32369 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32370 get_di:
32371 itype = ix86_get_builtin_type (IX86_BT_DI);
32372 break;
32373
32374 case V8SFmode:
32375 ok = TARGET_AVX;
32376 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32377 goto get_si;
32378 case V4SFmode:
32379 ok = TARGET_SSE;
32380 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32381 get_si:
32382 itype = ix86_get_builtin_type (IX86_BT_SI);
32383 break;
32384
32385 case V2DImode:
32386 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32387 break;
32388 case V4SImode:
32389 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32390 break;
32391 case V8HImode:
32392 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32393 break;
32394 case V16QImode:
32395 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32396 break;
32397 default:
32398 ok = false;
32399 break;
32400 }
32401
32402 if (!ok)
32403 return NULL_TREE;
32404
32405 *mask_type = itype;
32406 return ix86_builtins[(int) fcode];
32407 }
32408
32409 /* Return a vector mode with twice as many elements as VMODE. */
32410 /* ??? Consider moving this to a table generated by genmodes.c. */
32411
32412 static enum machine_mode
32413 doublesize_vector_mode (enum machine_mode vmode)
32414 {
32415 switch (vmode)
32416 {
32417 case V2SFmode: return V4SFmode;
32418 case V1DImode: return V2DImode;
32419 case V2SImode: return V4SImode;
32420 case V4HImode: return V8HImode;
32421 case V8QImode: return V16QImode;
32422
32423 case V2DFmode: return V4DFmode;
32424 case V4SFmode: return V8SFmode;
32425 case V2DImode: return V4DImode;
32426 case V4SImode: return V8SImode;
32427 case V8HImode: return V16HImode;
32428 case V16QImode: return V32QImode;
32429
32430 case V4DFmode: return V8DFmode;
32431 case V8SFmode: return V16SFmode;
32432 case V4DImode: return V8DImode;
32433 case V8SImode: return V16SImode;
32434 case V16HImode: return V32HImode;
32435 case V32QImode: return V64QImode;
32436
32437 default:
32438 gcc_unreachable ();
32439 }
32440 }
32441
32442 /* Construct (set target (vec_select op0 (parallel perm))) and
32443 return true if that's a valid instruction in the active ISA. */
32444
32445 static bool
32446 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32447 {
32448 rtx rperm[MAX_VECT_LEN], x;
32449 unsigned i;
32450
32451 for (i = 0; i < nelt; ++i)
32452 rperm[i] = GEN_INT (perm[i]);
32453
32454 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32455 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32456 x = gen_rtx_SET (VOIDmode, target, x);
32457
32458 x = emit_insn (x);
32459 if (recog_memoized (x) < 0)
32460 {
32461 remove_insn (x);
32462 return false;
32463 }
32464 return true;
32465 }
32466
32467 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32468
32469 static bool
32470 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32471 const unsigned char *perm, unsigned nelt)
32472 {
32473 enum machine_mode v2mode;
32474 rtx x;
32475
32476 v2mode = doublesize_vector_mode (GET_MODE (op0));
32477 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32478 return expand_vselect (target, x, perm, nelt);
32479 }
32480
32481 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32482 in terms of blendp[sd] / pblendw / pblendvb. */
32483
32484 static bool
32485 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32486 {
32487 enum machine_mode vmode = d->vmode;
32488 unsigned i, mask, nelt = d->nelt;
32489 rtx target, op0, op1, x;
32490
32491 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32492 return false;
32493 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32494 return false;
32495
32496 /* This is a blend, not a permute. Elements must stay in their
32497 respective lanes. */
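/* For instance, with V4SFmode (nelt == 4) the selector { 0, 5, 2, 7 } is a
   valid blend: elements 0 and 2 come from op0, elements 1 and 3 from op1,
   each staying in its own lane.  A selector such as { 1, 0, 2, 3 } is not.  */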
32498 for (i = 0; i < nelt; ++i)
32499 {
32500 unsigned e = d->perm[i];
32501 if (!(e == i || e == i + nelt))
32502 return false;
32503 }
32504
32505 if (d->testing_p)
32506 return true;
32507
32508 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32509 decision should be extracted elsewhere, so that we only try that
32510 sequence once all budget==3 options have been tried. */
32511
32512 /* For bytes, see if bytes move in pairs so we can use pblendw with
32513 an immediate argument, rather than pblendvb with a vector argument. */
32514 if (vmode == V16QImode)
32515 {
32516 bool pblendw_ok = true;
32517 for (i = 0; i < 16 && pblendw_ok; i += 2)
32518 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32519
32520 if (!pblendw_ok)
32521 {
32522 rtx rperm[16], vperm;
32523
32524 for (i = 0; i < nelt; ++i)
32525 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32526
32527 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32528 vperm = force_reg (V16QImode, vperm);
32529
32530 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32531 return true;
32532 }
32533 }
32534
32535 target = d->target;
32536 op0 = d->op0;
32537 op1 = d->op1;
32538 mask = 0;
32539
32540 switch (vmode)
32541 {
32542 case V4DFmode:
32543 case V8SFmode:
32544 case V2DFmode:
32545 case V4SFmode:
32546 case V8HImode:
32547 for (i = 0; i < nelt; ++i)
32548 mask |= (d->perm[i] >= nelt) << i;
32549 break;
32550
32551 case V2DImode:
32552 for (i = 0; i < 2; ++i)
32553 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32554 goto do_subreg;
32555
32556 case V4SImode:
32557 for (i = 0; i < 4; ++i)
32558 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32559 goto do_subreg;
32560
32561 case V16QImode:
32562 for (i = 0; i < 8; ++i)
32563 mask |= (d->perm[i * 2] >= 16) << i;
32564
32565 do_subreg:
32566 vmode = V8HImode;
32567 target = gen_lowpart (vmode, target);
32568 op0 = gen_lowpart (vmode, op0);
32569 op1 = gen_lowpart (vmode, op1);
32570 break;
32571
32572 default:
32573 gcc_unreachable ();
32574 }
32575
32576 /* This matches five different patterns with the different modes. */
32577 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32578 x = gen_rtx_SET (VOIDmode, target, x);
32579 emit_insn (x);
32580
32581 return true;
32582 }
32583
32584 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32585 in terms of the variable form of vpermilps.
32586
32587 Note that we will have already failed the immediate input vpermilps,
32588 which requires that the high and low part shuffle be identical; the
32589 variable form doesn't require that. */
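/* For instance, the single-operand V8SFmode selector { 1, 0, 3, 2, 4, 5, 6, 7 }
   stays within each 128-bit lane but shuffles the two lanes differently, so
   only the variable form can handle it.  */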
32590
32591 static bool
32592 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32593 {
32594 rtx rperm[8], vperm;
32595 unsigned i;
32596
32597 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32598 return false;
32599
32600 /* We can only permute within the 128-bit lane. */
32601 for (i = 0; i < 8; ++i)
32602 {
32603 unsigned e = d->perm[i];
32604 if (i < 4 ? e >= 4 : e < 4)
32605 return false;
32606 }
32607
32608 if (d->testing_p)
32609 return true;
32610
32611 for (i = 0; i < 8; ++i)
32612 {
32613 unsigned e = d->perm[i];
32614
32615 /* Within each 128-bit lane, the elements of op0 are numbered
32616 from 0 and the elements of op1 are numbered from 4. */
32617 if (e >= 8 + 4)
32618 e -= 8;
32619 else if (e >= 4)
32620 e -= 4;
32621
32622 rperm[i] = GEN_INT (e);
32623 }
32624
32625 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32626 vperm = force_reg (V8SImode, vperm);
32627 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32628
32629 return true;
32630 }
32631
32632 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32633 in terms of pshufb or vpperm. */
32634
32635 static bool
32636 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32637 {
32638 unsigned i, nelt, eltsz;
32639 rtx rperm[16], vperm, target, op0, op1;
32640
32641 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32642 return false;
32643 if (GET_MODE_SIZE (d->vmode) != 16)
32644 return false;
32645
32646 if (d->testing_p)
32647 return true;
32648
32649 nelt = d->nelt;
32650 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32651
32652 for (i = 0; i < nelt; ++i)
32653 {
32654 unsigned j, e = d->perm[i];
32655 for (j = 0; j < eltsz; ++j)
32656 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32657 }
32658
32659 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32660 vperm = force_reg (V16QImode, vperm);
32661
32662 target = gen_lowpart (V16QImode, d->target);
32663 op0 = gen_lowpart (V16QImode, d->op0);
32664 if (d->op0 == d->op1)
32665 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32666 else
32667 {
32668 op1 = gen_lowpart (V16QImode, d->op1);
32669 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
32670 }
32671
32672 return true;
32673 }
32674
32675 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32676 in a single instruction. */
32677
32678 static bool
32679 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32680 {
32681 unsigned i, nelt = d->nelt;
32682 unsigned char perm2[MAX_VECT_LEN];
32683
32684 /* Check plain VEC_SELECT first, because AVX has instructions that could
32685 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32686 input where SEL+CONCAT may not. */
32687 if (d->op0 == d->op1)
32688 {
32689 int mask = nelt - 1;
32690
32691 for (i = 0; i < nelt; i++)
32692 perm2[i] = d->perm[i] & mask;
32693
32694 if (expand_vselect (d->target, d->op0, perm2, nelt))
32695 return true;
32696
32697 /* There are plenty of patterns in sse.md that are written for
32698 SEL+CONCAT and are not replicated for a single op. Perhaps
32699 that should be changed, to avoid the nastiness here. */
32700
32701 /* Recognize interleave style patterns, which means incrementing
32702 every other permutation operand. */
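      /* E.g. for a single V4SImode operand, perm { 0, 0, 1, 1 } becomes
	 perm2 { 0, 4, 1, 5 }, an interleave-low of the operand with itself
	 (punpckldq).  */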
32703 for (i = 0; i < nelt; i += 2)
32704 {
32705 perm2[i] = d->perm[i] & mask;
32706 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32707 }
32708 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32709 return true;
32710
32711 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32712 if (nelt >= 4)
32713 {
32714 for (i = 0; i < nelt; i += 4)
32715 {
32716 perm2[i + 0] = d->perm[i + 0] & mask;
32717 perm2[i + 1] = d->perm[i + 1] & mask;
32718 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32719 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32720 }
32721
32722 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32723 return true;
32724 }
32725 }
32726
32727 /* Finally, try the fully general two operand permute. */
32728 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32729 return true;
32730
32731 /* Recognize interleave style patterns with reversed operands. */
32732 if (d->op0 != d->op1)
32733 {
32734 for (i = 0; i < nelt; ++i)
32735 {
32736 unsigned e = d->perm[i];
32737 if (e >= nelt)
32738 e -= nelt;
32739 else
32740 e += nelt;
32741 perm2[i] = e;
32742 }
32743
32744 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32745 return true;
32746 }
32747
32748 /* Try the SSE4.1 blend variable merge instructions. */
32749 if (expand_vec_perm_blend (d))
32750 return true;
32751
32752 /* Try one of the AVX vpermil variable permutations. */
32753 if (expand_vec_perm_vpermil (d))
32754 return true;
32755
32756 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32757 if (expand_vec_perm_pshufb (d))
32758 return true;
32759
32760 return false;
32761 }
32762
32763 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32764 in terms of a pair of pshuflw + pshufhw instructions. */
32765
32766 static bool
32767 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32768 {
32769 unsigned char perm2[MAX_VECT_LEN];
32770 unsigned i;
32771 bool ok;
32772
32773 if (d->vmode != V8HImode || d->op0 != d->op1)
32774 return false;
32775
32776 /* The two permutations only operate in 64-bit lanes. */
32777 for (i = 0; i < 4; ++i)
32778 if (d->perm[i] >= 4)
32779 return false;
32780 for (i = 4; i < 8; ++i)
32781 if (d->perm[i] < 4)
32782 return false;
32783
32784 if (d->testing_p)
32785 return true;
32786
32787 /* Emit the pshuflw. */
32788 memcpy (perm2, d->perm, 4);
32789 for (i = 4; i < 8; ++i)
32790 perm2[i] = i;
32791 ok = expand_vselect (d->target, d->op0, perm2, 8);
32792 gcc_assert (ok);
32793
32794 /* Emit the pshufhw. */
32795 memcpy (perm2 + 4, d->perm + 4, 4);
32796 for (i = 0; i < 4; ++i)
32797 perm2[i] = i;
32798 ok = expand_vselect (d->target, d->target, perm2, 8);
32799 gcc_assert (ok);
32800
32801 return true;
32802 }
32803
32804 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32805 the permutation using the SSSE3 palignr instruction. This succeeds
32806 when all of the elements in PERM fit within one vector and we merely
32807 need to shift them down so that a single vector permutation has a
32808 chance to succeed. */
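/* For instance, a V4SImode permutation { 2, 3, 4, 5 } has min == 2; a palignr
   by two elements leaves exactly elements { 2, 3, 4, 5 } of the operand pair
   in the target, and the residual permutation { 0, 1, 2, 3 } is already in
   order.  */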
32809
32810 static bool
32811 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32812 {
32813 unsigned i, nelt = d->nelt;
32814 unsigned min, max;
32815 bool in_order, ok;
32816 rtx shift;
32817
32818 /* Even with AVX, palignr only operates on 128-bit vectors. */
32819 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32820 return false;
32821
32822 min = nelt, max = 0;
32823 for (i = 0; i < nelt; ++i)
32824 {
32825 unsigned e = d->perm[i];
32826 if (e < min)
32827 min = e;
32828 if (e > max)
32829 max = e;
32830 }
32831 if (min == 0 || max - min >= nelt)
32832 return false;
32833
32834 /* Given that we have SSSE3, we know we'll be able to implement the
32835 single operand permutation after the palignr with pshufb. */
32836 if (d->testing_p)
32837 return true;
32838
32839 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32840 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32841 gen_lowpart (TImode, d->op1),
32842 gen_lowpart (TImode, d->op0), shift));
32843
32844 d->op0 = d->op1 = d->target;
32845
32846 in_order = true;
32847 for (i = 0; i < nelt; ++i)
32848 {
32849 unsigned e = d->perm[i] - min;
32850 if (e != i)
32851 in_order = false;
32852 d->perm[i] = e;
32853 }
32854
32855 /* Test for the degenerate case where the alignment by itself
32856 produces the desired permutation. */
32857 if (in_order)
32858 return true;
32859
32860 ok = expand_vec_perm_1 (d);
32861 gcc_assert (ok);
32862
32863 return ok;
32864 }
32865
32866 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32867 a two vector permutation into a single vector permutation by using
32868 an interleave operation to merge the vectors. */
32869
32870 static bool
32871 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32872 {
32873 struct expand_vec_perm_d dremap, dfinal;
32874 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32875 unsigned contents, h1, h2, h3, h4;
32876 unsigned char remap[2 * MAX_VECT_LEN];
32877 rtx seq;
32878 bool ok;
32879
32880 if (d->op0 == d->op1)
32881 return false;
32882
32883 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32884 lanes. We can use similar techniques with the vperm2f128 instruction,
32885 but it requires slightly different logic. */
32886 if (GET_MODE_SIZE (d->vmode) != 16)
32887 return false;
32888
32889 /* Examine from whence the elements come. */
32890 contents = 0;
32891 for (i = 0; i < nelt; ++i)
32892 contents |= 1u << d->perm[i];
32893
32894 /* Split the two input vectors into 4 halves. */
32895 h1 = (1u << nelt2) - 1;
32896 h2 = h1 << nelt2;
32897 h3 = h2 << nelt2;
32898 h4 = h3 << nelt2;
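  /* h1/h2 cover the low/high half of op0 (element indices 0 .. nelt-1),
     h3/h4 the low/high half of op1 (indices nelt .. 2*nelt-1).  */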
32899
32900 memset (remap, 0xff, sizeof (remap));
32901 dremap = *d;
32902
32903 /* If the elements all come from the low halves, use interleave low; similarly,
32904 use interleave high if they all come from the high halves. If the elements
32905 are from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
32906 if ((contents & (h1 | h3)) == contents)
32907 {
32908 for (i = 0; i < nelt2; ++i)
32909 {
32910 remap[i] = i * 2;
32911 remap[i + nelt] = i * 2 + 1;
32912 dremap.perm[i * 2] = i;
32913 dremap.perm[i * 2 + 1] = i + nelt;
32914 }
32915 }
32916 else if ((contents & (h2 | h4)) == contents)
32917 {
32918 for (i = 0; i < nelt2; ++i)
32919 {
32920 remap[i + nelt2] = i * 2;
32921 remap[i + nelt + nelt2] = i * 2 + 1;
32922 dremap.perm[i * 2] = i + nelt2;
32923 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32924 }
32925 }
32926 else if ((contents & (h1 | h4)) == contents)
32927 {
32928 for (i = 0; i < nelt2; ++i)
32929 {
32930 remap[i] = i;
32931 remap[i + nelt + nelt2] = i + nelt2;
32932 dremap.perm[i] = i;
32933 dremap.perm[i + nelt2] = i + nelt + nelt2;
32934 }
32935 if (nelt != 4)
32936 {
32937 dremap.vmode = V2DImode;
32938 dremap.nelt = 2;
32939 dremap.perm[0] = 0;
32940 dremap.perm[1] = 3;
32941 }
32942 }
32943 else if ((contents & (h2 | h3)) == contents)
32944 {
32945 for (i = 0; i < nelt2; ++i)
32946 {
32947 remap[i + nelt2] = i;
32948 remap[i + nelt] = i + nelt2;
32949 dremap.perm[i] = i + nelt2;
32950 dremap.perm[i + nelt2] = i + nelt;
32951 }
32952 if (nelt != 4)
32953 {
32954 dremap.vmode = V2DImode;
32955 dremap.nelt = 2;
32956 dremap.perm[0] = 1;
32957 dremap.perm[1] = 2;
32958 }
32959 }
32960 else
32961 return false;
32962
32963 /* Use the remapping array set up above to move the elements from their
32964 swizzled locations into their final destinations. */
32965 dfinal = *d;
32966 for (i = 0; i < nelt; ++i)
32967 {
32968 unsigned e = remap[d->perm[i]];
32969 gcc_assert (e < nelt);
32970 dfinal.perm[i] = e;
32971 }
32972 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32973 dfinal.op1 = dfinal.op0;
32974 dremap.target = dfinal.op0;
32975
32976 /* Test if the final remap can be done with a single insn. For V4SFmode or
32977 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
32978 start_sequence ();
32979 ok = expand_vec_perm_1 (&dfinal);
32980 seq = get_insns ();
32981 end_sequence ();
32982
32983 if (!ok)
32984 return false;
32985
32986 if (dremap.vmode != dfinal.vmode)
32987 {
32988 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
32989 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
32990 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
32991 }
32992
32993 ok = expand_vec_perm_1 (&dremap);
32994 gcc_assert (ok);
32995
32996 emit_insn (seq);
32997 return true;
32998 }
32999
33000 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33001 permutation with two pshufb insns and an ior. We should have already
33002 failed all two instruction sequences. */
33003
33004 static bool
33005 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33006 {
33007 rtx rperm[2][16], vperm, l, h, op, m128;
33008 unsigned int i, nelt, eltsz;
33009
33010 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33011 return false;
33012 gcc_assert (d->op0 != d->op1);
33013
33014 nelt = d->nelt;
33015 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33016
33017 /* Generate two permutation masks. If the required element is within
33018 the given vector it is shuffled into the proper lane. If the required
33019 element is in the other vector, force a zero into the lane by setting
33020 bit 7 in the permutation mask. */
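  /* For V8HImode (eltsz == 2), for example, taking element i from element e of
     op1 puts byte selectors 2*e and 2*e+1 into bytes 2*i and 2*i+1 of the op1
     mask, while the same bytes of the op0 mask get -128 (bit 7 set) so that
     pshufb zeroes them before the final ior.  */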
33021 m128 = GEN_INT (-128);
33022 for (i = 0; i < nelt; ++i)
33023 {
33024 unsigned j, e = d->perm[i];
33025 unsigned which = (e >= nelt);
33026 if (e >= nelt)
33027 e -= nelt;
33028
33029 for (j = 0; j < eltsz; ++j)
33030 {
33031 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33032 rperm[1-which][i*eltsz + j] = m128;
33033 }
33034 }
33035
33036 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33037 vperm = force_reg (V16QImode, vperm);
33038
33039 l = gen_reg_rtx (V16QImode);
33040 op = gen_lowpart (V16QImode, d->op0);
33041 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33042
33043 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33044 vperm = force_reg (V16QImode, vperm);
33045
33046 h = gen_reg_rtx (V16QImode);
33047 op = gen_lowpart (V16QImode, d->op1);
33048 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33049
33050 op = gen_lowpart (V16QImode, d->target);
33051 emit_insn (gen_iorv16qi3 (op, l, h));
33052
33053 return true;
33054 }
33055
33056 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33057 and extract-odd permutations. */
33058
33059 static bool
33060 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33061 {
33062 rtx t1, t2, t3;
33063
33064 switch (d->vmode)
33065 {
33066 case V4DFmode:
33067 t1 = gen_reg_rtx (V4DFmode);
33068 t2 = gen_reg_rtx (V4DFmode);
33069
33070 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33071 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33072 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33073
33074 /* Now an unpck[lh]pd will produce the result required. */
33075 if (odd)
33076 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33077 else
33078 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33079 emit_insn (t3);
33080 break;
33081
33082 case V8SFmode:
33083 {
33084 int mask = odd ? 0xdd : 0x88;
33085
33086 t1 = gen_reg_rtx (V8SFmode);
33087 t2 = gen_reg_rtx (V8SFmode);
33088 t3 = gen_reg_rtx (V8SFmode);
33089
33090 /* Shuffle within the 128-bit lanes to produce:
33091 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33092 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33093 GEN_INT (mask)));
33094
33095 /* Shuffle the lanes around to produce:
33096 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33097 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33098 GEN_INT (0x3)));
33099
33100 /* Shuffle within the 128-bit lanes to produce:
33101 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33102 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33103
33104 /* Shuffle within the 128-bit lanes to produce:
33105 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33106 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33107
33108 /* Shuffle the lanes around to produce:
33109 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33110 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33111 GEN_INT (0x20)));
33112 }
33113 break;
33114
33115 case V2DFmode:
33116 case V4SFmode:
33117 case V2DImode:
33118 case V4SImode:
33119 /* These are always directly implementable by expand_vec_perm_1. */
33120 gcc_unreachable ();
33121
33122 case V8HImode:
33123 if (TARGET_SSSE3)
33124 return expand_vec_perm_pshufb2 (d);
33125 else
33126 {
33127 /* We need 2*log2(N)-1 operations to achieve odd/even
33128 with interleave. */
33129 t1 = gen_reg_rtx (V8HImode);
33130 t2 = gen_reg_rtx (V8HImode);
33131 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33132 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33133 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33134 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33135 if (odd)
33136 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33137 else
33138 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33139 emit_insn (t3);
33140 }
33141 break;
33142
33143 case V16QImode:
33144 if (TARGET_SSSE3)
33145 return expand_vec_perm_pshufb2 (d);
33146 else
33147 {
33148 t1 = gen_reg_rtx (V16QImode);
33149 t2 = gen_reg_rtx (V16QImode);
33150 t3 = gen_reg_rtx (V16QImode);
33151 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33152 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33153 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33154 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33155 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33156 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33157 if (odd)
33158 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33159 else
33160 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33161 emit_insn (t3);
33162 }
33163 break;
33164
33165 default:
33166 gcc_unreachable ();
33167 }
33168
33169 return true;
33170 }
33171
33172 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33173 extract-even and extract-odd permutations. */
33174
33175 static bool
33176 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33177 {
33178 unsigned i, odd, nelt = d->nelt;
33179
33180 odd = d->perm[0];
33181 if (odd != 0 && odd != 1)
33182 return false;
33183
33184 for (i = 1; i < nelt; ++i)
33185 if (d->perm[i] != 2 * i + odd)
33186 return false;
33187
33188 return expand_vec_perm_even_odd_1 (d, odd);
33189 }
33190
33191 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33192 permutations. We assume that expand_vec_perm_1 has already failed. */
33193
33194 static bool
33195 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33196 {
33197 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33198 enum machine_mode vmode = d->vmode;
33199 unsigned char perm2[4];
33200 rtx op0 = d->op0;
33201 bool ok;
33202
33203 switch (vmode)
33204 {
33205 case V4DFmode:
33206 case V8SFmode:
33207 /* These are special-cased in sse.md so that we can optionally
33208 use the vbroadcast instruction. They expand to two insns
33209 if the input happens to be in a register. */
33210 gcc_unreachable ();
33211
33212 case V2DFmode:
33213 case V2DImode:
33214 case V4SFmode:
33215 case V4SImode:
33216 /* These are always implementable using standard shuffle patterns. */
33217 gcc_unreachable ();
33218
33219 case V8HImode:
33220 case V16QImode:
33221 /* These can be implemented via interleave. We save one insn by
33222 stopping once we have promoted to V4SImode and then use pshufd. */
33223 do
33224 {
33225 optab otab = vec_interleave_low_optab;
33226
33227 if (elt >= nelt2)
33228 {
33229 otab = vec_interleave_high_optab;
33230 elt -= nelt2;
33231 }
33232 nelt2 /= 2;
33233
33234 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33235 vmode = get_mode_wider_vector (vmode);
33236 op0 = gen_lowpart (vmode, op0);
33237 }
33238 while (vmode != V4SImode);
33239
33240 memset (perm2, elt, 4);
33241 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33242 gcc_assert (ok);
33243 return true;
33244
33245 default:
33246 gcc_unreachable ();
33247 }
33248 }
33249
33250 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33251 broadcast permutations. */
33252
33253 static bool
33254 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33255 {
33256 unsigned i, elt, nelt = d->nelt;
33257
33258 if (d->op0 != d->op1)
33259 return false;
33260
33261 elt = d->perm[0];
33262 for (i = 1; i < nelt; ++i)
33263 if (d->perm[i] != elt)
33264 return false;
33265
33266 return expand_vec_perm_broadcast_1 (d);
33267 }
33268
33269 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33270 With all of the interface bits taken care of, perform the expansion
33271 in D and return true on success. */
33272
33273 static bool
33274 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33275 {
33276 /* Try a single instruction expansion. */
33277 if (expand_vec_perm_1 (d))
33278 return true;
33279
33280 /* Try sequences of two instructions. */
33281
33282 if (expand_vec_perm_pshuflw_pshufhw (d))
33283 return true;
33284
33285 if (expand_vec_perm_palignr (d))
33286 return true;
33287
33288 if (expand_vec_perm_interleave2 (d))
33289 return true;
33290
33291 if (expand_vec_perm_broadcast (d))
33292 return true;
33293
33294 /* Try sequences of three instructions. */
33295
33296 if (expand_vec_perm_pshufb2 (d))
33297 return true;
33298
33299 /* ??? Look for narrow permutations whose element orderings would
33300 allow the promotion to a wider mode. */
33301
33302 /* ??? Look for sequences of interleave or a wider permute that place
33303 the data into the correct lanes for a half-vector shuffle like
33304 pshuf[lh]w or vpermilps. */
33305
33306 /* ??? Look for sequences of interleave that produce the desired results.
33307 The combinatorics of punpck[lh] get pretty ugly... */
33308
33309 if (expand_vec_perm_even_odd (d))
33310 return true;
33311
33312 return false;
33313 }
33314
33315 /* Extract the values from the vector CST into the permutation array in D.
33316 Return 0 on error, 1 if all values from the permutation come from the
33317 first vector, 2 if all values from the second vector, and 3 otherwise. */
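/* For example, with nelt == 4 a constant { 0, 5, 2, 7 } yields 3 with
   perm = { 0, 5, 2, 7 }, while { 4, 5, 6, 7 } yields 2 and the indices are
   folded down to { 0, 1, 2, 3 }.  */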
33318
33319 static int
33320 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33321 {
33322 tree list = TREE_VECTOR_CST_ELTS (cst);
33323 unsigned i, nelt = d->nelt;
33324 int ret = 0;
33325
33326 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33327 {
33328 unsigned HOST_WIDE_INT e;
33329
33330 if (!host_integerp (TREE_VALUE (list), 1))
33331 return 0;
33332 e = tree_low_cst (TREE_VALUE (list), 1);
33333 if (e >= 2 * nelt)
33334 return 0;
33335
33336 ret |= (e < nelt ? 1 : 2);
33337 d->perm[i] = e;
33338 }
33339 gcc_assert (list == NULL);
33340
33341 /* For all elements from second vector, fold the elements to first. */
33342 if (ret == 2)
33343 for (i = 0; i < nelt; ++i)
33344 d->perm[i] -= nelt;
33345
33346 return ret;
33347 }
33348
33349 static rtx
33350 ix86_expand_vec_perm_builtin (tree exp)
33351 {
33352 struct expand_vec_perm_d d;
33353 tree arg0, arg1, arg2;
33354
33355 arg0 = CALL_EXPR_ARG (exp, 0);
33356 arg1 = CALL_EXPR_ARG (exp, 1);
33357 arg2 = CALL_EXPR_ARG (exp, 2);
33358
33359 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33360 d.nelt = GET_MODE_NUNITS (d.vmode);
33361 d.testing_p = false;
33362 gcc_assert (VECTOR_MODE_P (d.vmode));
33363
33364 if (TREE_CODE (arg2) != VECTOR_CST)
33365 {
33366 error_at (EXPR_LOCATION (exp),
33367 "vector permutation requires vector constant");
33368 goto exit_error;
33369 }
33370
33371 switch (extract_vec_perm_cst (&d, arg2))
33372 {
33373 default:
33374 gcc_unreachable();
33375
33376 case 0:
33377 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33378 goto exit_error;
33379
33380 case 3:
33381 if (!operand_equal_p (arg0, arg1, 0))
33382 {
33383 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33384 d.op0 = force_reg (d.vmode, d.op0);
33385 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33386 d.op1 = force_reg (d.vmode, d.op1);
33387 break;
33388 }
33389
33390 /* The elements of PERM do not suggest that only the first operand
33391 is used, but both operands are identical. Allow easier matching
33392 of the permutation by folding the permutation into the single
33393 input vector. */
33394 {
33395 unsigned i, nelt = d.nelt;
33396 for (i = 0; i < nelt; ++i)
33397 if (d.perm[i] >= nelt)
33398 d.perm[i] -= nelt;
33399 }
33400 /* FALLTHRU */
33401
33402 case 1:
33403 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33404 d.op0 = force_reg (d.vmode, d.op0);
33405 d.op1 = d.op0;
33406 break;
33407
33408 case 2:
33409 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33410 d.op0 = force_reg (d.vmode, d.op0);
33411 d.op1 = d.op0;
33412 break;
33413 }
33414
33415 d.target = gen_reg_rtx (d.vmode);
33416 if (ix86_expand_vec_perm_builtin_1 (&d))
33417 return d.target;
33418
33419 /* For compiler generated permutations, we should never get here, because
33420 the compiler should also be checking the ok hook. But since this is a
33421 builtin the user has access to, don't abort. */
33422 switch (d.nelt)
33423 {
33424 case 2:
33425 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33426 break;
33427 case 4:
33428 sorry ("vector permutation (%d %d %d %d)",
33429 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33430 break;
33431 case 8:
33432 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33433 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33434 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33435 break;
33436 case 16:
33437 sorry ("vector permutation "
33438 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33439 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33440 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33441 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33442 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33443 break;
33444 default:
33445 gcc_unreachable ();
33446 }
33447 exit_error:
33448 return CONST0_RTX (d.vmode);
33449 }
33450
33451 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33452
33453 static bool
33454 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33455 {
33456 struct expand_vec_perm_d d;
33457 int vec_mask;
33458 bool ret, one_vec;
33459
33460 d.vmode = TYPE_MODE (vec_type);
33461 d.nelt = GET_MODE_NUNITS (d.vmode);
33462 d.testing_p = true;
33463
33464 /* Given sufficient ISA support we can just return true here
33465 for selected vector modes. */
33466 if (GET_MODE_SIZE (d.vmode) == 16)
33467 {
33468 /* All implementable with a single vpperm insn. */
33469 if (TARGET_XOP)
33470 return true;
33471 /* All implementable with 2 pshufb + 1 ior. */
33472 if (TARGET_SSSE3)
33473 return true;
33474 /* All implementable with shufpd or unpck[lh]pd. */
33475 if (d.nelt == 2)
33476 return true;
33477 }
33478
33479 vec_mask = extract_vec_perm_cst (&d, mask);
33480
33481 /* This hook cannot be called in response to something that the
33482 user does (unlike the builtin expander), so we shouldn't ever see
33483 an error generated from the extract. */
33484 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33485 one_vec = (vec_mask != 3);
33486
33487 /* Implementable with shufps or pshufd. */
33488 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33489 return true;
33490
33491 /* Otherwise we have to go through the motions and see if we can
33492 figure out how to generate the requested permutation. */
33493 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33494 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33495 if (!one_vec)
33496 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33497
33498 start_sequence ();
33499 ret = ix86_expand_vec_perm_builtin_1 (&d);
33500 end_sequence ();
33501
33502 return ret;
33503 }
33504
33505 void
33506 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33507 {
33508 struct expand_vec_perm_d d;
33509 unsigned i, nelt;
33510
33511 d.target = targ;
33512 d.op0 = op0;
33513 d.op1 = op1;
33514 d.vmode = GET_MODE (targ);
33515 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33516 d.testing_p = false;
33517
33518 for (i = 0; i < nelt; ++i)
33519 d.perm[i] = i * 2 + odd;
33520
33521 /* We'll either be able to implement the permutation directly... */
33522 if (expand_vec_perm_1 (&d))
33523 return;
33524
33525 /* ... or we use the special-case patterns. */
33526 expand_vec_perm_even_odd_1 (&d, odd);
33527 }
33528
33529 /* Expand an insert into a vector register through pinsr insn.
33530 Return true if successful. */
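/* For example, inserting an HImode value at bit position 16 of a V8HImode
   destination (size == 16, pos == 16) maps to pinsrw writing element
   pos / size == 1.  */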
33531
33532 bool
33533 ix86_expand_pinsr (rtx *operands)
33534 {
33535 rtx dst = operands[0];
33536 rtx src = operands[3];
33537
33538 unsigned int size = INTVAL (operands[1]);
33539 unsigned int pos = INTVAL (operands[2]);
33540
33541 if (GET_CODE (dst) == SUBREG)
33542 {
33543 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33544 dst = SUBREG_REG (dst);
33545 }
33546
33547 if (GET_CODE (src) == SUBREG)
33548 src = SUBREG_REG (src);
33549
33550 switch (GET_MODE (dst))
33551 {
33552 case V16QImode:
33553 case V8HImode:
33554 case V4SImode:
33555 case V2DImode:
33556 {
33557 enum machine_mode srcmode, dstmode;
33558 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33559
33560 srcmode = mode_for_size (size, MODE_INT, 0);
33561
33562 switch (srcmode)
33563 {
33564 case QImode:
33565 if (!TARGET_SSE4_1)
33566 return false;
33567 dstmode = V16QImode;
33568 pinsr = gen_sse4_1_pinsrb;
33569 break;
33570
33571 case HImode:
33572 if (!TARGET_SSE2)
33573 return false;
33574 dstmode = V8HImode;
33575 pinsr = gen_sse2_pinsrw;
33576 break;
33577
33578 case SImode:
33579 if (!TARGET_SSE4_1)
33580 return false;
33581 dstmode = V4SImode;
33582 pinsr = gen_sse4_1_pinsrd;
33583 break;
33584
33585 case DImode:
33586 gcc_assert (TARGET_64BIT);
33587 if (!TARGET_SSE4_1)
33588 return false;
33589 dstmode = V2DImode;
33590 pinsr = gen_sse4_1_pinsrq;
33591 break;
33592
33593 default:
33594 return false;
33595 }
33596
33597 dst = gen_lowpart (dstmode, dst);
33598 src = gen_lowpart (srcmode, src);
33599
33600 pos /= size;
33601
33602 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
33603 return true;
33604 }
33605
33606 default:
33607 return false;
33608 }
33609 }
33610 \f
33611 /* This function returns the calling abi specific va_list type node.
33612 It returns the FNDECL specific va_list type. */
33613
33614 static tree
33615 ix86_fn_abi_va_list (tree fndecl)
33616 {
33617 if (!TARGET_64BIT)
33618 return va_list_type_node;
33619 gcc_assert (fndecl != NULL_TREE);
33620
33621 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33622 return ms_va_list_type_node;
33623 else
33624 return sysv_va_list_type_node;
33625 }
33626
33627 /* Returns the canonical va_list type specified by TYPE. If there
33628 is no valid TYPE provided, it returns NULL_TREE. */
33629
33630 static tree
33631 ix86_canonical_va_list_type (tree type)
33632 {
33633 tree wtype, htype;
33634
33635 /* Resolve references and pointers to va_list type. */
33636 if (TREE_CODE (type) == MEM_REF)
33637 type = TREE_TYPE (type);
33638 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33639 type = TREE_TYPE (type);
33640 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33641 type = TREE_TYPE (type);
33642
33643 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33644 {
33645 wtype = va_list_type_node;
33646 gcc_assert (wtype != NULL_TREE);
33647 htype = type;
33648 if (TREE_CODE (wtype) == ARRAY_TYPE)
33649 {
33650 /* If va_list is an array type, the argument may have decayed
33651 to a pointer type, e.g. by being passed to another function.
33652 In that case, unwrap both types so that we can compare the
33653 underlying records. */
33654 if (TREE_CODE (htype) == ARRAY_TYPE
33655 || POINTER_TYPE_P (htype))
33656 {
33657 wtype = TREE_TYPE (wtype);
33658 htype = TREE_TYPE (htype);
33659 }
33660 }
33661 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33662 return va_list_type_node;
33663 wtype = sysv_va_list_type_node;
33664 gcc_assert (wtype != NULL_TREE);
33665 htype = type;
33666 if (TREE_CODE (wtype) == ARRAY_TYPE)
33667 {
33668 /* If va_list is an array type, the argument may have decayed
33669 to a pointer type, e.g. by being passed to another function.
33670 In that case, unwrap both types so that we can compare the
33671 underlying records. */
33672 if (TREE_CODE (htype) == ARRAY_TYPE
33673 || POINTER_TYPE_P (htype))
33674 {
33675 wtype = TREE_TYPE (wtype);
33676 htype = TREE_TYPE (htype);
33677 }
33678 }
33679 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33680 return sysv_va_list_type_node;
33681 wtype = ms_va_list_type_node;
33682 gcc_assert (wtype != NULL_TREE);
33683 htype = type;
33684 if (TREE_CODE (wtype) == ARRAY_TYPE)
33685 {
33686 /* If va_list is an array type, the argument may have decayed
33687 to a pointer type, e.g. by being passed to another function.
33688 In that case, unwrap both types so that we can compare the
33689 underlying records. */
33690 if (TREE_CODE (htype) == ARRAY_TYPE
33691 || POINTER_TYPE_P (htype))
33692 {
33693 wtype = TREE_TYPE (wtype);
33694 htype = TREE_TYPE (htype);
33695 }
33696 }
33697 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33698 return ms_va_list_type_node;
33699 return NULL_TREE;
33700 }
33701 return std_canonical_va_list_type (type);
33702 }
33703
33704 /* Iterate through the target-specific builtin types for va_list.
33705 IDX denotes the iterator, *PTREE is set to the result type of
33706 the va_list builtin, and *PNAME to its internal type.
33707 Returns zero if there is no element for this index, otherwise
33708 IDX should be increased upon the next call.
33709 Note, do not iterate a base builtin's name like __builtin_va_list.
33710 Used from c_common_nodes_and_builtins. */
33711
33712 static int
33713 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33714 {
33715 if (TARGET_64BIT)
33716 {
33717 switch (idx)
33718 {
33719 default:
33720 break;
33721
33722 case 0:
33723 *ptree = ms_va_list_type_node;
33724 *pname = "__builtin_ms_va_list";
33725 return 1;
33726
33727 case 1:
33728 *ptree = sysv_va_list_type_node;
33729 *pname = "__builtin_sysv_va_list";
33730 return 1;
33731 }
33732 }
33733
33734 return 0;
33735 }
33736
33737 #undef TARGET_SCHED_DISPATCH
33738 #define TARGET_SCHED_DISPATCH has_dispatch
33739 #undef TARGET_SCHED_DISPATCH_DO
33740 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33741
33742 /* The size of the dispatch window is the total number of bytes of
33743 object code allowed in a window. */
33744 #define DISPATCH_WINDOW_SIZE 16
33745
33746 /* Number of dispatch windows considered for scheduling. */
33747 #define MAX_DISPATCH_WINDOWS 3
33748
33749 /* Maximum number of instructions in a window. */
33750 #define MAX_INSN 4
33751
33752 /* Maximum number of immediate operands in a window. */
33753 #define MAX_IMM 4
33754
33755 /* Maximum number of immediate bits allowed in a window. */
33756 #define MAX_IMM_SIZE 128
33757
33758 /* Maximum number of 32 bit immediates allowed in a window. */
33759 #define MAX_IMM_32 4
33760
33761 /* Maximum number of 64 bit immediates allowed in a window. */
33762 #define MAX_IMM_64 2
33763
33764 /* Maximum total of loads or prefetches allowed in a window. */
33765 #define MAX_LOAD 2
33766
33767 /* Maximum total of stores allowed in a window. */
33768 #define MAX_STORE 1
33769
33770 #undef BIG
33771 #define BIG 100
33772
33773
33774 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33775 enum dispatch_group {
33776 disp_no_group = 0,
33777 disp_load,
33778 disp_store,
33779 disp_load_store,
33780 disp_prefetch,
33781 disp_imm,
33782 disp_imm_32,
33783 disp_imm_64,
33784 disp_branch,
33785 disp_cmp,
33786 disp_jcc,
33787 disp_last
33788 };
33789
33790 /* Number of allowable groups in a dispatch window. It is an array
33791 indexed by the dispatch_group enum. 100 is used as a big number
33792 because the number of these kinds of operations does not have any
33793 effect in a dispatch window, but we need entries for them in
33794 the table. */
33795 static unsigned int num_allowable_groups[disp_last] = {
33796 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33797 };
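/* E.g. num_allowable_groups[disp_load] == 2 above means that at most two
   load-group insns fit in one dispatch window.  */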
33798
33799 char group_name[disp_last + 1][16] = {
33800 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33801 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33802 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33803 };
33804
33805 /* Instruction path. */
33806 enum insn_path {
33807 no_path = 0,
33808 path_single, /* Single micro op. */
33809 path_double, /* Double micro op. */
33810 path_multi, /* Instructions with more than 2 micro ops. */
33811 last_path
33812 };
33813
33814 /* sched_insn_info defines a window to the instructions scheduled in
33815 the basic block. It contains a pointer to the insn_info table and
33816 the instruction scheduled.
33817
33818 Windows are allocated for each basic block and are linked
33819 together. */
33820 typedef struct sched_insn_info_s {
33821 rtx insn;
33822 enum dispatch_group group;
33823 enum insn_path path;
33824 int byte_len;
33825 int imm_bytes;
33826 } sched_insn_info;
33827
33828 /* Linked list of dispatch windows. This is a two way list of
33829 dispatch windows of a basic block. It contains information about
33830 the number of uops in the window and the total number of
33831 instructions and of bytes in the object code for this dispatch
33832 window. */
33833 typedef struct dispatch_windows_s {
33834 int num_insn; /* Number of insns in the window. */
33835 int num_uops; /* Number of uops in the window. */
33836 int window_size; /* Number of bytes in the window. */
33837 int window_num; /* Window number, either 0 or 1. */
33838 int num_imm; /* Number of immediates in an insn. */
33839 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33840 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33841 int imm_size; /* Total immediates in the window. */
33842 int num_loads; /* Total memory loads in the window. */
33843 int num_stores; /* Total memory stores in the window. */
33844 int violation; /* Violation exists in window. */
33845 sched_insn_info *window; /* Pointer to the window. */
33846 struct dispatch_windows_s *next;
33847 struct dispatch_windows_s *prev;
33848 } dispatch_windows;
33849
33850 /* Immediate values used in an insn. */
33851 typedef struct imm_info_s
33852 {
33853 int imm;
33854 int imm32;
33855 int imm64;
33856 } imm_info;
33857
33858 static dispatch_windows *dispatch_window_list;
33859 static dispatch_windows *dispatch_window_list1;
33860
33861 /* Get dispatch group of insn. */
33862
33863 static enum dispatch_group
33864 get_mem_group (rtx insn)
33865 {
33866 enum attr_memory memory;
33867
33868 if (INSN_CODE (insn) < 0)
33869 return disp_no_group;
33870 memory = get_attr_memory (insn);
33871 if (memory == MEMORY_STORE)
33872 return disp_store;
33873
33874 if (memory == MEMORY_LOAD)
33875 return disp_load;
33876
33877 if (memory == MEMORY_BOTH)
33878 return disp_load_store;
33879
33880 return disp_no_group;
33881 }
33882
33883 /* Return true if insn is a compare instruction. */
33884
33885 static bool
33886 is_cmp (rtx insn)
33887 {
33888 enum attr_type type;
33889
33890 type = get_attr_type (insn);
33891 return (type == TYPE_TEST
33892 || type == TYPE_ICMP
33893 || type == TYPE_FCMP
33894 || GET_CODE (PATTERN (insn)) == COMPARE);
33895 }
33896
33897 /* Return true if a dispatch violation was encountered. */
33898
33899 static bool
33900 dispatch_violation (void)
33901 {
33902 if (dispatch_window_list->next)
33903 return dispatch_window_list->next->violation;
33904 return dispatch_window_list->violation;
33905 }
33906
33907 /* Return true if insn is a branch instruction. */
33908
33909 static bool
33910 is_branch (rtx insn)
33911 {
33912 return (CALL_P (insn) || JUMP_P (insn));
33913 }
33914
33915 /* Return true if insn is a prefetch instruction. */
33916
33917 static bool
33918 is_prefetch (rtx insn)
33919 {
33920 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33921 }
33922
33923 /* This function initializes a dispatch window and the list container holding a
33924 pointer to the window. */
33925
33926 static void
33927 init_window (int window_num)
33928 {
33929 int i;
33930 dispatch_windows *new_list;
33931
33932 if (window_num == 0)
33933 new_list = dispatch_window_list;
33934 else
33935 new_list = dispatch_window_list1;
33936
33937 new_list->num_insn = 0;
33938 new_list->num_uops = 0;
33939 new_list->window_size = 0;
33940 new_list->next = NULL;
33941 new_list->prev = NULL;
33942 new_list->window_num = window_num;
33943 new_list->num_imm = 0;
33944 new_list->num_imm_32 = 0;
33945 new_list->num_imm_64 = 0;
33946 new_list->imm_size = 0;
33947 new_list->num_loads = 0;
33948 new_list->num_stores = 0;
33949 new_list->violation = false;
33950
33951 for (i = 0; i < MAX_INSN; i++)
33952 {
33953 new_list->window[i].insn = NULL;
33954 new_list->window[i].group = disp_no_group;
33955 new_list->window[i].path = no_path;
33956 new_list->window[i].byte_len = 0;
33957 new_list->window[i].imm_bytes = 0;
33958 }
33959 return;
33960 }
33961
33962 /* This function allocates and initializes a dispatch window and the
33963 list container holding a pointer to the window. */
33964
33965 static dispatch_windows *
33966 allocate_window (void)
33967 {
33968 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33969 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33970
33971 return new_list;
33972 }
33973
33974 /* This routine initializes the dispatch scheduling information. It
33975 initiates building dispatch scheduler tables and constructs the
33976 first dispatch window. */
33977
33978 static void
33979 init_dispatch_sched (void)
33980 {
33981 /* Allocate a dispatch list and a window. */
33982 dispatch_window_list = allocate_window ();
33983 dispatch_window_list1 = allocate_window ();
33984 init_window (0);
33985 init_window (1);
33986 }
33987
33988 /* This function returns true if a branch is detected. The end of a basic
33989 block does not have to be a branch, but here we assume that only branches
33990 end a window. */
33991
33992 static bool
33993 is_end_basic_block (enum dispatch_group group)
33994 {
33995 return group == disp_branch;
33996 }
33997
33998 /* This function is called when the end of a window processing is reached. */
33999
34000 static void
34001 process_end_window (void)
34002 {
34003 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34004 if (dispatch_window_list->next)
34005 {
34006 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34007 gcc_assert (dispatch_window_list->window_size
34008 + dispatch_window_list1->window_size <= 48);
34009 init_window (1);
34010 }
34011 init_window (0);
34012 }
34013
34014 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34015 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34016 for 48 bytes of instructions. Note that these windows are not the
34017 dispatch windows whose size is DISPATCH_WINDOW_SIZE. */
34018
34019 static dispatch_windows *
34020 allocate_next_window (int window_num)
34021 {
34022 if (window_num == 0)
34023 {
34024 if (dispatch_window_list->next)
34025 init_window (1);
34026 init_window (0);
34027 return dispatch_window_list;
34028 }
34029
34030 dispatch_window_list->next = dispatch_window_list1;
34031 dispatch_window_list1->prev = dispatch_window_list;
34032
34033 return dispatch_window_list1;
34034 }
34035
34036 /* Increment the number of immediate operands of an instruction. */
34037
34038 static int
34039 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34040 {
34041 if (*in_rtx == 0)
34042 return 0;
34043
34044 switch ( GET_CODE (*in_rtx))
34045 {
34046 case CONST:
34047 case SYMBOL_REF:
34048 case CONST_INT:
34049 (imm_values->imm)++;
34050 if (x86_64_immediate_operand (*in_rtx, SImode))
34051 (imm_values->imm32)++;
34052 else
34053 (imm_values->imm64)++;
34054 break;
34055
34056 case CONST_DOUBLE:
34057 (imm_values->imm)++;
34058 (imm_values->imm64)++;
34059 break;
34060
34061 case CODE_LABEL:
34062 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34063 {
34064 (imm_values->imm)++;
34065 (imm_values->imm32)++;
34066 }
34067 break;
34068
34069 default:
34070 break;
34071 }
34072
34073 return 0;
34074 }
34075
34076 /* Compute number of immediate operands of an instruction. */
34077
34078 static void
34079 find_constant (rtx in_rtx, imm_info *imm_values)
34080 {
34081 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34082 (rtx_function) find_constant_1, (void *) imm_values);
34083 }
34084
34085 /* Return the total size of the immediate operands of an instruction along with
34086 the number of corresponding immediate operands. It initializes its parameters
34087 to zero before calling FIND_CONSTANT.
34088 INSN is the input instruction. IMM is the total of immediates.
34089 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34090 bit immediates. */
34091
34092 static int
34093 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34094 {
34095 imm_info imm_values = {0, 0, 0};
34096
34097 find_constant (insn, &imm_values);
34098 *imm = imm_values.imm;
34099 *imm32 = imm_values.imm32;
34100 *imm64 = imm_values.imm64;
34101 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34102 }
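/* Worked example (illustrative): for an instruction carrying one 32-bit and
   one 64-bit immediate, the counters come back as *IMM = 2, *IMM32 = 1 and
   *IMM64 = 1, and the returned size is 1*4 + 1*8 = 12 bytes of immediate
   data. */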
34103
34104 /* This function indicates whether an instruction has an immediate
34105    operand. */
34106
34107 static bool
34108 has_immediate (rtx insn)
34109 {
34110 int num_imm_operand;
34111 int num_imm32_operand;
34112 int num_imm64_operand;
34113
34114 if (insn)
34115 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34116 &num_imm64_operand);
34117 return false;
34118 }
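/* The value returned above is really the total byte size of the immediates
   computed by GET_NUM_IMMEDIATES, used as a truth value: it is nonzero
   exactly when INSN has at least one immediate operand. */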
34119
34120 /* Return the dispatch path (single, double or multi) for INSN. */
34121
34122 static enum insn_path
34123 get_insn_path (rtx insn)
34124 {
34125 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34126
34127 if ((int)path == 0)
34128 return path_single;
34129
34130 if ((int)path == 1)
34131 return path_double;
34132
34133 return path_multi;
34134 }
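/* Attribute values 0 and 1 of AMDFAM10_DECODE are mapped to the single- and
   double-dispatch paths respectively; any other value is treated as a
   multi-uop path. */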
34135
34136 /* Return insn dispatch group. */
34137
34138 static enum dispatch_group
34139 get_insn_group (rtx insn)
34140 {
34141 enum dispatch_group group = get_mem_group (insn);
34142 if (group)
34143 return group;
34144
34145 if (is_branch (insn))
34146 return disp_branch;
34147
34148 if (is_cmp (insn))
34149 return disp_cmp;
34150
34151 if (has_immediate (insn))
34152 return disp_imm;
34153
34154 if (is_prefetch (insn))
34155 return disp_prefetch;
34156
34157 return disp_no_group;
34158 }
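/* Classification is by priority, in the order tested above: a memory group
   from GET_MEM_GROUP wins over a branch, which wins over a compare, an
   immediate and a prefetch; anything else falls into disp_no_group. */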
34159
34160 /* Count GROUP-restricted slots used by INSN in dispatch window WINDOW_LIST:
34161    0 if INSN is unrestricted, 1 if it fits the window's limits, BIG otherwise. */
34162
34163 static int
34164 count_num_restricted (rtx insn, dispatch_windows *window_list)
34165 {
34166 enum dispatch_group group = get_insn_group (insn);
34167 int imm_size;
34168 int num_imm_operand;
34169 int num_imm32_operand;
34170 int num_imm64_operand;
34171
34172 if (group == disp_no_group)
34173 return 0;
34174
34175 if (group == disp_imm)
34176 {
34177 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34178 &num_imm64_operand);
34179 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34180 || num_imm_operand + window_list->num_imm > MAX_IMM
34181 || (num_imm32_operand > 0
34182 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34183 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34184 || (num_imm64_operand > 0
34185 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34186 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34187 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34188 && num_imm64_operand > 0
34189 && ((window_list->num_imm_64 > 0
34190 && window_list->num_insn >= 2)
34191 || window_list->num_insn >= 3)))
34192 return BIG;
34193
34194 return 1;
34195 }
34196
34197 if ((group == disp_load_store
34198 && (window_list->num_loads >= MAX_LOAD
34199 || window_list->num_stores >= MAX_STORE))
34200 || ((group == disp_load
34201 || group == disp_prefetch)
34202 && window_list->num_loads >= MAX_LOAD)
34203 || (group == disp_store
34204 && window_list->num_stores >= MAX_STORE))
34205 return BIG;
34206
34207 return 1;
34208 }
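/* BIG serves as a sentinel return value: it is intended to exceed every
   entry of NUM_ALLOWABLE_GROUPS, so FITS_DISPATCH_WINDOW rejects the
   instruction for the current window whenever the immediate, load or store
   budget above would be exceeded. */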
34209
34210 /* This function returns true if INSN satisfies dispatch rules on the
34211    last window scheduled. */
34212
34213 static bool
34214 fits_dispatch_window (rtx insn)
34215 {
34216 dispatch_windows *window_list = dispatch_window_list;
34217 dispatch_windows *window_list_next = dispatch_window_list->next;
34218 unsigned int num_restrict;
34219 enum dispatch_group group = get_insn_group (insn);
34220 enum insn_path path = get_insn_path (insn);
34221 int sum;
34222
34223   /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
34224      instructions should be given the lowest priority in the
34225      scheduling process in the Haifa scheduler to make sure they will be
34226      scheduled in the same dispatch window as the reference to them. */
34227 if (group == disp_jcc || group == disp_cmp)
34228 return false;
34229
34230 /* Check nonrestricted. */
34231 if (group == disp_no_group || group == disp_branch)
34232 return true;
34233
34234 /* Get last dispatch window. */
34235 if (window_list_next)
34236 window_list = window_list_next;
34237
34238 if (window_list->window_num == 1)
34239 {
34240 sum = window_list->prev->window_size + window_list->window_size;
34241
34242 if (sum == 32
34243 || (min_insn_size (insn) + sum) >= 48)
34244 /* Window 1 is full. Go for next window. */
34245 return true;
34246 }
34247
34248 num_restrict = count_num_restricted (insn, window_list);
34249
34250 if (num_restrict > num_allowable_groups[group])
34251 return false;
34252
34253 /* See if it fits in the first window. */
34254 if (window_list->window_num == 0)
34255 {
34256       /* The first window should have only single and double path
34257          uops. */
34258 if (path == path_double
34259 && (window_list->num_uops + 2) > MAX_INSN)
34260 return false;
34261 else if (path != path_single)
34262 return false;
34263 }
34264 return true;
34265 }
34266
34267 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34268 dispatch window WINDOW_LIST. */
34269
34270 static void
34271 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34272 {
34273 int byte_len = min_insn_size (insn);
34274 int num_insn = window_list->num_insn;
34275 int imm_size;
34276 sched_insn_info *window = window_list->window;
34277 enum dispatch_group group = get_insn_group (insn);
34278 enum insn_path path = get_insn_path (insn);
34279 int num_imm_operand;
34280 int num_imm32_operand;
34281 int num_imm64_operand;
34282
34283 if (!window_list->violation && group != disp_cmp
34284 && !fits_dispatch_window (insn))
34285 window_list->violation = true;
34286
34287 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34288 &num_imm64_operand);
34289
34290 /* Initialize window with new instruction. */
34291 window[num_insn].insn = insn;
34292 window[num_insn].byte_len = byte_len;
34293 window[num_insn].group = group;
34294 window[num_insn].path = path;
34295 window[num_insn].imm_bytes = imm_size;
34296
34297 window_list->window_size += byte_len;
34298 window_list->num_insn = num_insn + 1;
34299 window_list->num_uops = window_list->num_uops + num_uops;
34300 window_list->imm_size += imm_size;
34301 window_list->num_imm += num_imm_operand;
34302 window_list->num_imm_32 += num_imm32_operand;
34303 window_list->num_imm_64 += num_imm64_operand;
34304
34305 if (group == disp_store)
34306 window_list->num_stores += 1;
34307 else if (group == disp_load
34308 || group == disp_prefetch)
34309 window_list->num_loads += 1;
34310 else if (group == disp_load_store)
34311 {
34312 window_list->num_stores += 1;
34313 window_list->num_loads += 1;
34314 }
34315 }
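/* Note that a prefetch is accounted as a load for the purposes of the
   per-window load budget, and a combined load/store instruction bumps both
   the load and the store counters. */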
34316
34317 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34318    If the total bytes of instructions or the number of instructions in
34319    the window exceeds the allowable limit, it allocates a new window. */
34320
34321 static void
34322 add_to_dispatch_window (rtx insn)
34323 {
34324 int byte_len;
34325 dispatch_windows *window_list;
34326 dispatch_windows *next_list;
34327 dispatch_windows *window0_list;
34328 enum insn_path path;
34329 enum dispatch_group insn_group;
34330 bool insn_fits;
34331 int num_insn;
34332 int num_uops;
34333 int window_num;
34334 int insn_num_uops;
34335 int sum;
34336
34337 if (INSN_CODE (insn) < 0)
34338 return;
34339
34340 byte_len = min_insn_size (insn);
34341 window_list = dispatch_window_list;
34342 next_list = window_list->next;
34343 path = get_insn_path (insn);
34344 insn_group = get_insn_group (insn);
34345
34346 /* Get the last dispatch window. */
34347 if (next_list)
34348 window_list = dispatch_window_list->next;
34349
34350 if (path == path_single)
34351 insn_num_uops = 1;
34352 else if (path == path_double)
34353 insn_num_uops = 2;
34354 else
34355 insn_num_uops = (int) path;
34356
34357   /* If the current window is full, get a new window.
34358      Window number zero is full if MAX_INSN uops are scheduled in it.
34359      Window number one is full if window zero's bytes plus window
34360      one's bytes reach 32, or if adding the bytes of the new instruction
34361      brings the total to 48 or more, or if it already has MAX_INSN
34362      instructions in it. */
34363 num_insn = window_list->num_insn;
34364 num_uops = window_list->num_uops;
34365 window_num = window_list->window_num;
34366 insn_fits = fits_dispatch_window (insn);
34367
34368 if (num_insn >= MAX_INSN
34369 || num_uops + insn_num_uops > MAX_INSN
34370 || !(insn_fits))
34371 {
34372 window_num = ~window_num & 1;
34373 window_list = allocate_next_window (window_num);
34374 }
34375
34376 if (window_num == 0)
34377 {
34378 add_insn_window (insn, window_list, insn_num_uops);
34379 if (window_list->num_insn >= MAX_INSN
34380 && insn_group == disp_branch)
34381 {
34382 process_end_window ();
34383 return;
34384 }
34385 }
34386 else if (window_num == 1)
34387 {
34388 window0_list = window_list->prev;
34389 sum = window0_list->window_size + window_list->window_size;
34390 if (sum == 32
34391 || (byte_len + sum) >= 48)
34392 {
34393 process_end_window ();
34394 window_list = dispatch_window_list;
34395 }
34396
34397 add_insn_window (insn, window_list, insn_num_uops);
34398 }
34399 else
34400 gcc_unreachable ();
34401
34402 if (is_end_basic_block (insn_group))
34403 {
34404       /* End of basic block is reached; do end-of-basic-block processing. */
34405 process_end_window ();
34406 return;
34407 }
34408 }
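/* Illustrative flow: a run of small single-uop instructions first fills
   window 0 (up to MAX_INSN uops), then spills into window 1; once the two
   windows together reach 32 bytes, or the next instruction would push them
   to 48 bytes or more, PROCESS_END_WINDOW resets both and the instruction
   starts a fresh window 0.  A branch additionally ends the current pair of
   windows via IS_END_BASIC_BLOCK. */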
34409
34410 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34411
34412 DEBUG_FUNCTION static void
34413 debug_dispatch_window_file (FILE *file, int window_num)
34414 {
34415 dispatch_windows *list;
34416 int i;
34417
34418 if (window_num == 0)
34419 list = dispatch_window_list;
34420 else
34421 list = dispatch_window_list1;
34422
34423 fprintf (file, "Window #%d:\n", list->window_num);
34424 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34425 list->num_insn, list->num_uops, list->window_size);
34426 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34427 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34428
34429 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34430 list->num_stores);
34431 fprintf (file, " insn info:\n");
34432
34433 for (i = 0; i < MAX_INSN; i++)
34434 {
34435 if (!list->window[i].insn)
34436 break;
34437 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34438 i, group_name[list->window[i].group],
34439 i, (void *)list->window[i].insn,
34440 i, list->window[i].path,
34441 i, list->window[i].byte_len,
34442 i, list->window[i].imm_bytes);
34443 }
34444 }
34445
34446 /* Print to stdout a dispatch window. */
34447
34448 DEBUG_FUNCTION void
34449 debug_dispatch_window (int window_num)
34450 {
34451 debug_dispatch_window_file (stdout, window_num);
34452 }
34453
34454 /* Print INSN dispatch information to FILE. */
34455
34456 DEBUG_FUNCTION static void
34457 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34458 {
34459 int byte_len;
34460 enum insn_path path;
34461 enum dispatch_group group;
34462 int imm_size;
34463 int num_imm_operand;
34464 int num_imm32_operand;
34465 int num_imm64_operand;
34466
34467 if (INSN_CODE (insn) < 0)
34468 return;
34469
34470 byte_len = min_insn_size (insn);
34471 path = get_insn_path (insn);
34472 group = get_insn_group (insn);
34473 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34474 &num_imm64_operand);
34475
34476 fprintf (file, " insn info:\n");
34477 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34478 group_name[group], path, byte_len);
34479 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34480 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34481 }
34482
34483 /* Print to STDOUT the status of the ready list with respect to
34484    dispatch windows. */
34485
34486 DEBUG_FUNCTION void
34487 debug_ready_dispatch (void)
34488 {
34489 int i;
34490 int no_ready = number_in_ready ();
34491
34492 fprintf (stdout, "Number of ready: %d\n", no_ready);
34493
34494 for (i = 0; i < no_ready; i++)
34495 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34496 }
34497
34498 /* This routine is the driver of the dispatch scheduler. */
34499
34500 static void
34501 do_dispatch (rtx insn, int mode)
34502 {
34503 if (mode == DISPATCH_INIT)
34504 init_dispatch_sched ();
34505 else if (mode == ADD_TO_DISPATCH_WINDOW)
34506 add_to_dispatch_window (insn);
34507 }
34508
34509 /* Return TRUE if Dispatch Scheduling is supported. */
34510
34511 static bool
34512 has_dispatch (rtx insn, int action)
34513 {
34514 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34515 switch (action)
34516 {
34517 default:
34518 return false;
34519
34520 case IS_DISPATCH_ON:
34521 return true;
34522 break;
34523
34524 case IS_CMP:
34525 return is_cmp (insn);
34526
34527 case DISPATCH_VIOLATION:
34528 return dispatch_violation ();
34529
34530 case FITS_DISPATCH_WINDOW:
34531 return fits_dispatch_window (insn);
34532 }
34533
34534 return false;
34535 }
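/* Dispatch scheduling is therefore only active for PROCESSOR_BDVER1 tuning
   and only when the dispatch scheduler flag (presumably -mdispatch-scheduler,
   recorded in flag_dispatch_scheduler) is given; for every other
   configuration the hook answers false and scheduling is unaffected. */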
34536
34537 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34538 place emms and femms instructions. */
34539
34540 static enum machine_mode
34541 ix86_preferred_simd_mode (enum machine_mode mode)
34542 {
34543 if (!TARGET_SSE)
34544 return word_mode;
34545
34546 switch (mode)
34547 {
34548 case QImode:
34549 return V16QImode;
34550 case HImode:
34551 return V8HImode;
34552 case SImode:
34553 return V4SImode;
34554 case DImode:
34555 return V2DImode;
34556
34557 case SFmode:
34558 if (TARGET_AVX && !flag_prefer_avx128)
34559 return V8SFmode;
34560 else
34561 return V4SFmode;
34562
34563 case DFmode:
34564 if (!TARGET_VECTORIZE_DOUBLE)
34565 return word_mode;
34566 else if (TARGET_AVX && !flag_prefer_avx128)
34567 return V4DFmode;
34568 else if (TARGET_SSE2)
34569 return V2DFmode;
34570 /* FALLTHRU */
34571
34572 default:
34573 return word_mode;
34574 }
34575 }
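/* Example (illustrative): with AVX enabled and no preference for 128-bit
   vectors (flag_prefer_avx128 unset), SFmode vectorizes as V8SFmode (eight
   floats in a 256-bit register) and DFmode as V4DFmode; with plain SSE2 the
   choices fall back to V4SFmode and V2DFmode, while the integer modes always
   use the 128-bit forms above. */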
34576
34577 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34578 vectors. */
34579
34580 static unsigned int
34581 ix86_autovectorize_vector_sizes (void)
34582 {
34583 return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
34584 }
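/* The return value is a bit mask of vector sizes in bytes: 32 | 16 (i.e. 48)
   asks the vectorizer to try both 256-bit and 128-bit vectors, while 0 is
   taken to mean that only the preferred SIMD mode chosen above is used. */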
34585
34586 /* Initialize the GCC target structure. */
34587 #undef TARGET_RETURN_IN_MEMORY
34588 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34589
34590 #undef TARGET_LEGITIMIZE_ADDRESS
34591 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34592
34593 #undef TARGET_ATTRIBUTE_TABLE
34594 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34595 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34596 # undef TARGET_MERGE_DECL_ATTRIBUTES
34597 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34598 #endif
34599
34600 #undef TARGET_COMP_TYPE_ATTRIBUTES
34601 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34602
34603 #undef TARGET_INIT_BUILTINS
34604 #define TARGET_INIT_BUILTINS ix86_init_builtins
34605 #undef TARGET_BUILTIN_DECL
34606 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34607 #undef TARGET_EXPAND_BUILTIN
34608 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34609
34610 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34611 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34612 ix86_builtin_vectorized_function
34613
34614 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34615 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34616
34617 #undef TARGET_BUILTIN_RECIPROCAL
34618 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34619
34620 #undef TARGET_ASM_FUNCTION_EPILOGUE
34621 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34622
34623 #undef TARGET_ENCODE_SECTION_INFO
34624 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34625 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34626 #else
34627 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34628 #endif
34629
34630 #undef TARGET_ASM_OPEN_PAREN
34631 #define TARGET_ASM_OPEN_PAREN ""
34632 #undef TARGET_ASM_CLOSE_PAREN
34633 #define TARGET_ASM_CLOSE_PAREN ""
34634
34635 #undef TARGET_ASM_BYTE_OP
34636 #define TARGET_ASM_BYTE_OP ASM_BYTE
34637
34638 #undef TARGET_ASM_ALIGNED_HI_OP
34639 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34640 #undef TARGET_ASM_ALIGNED_SI_OP
34641 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34642 #ifdef ASM_QUAD
34643 #undef TARGET_ASM_ALIGNED_DI_OP
34644 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34645 #endif
34646
34647 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34648 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34649
34650 #undef TARGET_ASM_UNALIGNED_HI_OP
34651 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34652 #undef TARGET_ASM_UNALIGNED_SI_OP
34653 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34654 #undef TARGET_ASM_UNALIGNED_DI_OP
34655 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34656
34657 #undef TARGET_PRINT_OPERAND
34658 #define TARGET_PRINT_OPERAND ix86_print_operand
34659 #undef TARGET_PRINT_OPERAND_ADDRESS
34660 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34661 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34662 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34663 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34664 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34665
34666 #undef TARGET_SCHED_INIT_GLOBAL
34667 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34668 #undef TARGET_SCHED_ADJUST_COST
34669 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34670 #undef TARGET_SCHED_ISSUE_RATE
34671 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34672 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34673 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34674 ia32_multipass_dfa_lookahead
34675
34676 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34677 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34678
34679 #ifdef HAVE_AS_TLS
34680 #undef TARGET_HAVE_TLS
34681 #define TARGET_HAVE_TLS true
34682 #endif
34683 #undef TARGET_CANNOT_FORCE_CONST_MEM
34684 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34685 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34686 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34687
34688 #undef TARGET_DELEGITIMIZE_ADDRESS
34689 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34690
34691 #undef TARGET_MS_BITFIELD_LAYOUT_P
34692 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34693
34694 #if TARGET_MACHO
34695 #undef TARGET_BINDS_LOCAL_P
34696 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34697 #endif
34698 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34699 #undef TARGET_BINDS_LOCAL_P
34700 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34701 #endif
34702
34703 #undef TARGET_ASM_OUTPUT_MI_THUNK
34704 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34705 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34706 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34707
34708 #undef TARGET_ASM_FILE_START
34709 #define TARGET_ASM_FILE_START x86_file_start
34710
34711 #undef TARGET_OPTION_OVERRIDE
34712 #define TARGET_OPTION_OVERRIDE ix86_option_override
34713
34714 #undef TARGET_REGISTER_MOVE_COST
34715 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34716 #undef TARGET_MEMORY_MOVE_COST
34717 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34718 #undef TARGET_RTX_COSTS
34719 #define TARGET_RTX_COSTS ix86_rtx_costs
34720 #undef TARGET_ADDRESS_COST
34721 #define TARGET_ADDRESS_COST ix86_address_cost
34722
34723 #undef TARGET_FIXED_CONDITION_CODE_REGS
34724 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34725 #undef TARGET_CC_MODES_COMPATIBLE
34726 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34727
34728 #undef TARGET_MACHINE_DEPENDENT_REORG
34729 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34730
34731 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34732 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34733
34734 #undef TARGET_BUILD_BUILTIN_VA_LIST
34735 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34736
34737 #undef TARGET_ENUM_VA_LIST_P
34738 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34739
34740 #undef TARGET_FN_ABI_VA_LIST
34741 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34742
34743 #undef TARGET_CANONICAL_VA_LIST_TYPE
34744 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34745
34746 #undef TARGET_EXPAND_BUILTIN_VA_START
34747 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34748
34749 #undef TARGET_MD_ASM_CLOBBERS
34750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34751
34752 #undef TARGET_PROMOTE_PROTOTYPES
34753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34754 #undef TARGET_STRUCT_VALUE_RTX
34755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34756 #undef TARGET_SETUP_INCOMING_VARARGS
34757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34758 #undef TARGET_MUST_PASS_IN_STACK
34759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34760 #undef TARGET_FUNCTION_ARG_ADVANCE
34761 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34762 #undef TARGET_FUNCTION_ARG
34763 #define TARGET_FUNCTION_ARG ix86_function_arg
34764 #undef TARGET_FUNCTION_ARG_BOUNDARY
34765 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34766 #undef TARGET_PASS_BY_REFERENCE
34767 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34768 #undef TARGET_INTERNAL_ARG_POINTER
34769 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34770 #undef TARGET_UPDATE_STACK_BOUNDARY
34771 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34772 #undef TARGET_GET_DRAP_RTX
34773 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34774 #undef TARGET_STRICT_ARGUMENT_NAMING
34775 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34776 #undef TARGET_STATIC_CHAIN
34777 #define TARGET_STATIC_CHAIN ix86_static_chain
34778 #undef TARGET_TRAMPOLINE_INIT
34779 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34780 #undef TARGET_RETURN_POPS_ARGS
34781 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34782
34783 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34784 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34785
34786 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34787 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34788
34789 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34790 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34791
34792 #undef TARGET_C_MODE_FOR_SUFFIX
34793 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34794
34795 #ifdef HAVE_AS_TLS
34796 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34797 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34798 #endif
34799
34800 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34801 #undef TARGET_INSERT_ATTRIBUTES
34802 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34803 #endif
34804
34805 #undef TARGET_MANGLE_TYPE
34806 #define TARGET_MANGLE_TYPE ix86_mangle_type
34807
34808 #ifndef TARGET_MACHO
34809 #undef TARGET_STACK_PROTECT_FAIL
34810 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34811 #endif
34812
34813 #undef TARGET_FUNCTION_VALUE
34814 #define TARGET_FUNCTION_VALUE ix86_function_value
34815
34816 #undef TARGET_FUNCTION_VALUE_REGNO_P
34817 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34818
34819 #undef TARGET_SECONDARY_RELOAD
34820 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34821
34822 #undef TARGET_PREFERRED_RELOAD_CLASS
34823 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34824 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34825 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34826 #undef TARGET_CLASS_LIKELY_SPILLED_P
34827 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34828
34829 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34830 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34831 ix86_builtin_vectorization_cost
34832 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34833 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34834 ix86_vectorize_builtin_vec_perm
34835 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34836 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34837 ix86_vectorize_builtin_vec_perm_ok
34838 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34839 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34840 ix86_preferred_simd_mode
34841 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34842 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34843 ix86_autovectorize_vector_sizes
34844
34845 #undef TARGET_SET_CURRENT_FUNCTION
34846 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
34847
34848 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
34849 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
34850
34851 #undef TARGET_OPTION_SAVE
34852 #define TARGET_OPTION_SAVE ix86_function_specific_save
34853
34854 #undef TARGET_OPTION_RESTORE
34855 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
34856
34857 #undef TARGET_OPTION_PRINT
34858 #define TARGET_OPTION_PRINT ix86_function_specific_print
34859
34860 #undef TARGET_CAN_INLINE_P
34861 #define TARGET_CAN_INLINE_P ix86_can_inline_p
34862
34863 #undef TARGET_EXPAND_TO_RTL_HOOK
34864 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
34865
34866 #undef TARGET_LEGITIMATE_ADDRESS_P
34867 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
34868
34869 #undef TARGET_LEGITIMATE_CONSTANT_P
34870 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
34871
34872 #undef TARGET_FRAME_POINTER_REQUIRED
34873 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
34874
34875 #undef TARGET_CAN_ELIMINATE
34876 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
34877
34878 #undef TARGET_EXTRA_LIVE_ON_ENTRY
34879 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
34880
34881 #undef TARGET_ASM_CODE_END
34882 #define TARGET_ASM_CODE_END ix86_code_end
34883
34884 #undef TARGET_CONDITIONAL_REGISTER_USAGE
34885 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
34886
34887 #if TARGET_MACHO
34888 #undef TARGET_INIT_LIBFUNCS
34889 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
34890 #endif
34891
34892 struct gcc_target targetm = TARGET_INITIALIZER;
34893 \f
34894 #include "gt-i386.h"