1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;

/* Register number where ar.pfs was saved in the prologue, or zero
   if it was not saved.  */
/* NOTE(review): the declaration this comment describes was lost in
   extraction -- presumably ia64_arpfs_regno, which is used later in
   this file.  TODO confirm against the original source.  */

/* Register number where rp was saved in the prologue, or zero if it was
   not saved.  */
/* NOTE(review): declaration lost in extraction -- presumably
   ia64_rp_regno (used later in this file).  */

/* Register number where frame pointer was saved in the prologue, or zero
   if it was not saved.  */
/* NOTE(review): declaration lost in extraction -- presumably
   ia64_fp_regno (used later in this file).  */

/* Number of input and local registers used.  This is needed for the .regstk
   directive, and also for debugging info.  */
/* NOTE(review): declarations lost in extraction -- presumably
   ia64_input_regs and ia64_local_regs (assigned in ia64_expand_prologue).  */

/* If true, then we must emit a .regstk directive.  */
/* NOTE(review): declaration lost in extraction -- presumably
   ia64_need_regstk (assigned in ia64_expand_prologue).  */
/* Register names for ia64_expand_prologue.  Stacked registers r32-r127,
   indexed from zero.  */
char *ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
int ia64_section_threshold;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.
   Accepts a SYMBOL_REF, a REG, or a SUBREG of a REG.
   NOTE(review): the return type, parameter declaration for OP, braces,
   and the body of the mode-mismatch `if' (presumably `return 0;') were
   lost in extraction -- confirm against the original source.  */

call_operand (op, mode)
     enum machine_mode mode;

  if (mode != GET_MODE (op))

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
/* Return 1 if OP refers to a symbol in the sdata section, recognized by
   the SDATA_NAME_FLAG_CHAR prefix on the symbol name.
   NOTE(review): the `case' labels of the switch (presumably SYMBOL_REF
   and CONST), the default branch, and surrounding braces were lost in
   extraction -- confirm against the original source.  */

sdata_symbolic_operand (op, mode)
     enum machine_mode mode;

  switch (GET_CODE (op))

      /* Plain symbol: check the flag character on its name.  */
      return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

      /* Symbol plus offset: dig out the SYMBOL_REF and check its name.  */
      return (GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
              && XSTR (XEXP (XEXP (op, 0), 0), 0)[0] == SDATA_NAME_FLAG_CHAR);
/* Return 1 if OP refers to a symbol.
   NOTE(review): the entire switch body (case labels and returns) was
   lost in extraction; only the dispatch on GET_CODE survives.  */

symbolic_operand (op, mode)
     enum machine_mode mode;

  switch (GET_CODE (op))
/* Return 1 if OP refers to a function, i.e. a SYMBOL_REF whose
   SYMBOL_REF_FLAG is set.
   NOTE(review): the return statements and braces were lost in
   extraction; only the test condition survives.  */

function_operand (op, mode)
     enum machine_mode mode;

  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
/* Return 1 if OP is setjmp or a similar function, by matching the symbol
   name against setjmp/sigsetjmp/savectx/qsetjmp/vfork variants.  */
/* ??? This is an unsatisfying solution.  Should rethink.  */
/* NOTE(review): many lines were lost in extraction -- the declaration
   and initialization of `name', several branch bodies, and the closing
   logic.  The surviving fragments below preserve the visible tests
   only; confirm against special_function_p in calls.c.  */

setjmp_operand (op, mode)
     enum machine_mode mode;

  if (GET_CODE (op) != SYMBOL_REF)

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[1] == '_' && name[2] == 'x')

  else if (name[1] == '_')

          && (! strcmp (name, "setjmp")
              || ! strcmp (name, "setjmp_syscall")))

          && ! strcmp (name, "sigsetjmp"))

          && ! strcmp (name, "savectx")));

  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands (which are instead handled specially).  */
/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */
/* NOTE(review): the body of the pic `if' (presumably `return 0;'),
   the return type, OP's declaration, and braces were lost in
   extraction -- confirm against the original source.  */

move_operand (op, mode)
     enum machine_mode mode;

  if (! TARGET_NO_PIC && symbolic_operand (op, mode))

  return general_operand (op, mode);
260 /* Return 1 if OP is a register operand, or zero. */
263 reg_or_0_operand (op
, mode
)
265 enum machine_mode mode
;
267 return (op
== const0_rtx
|| register_operand (op
, mode
));
270 /* Return 1 if OP is a register operand, or a 6 bit immediate operand. */
273 reg_or_6bit_operand (op
, mode
)
275 enum machine_mode mode
;
277 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
278 || GET_CODE (op
) == CONSTANT_P_RTX
279 || register_operand (op
, mode
));
282 /* Return 1 if OP is a register operand, or an 8 bit immediate operand. */
285 reg_or_8bit_operand (op
, mode
)
287 enum machine_mode mode
;
289 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
290 || GET_CODE (op
) == CONSTANT_P_RTX
291 || register_operand (op
, mode
));
294 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
298 reg_or_8bit_adjusted_operand (op
, mode
)
300 enum machine_mode mode
;
302 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
303 || GET_CODE (op
) == CONSTANT_P_RTX
304 || register_operand (op
, mode
));
307 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
308 immediate and an 8 bit adjusted immediate operand. This is necessary
309 because when we emit a compare, we don't know what the condition will be,
310 so we need the union of the immediates accepted by GT and LT. */
313 reg_or_8bit_and_adjusted_operand (op
, mode
)
315 enum machine_mode mode
;
317 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
318 && CONST_OK_FOR_L (INTVAL (op
)))
319 || GET_CODE (op
) == CONSTANT_P_RTX
320 || register_operand (op
, mode
));
323 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
326 reg_or_14bit_operand (op
, mode
)
328 enum machine_mode mode
;
330 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
331 || GET_CODE (op
) == CONSTANT_P_RTX
332 || register_operand (op
, mode
));
335 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
338 reg_or_22bit_operand (op
, mode
)
340 enum machine_mode mode
;
342 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
343 || GET_CODE (op
) == CONSTANT_P_RTX
344 || register_operand (op
, mode
));
347 /* Return 1 if OP is a 6 bit immediate operand. */
350 shift_count_operand (op
, mode
)
352 enum machine_mode mode
;
354 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
355 || GET_CODE (op
) == CONSTANT_P_RTX
);
358 /* Return 1 if OP is a 5 bit immediate operand. */
361 shift_32bit_count_operand (op
, mode
)
363 enum machine_mode mode
;
365 return ((GET_CODE (op
) == CONST_INT
366 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
367 || GET_CODE (op
) == CONSTANT_P_RTX
);
370 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
373 shladd_operand (op
, mode
)
375 enum machine_mode mode
;
377 return (GET_CODE (op
) == CONST_INT
378 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
379 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
382 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
385 fetchadd_operand (op
, mode
)
387 enum machine_mode mode
;
389 return (GET_CODE (op
) == CONST_INT
390 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
391 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
392 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
393 INTVAL (op
) == 8 || INTVAL (op
) == 16));
396 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
399 reg_or_fp01_operand (op
, mode
)
401 enum machine_mode mode
;
403 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
404 || GET_CODE (op
) == CONSTANT_P_RTX
405 || register_operand (op
, mode
));
408 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
409 signed immediate operand. */
412 normal_comparison_operator (op
, mode
)
414 enum machine_mode mode
;
416 enum rtx_code code
= GET_CODE (op
);
417 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
418 && (code
== EQ
|| code
== NE
419 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
422 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
423 signed immediate operand. */
426 adjusted_comparison_operator (op
, mode
)
428 enum machine_mode mode
;
430 enum rtx_code code
= GET_CODE (op
);
431 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
432 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
/* Return 1 if OP is a call returning an HFA.  It is known to be a PARALLEL
   and the first section has already been tested.  Each element past the
   first two must be a SET of consecutive destination REGs from the same
   CALL.  */
/* NOTE(review): several lines were lost in extraction -- the return type,
   declarations of `i' and `dest_regno', the opening of the quick-check
   `if', loop braces, and the final returns.  The surviving fragments are
   preserved below; confirm against the original source.  */

call_multiple_values_operation (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;

  int count = XVECLEN (op, 0) - 2;

  /* Perform a quick check so we don't block up below.  */
      || GET_CODE (XVECEXP (op, 0, 0)) != SET
      || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
      || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL)

  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));

  for (i = 1; i < count; i++)

      rtx elt = XVECEXP (op, 0, i + 2);

      if (GET_CODE (elt) != SET
          || GET_CODE (SET_SRC (elt)) != CALL
          || GET_CODE (SET_DEST (elt)) != REG
          || REGNO (SET_DEST (elt)) != dest_regno + i)
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.
   NOTE(review): the struct's braces and trailing semicolon were lost in
   extraction; the field list itself appears complete.  */

struct ia64_frame_info

  long total_size;		/* # bytes that the entire frame takes up.  */
  long var_size;		/* # bytes that variables take up.  */
  long args_size;		/* # bytes that outgoing arguments take up.  */
  long pretend_size;		/* # bytes that stdarg arguments take up.  */
  long pretend_pad_size;	/* # bytes padding to align stdarg args.  */
  long extra_size;		/* # bytes of extra gunk.  */
  long gr_size;			/* # bytes needed to store general regs.  */
  long fr_size;			/* # bytes needed to store FP regs.  */
  long fr_pad_size;		/* # bytes needed to align FP save area.  */
  long pr_size;			/* # bytes needed to store predicate regs.  */
  long br_size;			/* # bytes needed to store branch regs.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  int initialized;		/* != 0 is frame size already calculated.  */

/* Current frame information calculated by compute_frame_size.  */
struct ia64_frame_info current_frame_info;
/* Helper function for INITIAL_ELIMINATION_OFFSET.  Return the offset from the
   frame pointer where b0 is saved.
   NOTE(review): the return type (presumably int) and braces were lost in
   extraction.  */

ia64_rap_fp_offset ()

  return - current_frame_info.br_size;
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  Fills in current_frame_info as a side
   effect.
   NOTE(review): many lines were lost in extraction -- the return type,
   local declarations (regno, mask, tmp, and the *_size accumulators),
   the bodies that add to gr_size/fr_size/pr_size/br_size inside each
   save loop, several `if' bodies, braces, and the final return.  The
   surviving fragments are preserved below; confirm against the
   original source before relying on details.  */

ia64_compute_frame_size (size)

  int pretend_pad_size = 0;

  /* Reload used to round the frame size to STACK_BOUNDARY.  Now we do it
     here (comment truncated in extraction).  */
  size = IA64_STACK_ALIGN (size);

  CLEAR_HARD_REG_SET (mask);

  /* Calculate space needed for general registers.  */
  /* We never need to save any of the stacked registers, which are regs
     r32 and up (comment truncated in extraction).  */
  for (regno = GR_REG (0); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])

        SET_HARD_REG_BIT (mask, regno);

  /* Allocate space to save/restore the unat from.  */
      || current_function_varargs || current_function_stdarg)

  /* Calculate space needed for FP registers.  */
  for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])

        SET_HARD_REG_BIT (mask, regno);

  /* Calculate space needed for predicate registers.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])

        SET_HARD_REG_BIT (mask, regno);

  /* Calculate space needed for branch registers.  */
  for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])

        SET_HARD_REG_BIT (mask, regno);

  /* The FR save area needs to be 16-byte aligned.  */
  tmp = (size + fr_size + br_size);
  fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp;

  /* If we have an odd number of words of pretend arguments written to the
     stack, then the FR save area will be unaligned.  We pad below this area
     to keep things 16 byte aligned.  This needs to be kept distinct, to
     avoid confusing it with padding added below the GR save area, which does
     not affect the FR area alignment.  */
  pretend_pad_size = current_function_pretend_args_size % 16;

  /* The 16 bytes is for the scratch area.  */
  tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size
         + current_function_outgoing_args_size + 16);
  tmp += (current_function_pretend_args_size
          ? current_function_pretend_args_size - 16
          : 0) + pretend_pad_size;
  total_size = IA64_STACK_ALIGN (tmp);
  extra_size = total_size - tmp + 16;

  /* If this is a leaf routine (BR_REG (0) is not live), and if there is no
     stack space needed for register saves, then don't allocate the 16 byte
     scratch area (comment truncated; body of this `if' lost in
     extraction).  */
  if (total_size == 16 && ! regs_ever_live[BR_REG (0)])

  /* Record everything in current_frame_info for later passes.  */
  current_frame_info.total_size = total_size;
  current_frame_info.var_size = size;
  current_frame_info.args_size = current_function_outgoing_args_size;
  current_frame_info.pretend_size
    = (current_function_pretend_args_size
       ? current_function_pretend_args_size - 16
  current_frame_info.pretend_pad_size = pretend_pad_size;
  current_frame_info.extra_size = extra_size;
  current_frame_info.gr_size = gr_size;
  current_frame_info.fr_size = fr_size;
  current_frame_info.fr_pad_size = fr_pad_size;
  current_frame_info.pr_size = pr_size;
  current_frame_info.br_size = br_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.initialized = reload_completed;
/* Emit the insns that save (SAVE_P nonzero) or restore (SAVE_P zero) the
   registers recorded in current_frame_info.mask, using GR_REG (2) as a
   post-incremented address register and GR_REG (3) as a data temporary.
   NOTE(review): heavily fragmented by extraction -- the return type,
   declarations of regno/insn/offset_rtx/src/dest, the save_p branch
   `if'/`else' keywords, braces, and several argument lines of emitted
   insns were lost.  The surviving fragments are preserved below;
   confirm against the original source.  */

save_restore_insns (save_p)

  if (current_frame_info.gr_size + current_frame_info.fr_size
      + current_frame_info.br_size + current_frame_info.pr_size)

      rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
      rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
      rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3));
      /* Offset from sp (or fp) to the bottom of the register save area.  */
      int offset = (current_frame_info.total_size
                    - (current_frame_info.gr_size + current_frame_info.fr_size
                       + current_frame_info.fr_pad_size
                       + current_frame_info.br_size
                       + current_frame_info.pr_size
                       + current_frame_info.var_size
                       + current_frame_info.pretend_size
                       + current_frame_info.pretend_pad_size));

      /* If there is a frame pointer, then we use it instead of the stack
         pointer, so that the stack pointer does not need to be valid when
         the epilogue starts.  See EXIT_IGNORE_STACK.  */
      if (frame_pointer_needed)
        offset = offset - current_frame_info.total_size;

      if (CONST_OK_FOR_I (offset))
        offset_rtx = GEN_INT (offset);

          /* Offset too big for an add immediate; materialize it.  */
          offset_rtx = tmp_reg;
          insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset)));

          RTX_FRAME_RELATED_P (insn) = 1;

      /* Point tmp_reg at the save area.  */
      insn = emit_insn (gen_adddi3 (tmp_reg,
                                    (frame_pointer_needed ? frame_pointer_rtx
                                     : stack_pointer_rtx),

      RTX_FRAME_RELATED_P (insn) = 1;

      /* If one is used, we save/restore all of them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))

            rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);

                /* Save path: move all predicates into a GR, then store.  */
                insn = emit_insn (gen_pr_spill (tmp2_reg));
                RTX_FRAME_RELATED_P (insn) = 1;
                insn = emit_insn (gen_movdi (mem, tmp2_reg));
                RTX_FRAME_RELATED_P (insn) = 1;

                /* Restore path: load, then scatter back to predicates.  */
                insn = emit_insn (gen_movdi (tmp2_reg, mem));
                insn = emit_insn (gen_pr_restore (tmp2_reg));

      /* Must save/restore ar.unat if any GR is spilled/restored.  */
      if (current_frame_info.gr_size != 0
          || current_function_varargs || current_function_stdarg)

          rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);

              insn = emit_insn (gen_unat_spill (tmp2_reg));

              RTX_FRAME_RELATED_P (insn) = 1;
              insn = emit_insn (gen_movdi (mem, tmp2_reg));

              RTX_FRAME_RELATED_P (insn) = 1;

              insn = emit_insn (gen_movdi (tmp2_reg, mem));

              RTX_FRAME_RELATED_P (insn) = 1;
              /* The restore happens after the last ld8.fill instruction.  */

      for (regno = GR_REG (0); regno <= GR_REG (127); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))

            rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);

                insn = emit_insn (gen_gr_spill (mem,
                                                gen_rtx_REG (DImode, regno)));

                insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno),

            RTX_FRAME_RELATED_P (insn) = 1;

      /* Now restore the unat register if necessary.  */
      if ((current_frame_info.gr_size != 0
           || current_function_varargs || current_function_stdarg)

        emit_insn (gen_unat_restore (tmp2_reg));

      for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))

            rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc);

                insn = emit_insn (gen_fr_spill (mem,
                                                gen_rtx_REG (XFmode, regno)));

                insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno),

            RTX_FRAME_RELATED_P (insn) = 1;

      for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))

                /* Saving: branch reg -> memory via tmp2_reg.  */
                src = gen_rtx_REG (DImode, regno);
                dest = gen_rtx_MEM (DImode, tmp_post_inc);

                /* Restoring: memory -> branch reg via tmp2_reg.  */
                src = gen_rtx_MEM (DImode, tmp_post_inc);
                dest = gen_rtx_REG (DImode, regno);

            insn = emit_insn (gen_movdi (tmp2_reg, src));

            RTX_FRAME_RELATED_P (insn) = 1;
            insn = emit_insn (gen_movdi (dest, tmp2_reg));

            RTX_FRAME_RELATED_P (insn) = 1;
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   add immediate (comment truncated in extraction).  */

/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the
   extra registers (comment truncated in extraction).  */

/* ??? Should not reserve a local register for rp/ar.pfs.  Should
   instead check to see if any local registers are unused, and if so,
   allocate them to rp/ar.pfs in that order.  Not sure what to do about
   fp, we may still need to reserve a local register for it.  */

/* NOTE(review): heavily fragmented by extraction -- the return type,
   declarations of e/epilogue_p/leaf_function/insn/offset, loop and
   branch braces, `else' keywords, and some emitted-insn argument lines
   were lost.  The surviving fragments are preserved below; confirm
   against the original source.  */

ia64_expand_prologue ()

  int i, locals, inputs, outputs, rotates;
  int frame_size = ia64_compute_frame_size (get_frame_size ());

  /* ??? This seems like a leaf_function_p bug.  It calls get_insns which
     returns the first insn of the current sequence, not the first insn
     of the function.  We work around this by pushing to the topmost
     sequence first.  */
  push_topmost_sequence ();
  leaf_function = leaf_function_p ();
  pop_topmost_sequence ();

  /* If there is no epilogue, then we don't need some prologue insns.  We
     need to avoid emitting the dead prologue insns, because flow will
     complain (comment truncated; search-loop body lost in extraction).  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    if ((e->flags & EDGE_FAKE) == 0
        && (e->flags & EDGE_FALLTHRU) != 0)

  epilogue_p = (e != NULL);

  /* Find the highest local register used.  */
  /* We have only 80 local registers, because we reserve 8 for the inputs
     and 8 for the outputs.  */

  for (i = LOC_REG (79); i >= LOC_REG (0); i--)
    if (regs_ever_live[i])

  locals = i - LOC_REG (0) + 1;

  /* Likewise for inputs.  */

  for (i = IN_REG (7); i >= IN_REG (0); i--)
    if (regs_ever_live[i])

  inputs = i - IN_REG (0) + 1;

  /* If the function was declared with syscall_linkage, then we may need to
     preserve all declared input registers, even if they weren't used.
     Currently, syscall_linkage does not have this effect.  */

  if (lookup_attribute ("syscall_linkage",
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    inputs = MAX (inputs, current_function_args_info.words);

  /* Likewise for outputs.  */

  for (i = OUT_REG (7); i >= OUT_REG (0); i--)
    if (regs_ever_live[i])

  outputs = i - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likwise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments (comment truncated in extraction).  */
  if (profile_flag || profile_block_flag == 1)
    outputs = MAX (outputs, 1);
  else if (profile_block_flag == 2)
    outputs = MAX (outputs, 2);

  /* No rotating register support as yet.  */

  /* Allocate two extra locals for saving/restoring rp and ar.pfs.  Also
     allocate one local for use as the frame pointer if frame_pointer_needed
     is set (comment truncated in extraction).  */
  /* ??? If this is a leaf function, then we aren't using one of these local
     registers for the RP anymore.  */
  locals += 2 + frame_pointer_needed;

  /* Save these values in global registers for debugging info.  */
  ia64_input_regs = inputs;
  ia64_local_regs = locals;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.
     We give in/loc/out names to unused registers, to make invalid uses of
     them easy to spot.  */
  if (! TARGET_REG_NAMES)

  for (i = 0; i < 8; i++)

      reg_names[IN_REG (i)] = ia64_reg_numbers[i];

      reg_names[IN_REG (i)] = ia64_input_reg_names[i];

  for (i = 0; i < 80; i++)

      reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];

      reg_names[LOC_REG (i)] = ia64_local_reg_names[i];

  for (i = 0; i < 8; i++)

      reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];

      reg_names[OUT_REG (i)] = ia64_output_reg_names[i];

  /* Set the frame pointer register name now that it is known, and the
     local register names are known.  */
  if (frame_pointer_needed)

      reg_names[FRAME_POINTER_REGNUM]
        = reg_names[LOC_REG (locals - 3)];
      /* NOTE(review): the index here uses inputs + locals - 3 while the
         line above uses locals - 3; verify this asymmetry against the
         original source.  */
      ia64_fp_regno = LOC_REG (inputs + locals - 3);

  /* We don't need an alloc instruction if this is a leaf function, and the
     locals and outputs are both zero sized.  Since we have already allocated
     two locals for rp and ar.pfs, we check for two locals.  */
  /* Leaf functions can use output registers as call-clobbered temporaries.  */
  if (locals == 2 && outputs == 0 && leaf_function)

      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      if (TARGET_REG_NAMES)
        ia64_need_regstk = 1;

        ia64_need_regstk = 0;

      ia64_arpfs_regno = 0;

      ia64_need_regstk = 0;
      ia64_arpfs_regno = LOC_REG (locals - 1);

      insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
                                   GEN_INT (inputs), GEN_INT (locals),
                                   GEN_INT (outputs), GEN_INT (rotates)));
      RTX_FRAME_RELATED_P (insn) = 1;

  /* Emit a save of BR_REG (0) if we call other functions.
     Do this even if this function doesn't return, as EH
     depends on this to be able to unwind the stack.  */

      ia64_rp_regno = LOC_REG (locals - 2);
      reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno];

      ia64_rp_reg = gen_rtx_REG (DImode, ia64_rp_regno);
      insn = emit_move_insn (ia64_rp_reg, gen_rtx_REG (DImode,

      RTX_FRAME_RELATED_P (insn) = 1;

      /* If we don't have an epilogue, then the return value
         doesn't appear to be needed and the above store will
         appear dead and will elicit a warning from flow.  */
      emit_insn (gen_rtx_USE (VOIDmode, ia64_rp_reg));

  /* Set up frame pointer and stack pointer.  */
  if (frame_pointer_needed)

      insn = emit_insn (gen_movdi (hard_frame_pointer_rtx, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (CONST_OK_FOR_I (-frame_size))
        offset = GEN_INT (-frame_size);

          /* Decrement too big for an add immediate; materialize it.  */
          offset = gen_rtx_REG (DImode, GR_REG (2));
          insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size)));

          RTX_FRAME_RELATED_P (insn) = 1;

      /* If there is a frame pointer, then we need to make the stack pointer
         decrement depend on the frame pointer, so that the stack pointer
         update won't be moved past fp-relative stores to the frame.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,

                                                       hard_frame_pointer_rtx));

        insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,

      RTX_FRAME_RELATED_P (insn) = 1;

  /* Save registers to frame.  */
  save_restore_insns (1);
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using a epilogue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.  */
/* NOTE(review): fragmented by extraction -- the return type, the
   declaration of `offset', `else' keywords, braces, and some argument
   lines of emitted insns were lost.  Surviving fragments preserved
   below; confirm against the original source.  */

ia64_expand_epilogue ()

  /* Restore registers from frame.  */
  save_restore_insns (0);

  /* ??? The gen_epilogue_deallocate_stack call below does not work.  This
     is mainly because there is no fp+offset addressing mode, so most loads
     from the frame do not actually use the frame pointer; they use a pseudo
     computed from the frame pointer.  The same problem exists with the
     stack pointer when there is no frame pointer.  I think this can be
     fixed only by making the dependency analysis code in sched smarter, so
     that it recognizes references to the frame, and makes succeeding stack
     pointer updates anti-dependent on them.  */
  emit_insn (gen_blockage ());

  if (cfun->machine->ia64_eh_epilogue_sp == NULL_RTX)

      if (frame_pointer_needed)

          /* If there is a frame pointer, then we need to make the stack
             pointer restore depend on the frame pointer, so that the stack
             pointer restore won't be moved up past fp-relative loads from
             the frame.  */
          emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx));

          int frame_size = current_frame_info.total_size;

          if (frame_size != 0)

              if (CONST_OK_FOR_I (frame_size))
                offset = GEN_INT (frame_size);

                  /* Increment too big for an add immediate.  */
                  offset = gen_rtx_REG (DImode, GR_REG (2));
                  emit_insn (gen_movdi (offset, GEN_INT (frame_size)));

              emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,

    /* Return via eh_epilogue, so we already have our new stack pointer.  */
    emit_insn (gen_movdi (stack_pointer_rtx,
                          cfun->machine->ia64_eh_epilogue_sp));

  if (ia64_arpfs_regno)
    emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno)));

    emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)),
                    gen_rtx_REG (DImode, ia64_rp_regno));

  if (cfun->machine->ia64_eh_epilogue_bsp != NULL_RTX)

      /* We have to restore the bsp.  */
      emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
/* Emit the function prologue directives (.regstk and .prologue) to FILE.
   NOTE(review): fragmented by extraction -- the return type, parameter
   declarations, the declaration of `insn', braces, and the body of the
   unwind-table early-out `if' were lost.  Surviving fragments preserved
   below; confirm against the original source.  */

ia64_function_prologue (file, size)

  if (ia64_need_regstk)
    fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))

  /* Emit the .prologue directive.  In order to do this, we need to find
     where the stack pointer is moved to a GR, if it is, and mark it.  */

  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))

      if (RTX_FRAME_RELATED_P (insn) && GET_CODE (insn) == INSN)

          rtx pat = PATTERN (insn);
          if (GET_CODE (pat) == SET)

              rtx dest = SET_DEST (pat);
              rtx src = SET_SRC (pat);
              if (GET_CODE (src) == REG && REGNO (src) == STACK_POINTER_REGNUM
                  && GET_CODE (dest) == REG)

                  int reg = REGNO (dest);
                  /* The soft frame pointer is reported as its hard reg.  */
                  if (REGNO (dest) == FRAME_POINTER_REGNUM)
                    reg = ia64_fp_regno;
                  fprintf (file, "\t.prologue 0x2, %d\n", reg);

  /* No sp-to-GR copy found: emit a bare .prologue.  */
  if (insn == NULL_RTX)
    fprintf (file, "\t.prologue\n");
/* Emit the function epilogue.
   NOTE(review): the return type, parameter declarations, and the entire
   body (if any) were lost in extraction.  */

ia64_function_epilogue (file, size)
/* Return 1 if br.ret can do all the work required to return from a
   function (comment truncated in extraction).
   NOTE(review): the return type and braces were lost in extraction; the
   last conjunct may also be truncated -- confirm against the original
   source.  */

ia64_direct_return ()

  return (reload_completed && ! frame_pointer_needed
          && ia64_compute_frame_size (get_frame_size ()));
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.
   Spills the remaining argument registers to the stack and reports the
   pretend-arg size through *PRETEND_SIZE.
   NOTE(review): fragmented by extraction -- the return type, the other
   parameter declarations, declarations of i/insn, braces, the
   second_time guard (if any), and the tail of the *pretend_size
   expression were lost.  Surviving fragments preserved below; confirm
   against the original source.  */

ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;

  /* If this is a stdarg function, then don't save the current argument.  */
  int offset = ! current_function_varargs;

  if (cum.words < MAX_ARGUMENT_SLOTS)

      int first_reg = GR_ARG_FIRST + cum.words + offset;
      rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (16));
      rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
      rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);

      /* We must emit st8.spill insns instead of st8 because we might
         be saving non-argument registers, and non-argument registers might
         not contain valid values.  */
      emit_move_insn (tmp_reg, virtual_incoming_args_rtx);
      for (i = first_reg; i < GR_ARG_FIRST + 8; i++)

          insn = emit_insn (gen_gr_spill (mem, gen_rtx_REG (DImode, i)));
          REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, tmp_reg, 0);

      *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset)
1180 /* Check whether TYPE is a homogeneous floating point aggregate. If
1181 it is, return the mode of the floating point type that appears
1182 in all leafs. If it is not, return VOIDmode.
1184 An aggregate is a homogeneous floating point aggregate if all
1185 fields/elements in it have the same floating point type (e.g.,
1186 SFmode). 128-bit quad-precision floats are excluded. */
1188 static enum machine_mode
1189 hfa_element_mode (type
, nested
)
1193 enum machine_mode element_mode
= VOIDmode
;
1194 enum machine_mode mode
;
1195 enum tree_code code
= TREE_CODE (type
);
1196 int know_element_mode
= 0;
1201 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
1202 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
1203 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
1204 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
1208 /* Fortran complex types are supposed to be HFAs, so we need to handle
1209 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
1212 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
1213 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
1214 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
1219 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
1220 mode if this is contained within an aggregate. */
1222 return TYPE_MODE (type
);
1227 return TYPE_MODE (TREE_TYPE (type
));
1231 case QUAL_UNION_TYPE
:
1232 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
1234 if (TREE_CODE (t
) != FIELD_DECL
)
1237 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
1238 if (know_element_mode
)
1240 if (mode
!= element_mode
)
1243 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
1247 know_element_mode
= 1;
1248 element_mode
= mode
;
1251 return element_mode
;
1254 /* If we reach here, we probably have some front-end specific type
1255 that the backend doesn't know about. This can happen via the
1256 aggregate_value_p call in init_function_start. All we can do is
1257 ignore unknown tree types. */
1264 /* Return rtx for register where argument is passed, or zero if it is passed
1267 /* ??? 128-bit quad-precision floats are always passed in general
1271 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
1272 CUMULATIVE_ARGS
*cum
;
1273 enum machine_mode mode
;
1278 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
1279 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
1280 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
1283 enum machine_mode hfa_mode
= VOIDmode
;
1285 /* Arguments larger than 8 bytes start at the next even boundary. */
1286 if (words
> 1 && (cum
->words
& 1))
1289 /* If all argument slots are used, then it must go on the stack. */
1290 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
1293 /* Check for and handle homogeneous FP aggregates. */
1295 hfa_mode
= hfa_element_mode (type
, 0);
1297 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
1298 and unprototyped hfas are passed specially. */
1299 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
1303 int fp_regs
= cum
->fp_regs
;
1304 int int_regs
= cum
->words
+ offset
;
1305 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
1309 /* If prototyped, pass it in FR regs then GR regs.
1310 If not prototyped, pass it in both FR and GR regs.
1312 If this is an SFmode aggregate, then it is possible to run out of
1313 FR regs while GR regs are still left. In that case, we pass the
1314 remaining part in the GR regs. */
1316 /* Fill the FP regs. We do this always. We stop if we reach the end
1317 of the argument, the last FP register, or the last argument slot. */
1319 byte_size
= ((mode
== BLKmode
)
1320 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
1321 args_byte_size
= int_regs
* UNITS_PER_WORD
;
1323 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
1324 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
1326 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
1327 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
1330 /* ??? Padding for XFmode type? */
1332 args_byte_size
+= hfa_size
;
1336 /* If no prototype, then the whole thing must go in GR regs. */
1337 if (! cum
->prototype
)
1339 /* If this is an SFmode aggregate, then we might have some left over
1340 that needs to go in GR regs. */
1341 else if (byte_size
!= offset
)
1342 int_regs
+= offset
/ UNITS_PER_WORD
;
1344 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
1346 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
1348 enum machine_mode gr_mode
= DImode
;
1350 /* If we have an odd 4 byte hunk because we ran out of FR regs,
1351 then this goes in a GR reg left adjusted/little endian, right
1352 adjusted/big endian. */
1353 /* ??? Currently this is handled wrong, because 4-byte hunks are
1354 always right adjusted/little endian. */
1357 /* If we have an even 4 byte hunk because the aggregate is a
1358 multiple of 4 bytes in size, then this goes in a GR reg right
1359 adjusted/little endian. */
1360 else if (byte_size
- offset
== 4)
1363 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
1364 gen_rtx_REG (gr_mode
, (basereg
1367 offset
+= GET_MODE_SIZE (gr_mode
);
1371 /* If we ended up using just one location, just return that one loc. */
1373 return XEXP (loc
[0], 0);
1375 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
1378 /* Integral and aggregates go in general registers. If we have run out of
1379 FR registers, then FP values must also go in general registers. This can
1380 happen when we have a SFmode HFA. */
1381 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
1382 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
1384 /* If there is a prototype, then FP values go in a FR register when
1385 named, and in a GR register when unnamed. */
1386 else if (cum
->prototype
)
1389 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
1391 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
1393 /* If there is no prototype, then FP values go in both FR and GR
1397 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
1398 gen_rtx_REG (mode
, (FR_ARG_FIRST
1401 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
1403 (basereg
+ cum
->words
1407 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
1411 /* Return number of words, at the beginning of the argument, that must be
1412 put in registers. 0 if the argument is entirely in registers or entirely
1416 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
1417 CUMULATIVE_ARGS
*cum
;
1418 enum machine_mode mode
;
1422 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
1423 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
1427 /* Arguments larger than 8 bytes start at the next even boundary. */
1428 if (words
> 1 && (cum
->words
& 1))
1431 /* If all argument slots are used, then it must go on the stack. */
1432 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
1435 /* It doesn't matter whether the argument goes in FR or GR regs. If
1436 it fits within the 8 argument slots, then it goes entirely in
1437 registers. If it extends past the last argument slot, then the rest
1438 goes on the stack. */
1440 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
1443 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
1446 /* Update CUM to point after this argument. This is patterned after
1447 ia64_function_arg. */
1450 ia64_function_arg_advance (cum
, mode
, type
, named
)
1451 CUMULATIVE_ARGS
*cum
;
1452 enum machine_mode mode
;
1456 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
1457 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
1460 enum machine_mode hfa_mode
= VOIDmode
;
1462 /* If all arg slots are already full, then there is nothing to do. */
1463 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
1466 /* Arguments larger than 8 bytes start at the next even boundary. */
1467 if (words
> 1 && (cum
->words
& 1))
1470 cum
->words
+= words
+ offset
;
1472 /* Check for and handle homogeneous FP aggregates. */
1474 hfa_mode
= hfa_element_mode (type
, 0);
1476 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
1477 and unprototyped hfas are passed specially. */
1478 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
1480 int fp_regs
= cum
->fp_regs
;
1481 /* This is the original value of cum->words + offset. */
1482 int int_regs
= cum
->words
- words
;
1483 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
1487 /* If prototyped, pass it in FR regs then GR regs.
1488 If not prototyped, pass it in both FR and GR regs.
1490 If this is an SFmode aggregate, then it is possible to run out of
1491 FR regs while GR regs are still left. In that case, we pass the
1492 remaining part in the GR regs. */
1494 /* Fill the FP regs. We do this always. We stop if we reach the end
1495 of the argument, the last FP register, or the last argument slot. */
1497 byte_size
= ((mode
== BLKmode
)
1498 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
1499 args_byte_size
= int_regs
* UNITS_PER_WORD
;
1501 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
1502 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
1504 /* ??? Padding for XFmode type? */
1506 args_byte_size
+= hfa_size
;
1510 cum
->fp_regs
= fp_regs
;
1513 /* Integral and aggregates go in general registers. If we have run out of
1514 FR registers, then FP values must also go in general registers. This can
1515 happen when we have a SFmode HFA. */
1516 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
1519 /* If there is a prototype, then FP values go in a FR register when
1520 named, and in a GR register when unnamed. */
1521 else if (cum
->prototype
)
1526 /* ??? Complex types should not reach here. */
1527 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
1529 /* If there is no prototype, then FP values go in both FR and GR
1532 /* ??? Complex types should not reach here. */
1533 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
1538 /* Implement va_start. */
1541 ia64_va_start (stdarg_p
, valist
, nextarg
)
1549 arg_words
= current_function_args_info
.words
;
1554 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
1556 nextarg
= plus_constant (nextarg
, ofs
);
1557 std_expand_builtin_va_start (1, valist
, nextarg
);
1560 /* Implement va_arg. */
1563 ia64_va_arg (valist
, type
)
1569 /* Arguments larger than 8 bytes are 16 byte aligned. */
1570 size
= int_size_in_bytes (type
);
1571 if (size
> UNITS_PER_WORD
)
1573 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
1574 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
1575 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
1576 build_int_2 (-2 * UNITS_PER_WORD
, -1));
1577 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
1578 TREE_SIDE_EFFECTS (t
) = 1;
1579 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
1582 return std_expand_builtin_va_arg (valist
, type
);
1585 /* Return 1 if the function return value is returned in memory. Return 0 if it is
1589 ia64_return_in_memory (valtype
)
1592 enum machine_mode mode
;
1593 enum machine_mode hfa_mode
;
1596 mode
= TYPE_MODE (valtype
);
1597 byte_size
= ((mode
== BLKmode
)
1598 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
1600 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
1602 hfa_mode
= hfa_element_mode (valtype
, 0);
1603 if (hfa_mode
!= VOIDmode
)
1605 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
1607 /* ??? Padding for XFmode type? */
1608 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
1614 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
1620 /* Return rtx for register that holds the function return value. */
1623 ia64_function_value (valtype
, func
)
1627 enum machine_mode mode
;
1628 enum machine_mode hfa_mode
;
1630 mode
= TYPE_MODE (valtype
);
1631 hfa_mode
= hfa_element_mode (valtype
, 0);
1633 if (hfa_mode
!= VOIDmode
)
1641 hfa_size
= GET_MODE_SIZE (hfa_mode
);
1642 byte_size
= ((mode
== BLKmode
)
1643 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
1645 for (i
= 0; offset
< byte_size
; i
++)
1647 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
1648 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
1650 /* ??? Padding for XFmode type? */
1655 return XEXP (loc
[0], 0);
1657 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
1659 else if (FLOAT_TYPE_P (valtype
))
1660 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
1662 return gen_rtx_REG (mode
, GR_RET_FIRST
);
1665 /* Print a memory address as an operand to reference that memory location. */
1667 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
1668 also call this from ia64_print_operand for memory addresses. */
1671 ia64_print_operand_address (stream
, address
)
1677 /* Print an operand to an assembler instruction.
1678 B Work arounds for hardware bugs.
1679 C Swap and print a comparison operator.
1680 D Print an FP comparison operator.
1681 E Print 32 - constant, for SImode shifts as extract.
1682 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
1683 a floating point register emitted normally.
1684 I Invert a predicate register by adding 1.
1685 O Append .acq for volatile load.
1686 P Postincrement of a MEM.
1687 Q Append .rel for volatile store.
1688 S Shift amount for shladd instruction.
1689 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
1690 for Intel assembler.
1691 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
1692 for Intel assembler.
1693 r Print register name, or constant 0 as r0. HP compatibility for
1696 ia64_print_operand (file
, x
, code
)
1703 /* XXX Add other codes here. */
1706 /* Handled below. */
1711 fputs (" ;; nop 0 ;; nop 0 ;;", file
);
1716 enum rtx_code c
= swap_condition (GET_CODE (x
));
1717 fputs (GET_RTX_NAME (c
), file
);
1722 fputs (GET_CODE (x
) == NE
? "neq" : GET_RTX_NAME (GET_CODE (x
)), file
);
1726 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
1730 if (x
== CONST0_RTX (GET_MODE (x
)))
1731 fputs (reg_names
[FR_REG (0)], file
);
1732 else if (x
== CONST1_RTX (GET_MODE (x
)))
1733 fputs (reg_names
[FR_REG (1)], file
);
1734 else if (GET_CODE (x
) == REG
)
1735 fputs (reg_names
[REGNO (x
)], file
);
1741 fputs (reg_names
[REGNO (x
) + 1], file
);
1745 if (MEM_VOLATILE_P (x
))
1746 fputs(".acq", file
);
1753 if (GET_CODE (XEXP (x
, 0)) != POST_INC
1754 && GET_CODE (XEXP (x
, 0)) != POST_DEC
)
1759 value
= GET_MODE_SIZE (GET_MODE (x
));
1761 /* ??? This is for ldf.fill and stf.spill which use XFmode, but which
1762 actually need 16 bytes increments. Perhaps we can change them
1763 to use TFmode instead. Or don't use POST_DEC/POST_INC for them.
1764 Currently, there are no other uses of XFmode, so hacking it here
1769 if (GET_CODE (XEXP (x
, 0)) == POST_DEC
)
1772 fprintf (file
, "%d", value
);
1777 if (MEM_VOLATILE_P (x
))
1778 fputs(".rel", file
);
1782 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, exact_log2 (INTVAL (x
)));
1786 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
1788 fprintf (file
, "0x%x", INTVAL (x
) & 0xffffffff);
1794 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
1796 char *prefix
= "0x";
1797 if (INTVAL (x
) & 0x80000000)
1799 fprintf (file
, "0xffffffff");
1802 fprintf (file
, "%s%x", prefix
, INTVAL (x
) & 0xffffffff);
1808 /* If this operand is the constant zero, write it as zero. */
1809 if (GET_CODE (x
) == REG
)
1810 fputs (reg_names
[REGNO (x
)], file
);
1811 else if (x
== CONST0_RTX (GET_MODE (x
)))
1814 output_operand_lossage ("invalid %%r value");
1818 output_operand_lossage ("ia64_print_operand: unknown code");
1822 switch (GET_CODE (x
))
1824 /* This happens for the spill/restore instructions. */
1827 /* ... fall through ... */
1830 fputs (reg_names
[REGNO (x
)], file
);
1835 rtx addr
= XEXP (x
, 0);
1836 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
1837 addr
= XEXP (addr
, 0);
1838 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
1843 output_addr_const (file
, x
);
1852 /* This function returns the register class required for a secondary
1853 register when copying between one of the registers in CLASS, and X,
1854 using MODE. A return value of NO_REGS means that no secondary register
1858 ia64_secondary_reload_class (class, mode
, x
)
1859 enum reg_class
class;
1860 enum machine_mode mode
;
1865 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
1866 regno
= true_regnum (x
);
1868 /* ??? This is required because of a bad gcse/cse/global interaction.
1869 We end up with two pseudos with overlapping lifetimes both of which are
1870 equiv to the same constant, and both which need to be in BR_REGS. This
1871 results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce,
1872 return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be
1873 a cse bug. cse_basic_block_end changes depending on the path length,
1874 which means the qty_first_reg check in make_regs_eqv can give different
1875 answers at different times. */
1876 /* ??? At some point I'll probably need a reload_indi pattern to handle
1878 if (class == BR_REGS
&& BR_REGNO_P (regno
))
1881 /* This is needed if a pseudo used as a call_operand gets spilled to a
1883 if (class == BR_REGS
&& GET_CODE (x
) == MEM
)
1886 /* This can happen when a paradoxical subreg is an operand to the muldi3
1888 /* ??? This shouldn't be necessary after instruction scheduling is enabled,
1889 because paradoxical subregs are not accepted by register_operand when
1890 INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg
1891 stupidity in the *_operand functions in recog.c. */
1892 if ((class == FR_REGS
|| class == FR_INT_REGS
|| class == FR_FP_REGS
)
1893 && GET_CODE (x
) == MEM
1894 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
1895 || GET_MODE (x
) == QImode
))
1898 /* This can happen because of the ior/and/etc patterns that accept FP
1899 registers as operands. If the third operand is a constant, then it
1900 needs to be reloaded into a FP register. */
1901 if ((class == FR_REGS
|| class == FR_INT_REGS
|| class == FR_FP_REGS
)
1902 && GET_CODE (x
) == CONST_INT
)
1905 /* Moving a integer from an FP register to memory requires a general register
1906 as an intermediary. This is not necessary if we are moving a DImode
1907 subreg of a DFmode value from an FP register to memory, since stfd will
1908 do the right thing in this case. */
1909 if (class == FR_INT_REGS
&& GET_CODE (x
) == MEM
&& GET_MODE (x
) == DImode
)
1912 /* ??? This happens if we cse/gcse a CCmode value across a call, and the
1913 function has a nonlocal goto. This is because global does not allocate
1914 call crossing pseudos to hard registers when current_function_has_
1915 nonlocal_goto is true. This is relatively common for C++ programs that
1916 use exceptions. To reproduce, return NO_REGS and compile libstdc++. */
1917 if (class == PR_REGS
&& GET_CODE (x
) == MEM
)
1924 /* Emit text to declare externally defined variables and functions, because
1925 the Intel assembler does not support undefined externals. */
1928 ia64_asm_output_external (file
, decl
, name
)
1933 int save_referenced
;
1935 /* GNU as does not need anything here. */
1939 /* ??? The Intel assembler creates a reference that needs to be satisfied by
1940 the linker when we do this, so we need to be careful not to do this for
1941 builtin functions which have no library equivalent. Unfortunately, we
1942 can't tell here whether or not a function will actually be called by
1943 expand_expr, so we pull in library functions even if we may not need
1945 if (! strcmp (name
, "__builtin_next_arg")
1946 || ! strcmp (name
, "alloca")
1947 || ! strcmp (name
, "__builtin_constant_p")
1948 || ! strcmp (name
, "__builtin_args_info"))
1951 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
1953 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
1954 if (TREE_CODE (decl
) == FUNCTION_DECL
)
1956 fprintf (file
, "\t%s\t ", TYPE_ASM_OP
);
1957 assemble_name (file
, name
);
1959 fprintf (file
, TYPE_OPERAND_FMT
, "function");
1962 ASM_GLOBALIZE_LABEL (file
, name
);
1963 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
1966 /* Parse the -mfixed-range= option string. */
1975 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
1976 REG2 are either register names or register numbers. The effect
1977 of this option is to mark the registers in the range from REG1 to
1978 REG2 as ``fixed'' so they won't be used by the compiler. This is
1979 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
1983 dash
= strchr (str
, '-');
1986 warning ("value of -mfixed-range must have form REG1-REG2");
1991 comma
= strchr (dash
+ 1, ',');
1995 first
= decode_reg_name (str
);
1998 warning ("unknown register name: %s", str
);
2002 last
= decode_reg_name (dash
+ 1);
2005 warning ("unknown register name: %s", dash
+ 1);
2013 warning ("%s-%s is an empty range", str
, dash
+ 1);
2017 for (i
= first
; i
<= last
; ++i
)
2018 fixed_regs
[i
] = call_used_regs
[i
] = 1;
2028 /* Called to register all of our global variables with the garbage
2032 ia64_add_gc_roots ()
2034 ggc_add_rtx_root (&ia64_compare_op0
, 1);
2035 ggc_add_rtx_root (&ia64_compare_op1
, 1);
2039 ia64_init_machine_status (p
)
2043 (struct machine_function
*) xcalloc (1, sizeof (struct machine_function
));
2047 ia64_mark_machine_status (p
)
2050 ggc_mark_rtx (p
->machine
->ia64_eh_epilogue_sp
);
2051 ggc_mark_rtx (p
->machine
->ia64_eh_epilogue_bsp
);
2055 /* Handle TARGET_OPTIONS switches. */
2058 ia64_override_options ()
2060 if (ia64_fixed_range_string
)
2061 fix_range (ia64_fixed_range_string
);
2063 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
2065 init_machine_status
= ia64_init_machine_status
;
2066 mark_machine_status
= ia64_mark_machine_status
;
2068 ia64_add_gc_roots ();
2071 /* The following collection of routines emit instruction group stop bits as
2072 necessary to avoid dependencies. */
2074 /* Need to track some additional registers as far as serialization is
2075 concerned so we can properly handle br.call and br.ret. We could
2076 make these registers visible to gcc, but since these registers are
2077 never explicitly used in gcc generated code, it seems wasteful to
2078 do so (plus it would make the call and return patterns needlessly
2080 #define REG_GP (GR_REG (1))
2081 #define REG_RP (BR_REG (0))
2082 #define REG_AR_PFS (FIRST_PSEUDO_REGISTER)
2083 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
2084 /* ??? This will eventually need to be a hard register. */
2085 #define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2)
2086 /* This is used for volatile asms which may require a stop bit immediately
2087 before and after them. */
2088 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3)
2089 #define NUM_REGS (FIRST_PSEUDO_REGISTER + 4)
2091 /* For each register, we keep track of how many times it has been
2092 written in the current instruction group. If a register is written
2093 unconditionally (no qualifying predicate), WRITE_COUNT is set to 2
2094 and FIRST_PRED is ignored. If a register is written if its
2095 qualifying predicate P is true, we set WRITE_COUNT to 1 and
2096 FIRST_PRED to P. Later on, the same register may be written again
2097 by the complement of P (P+1 if P is even, P-1, otherwise) and when
2098 this happens, WRITE_COUNT gets set to 2. The result of this is
2099 that whenever an insn attempts to write a register whose
2100 WRITE_COUNT is two, we need to issue a insn group barrier first. */
2101 struct reg_write_state
2104 char written_by_fp
; /* Was register written by a floating-point insn? */
2105 short first_pred
; /* 0 means ``no predicate'' */
2108 /* Cumulative info for the current instruction group. */
2109 struct reg_write_state rws_sum
[NUM_REGS
];
2110 /* Info for the current instruction. This gets copied to rws_sum after a
2111 stop bit is emitted. */
2112 struct reg_write_state rws_insn
[NUM_REGS
];
2114 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
2115 RTL for one instruction. */
2118 unsigned int is_write
: 1; /* Is register being written? */
2119 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
2120 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
2123 /* Update *RWS for REGNO, which is being written by the current instruction,
2124 with predicate PRED, and associated register flags in FLAGS. */
2127 rws_update (rws
, regno
, flags
, pred
)
2128 struct reg_write_state
*rws
;
2130 struct reg_flags flags
;
2133 rws
[regno
].write_count
+= pred
? 1 : 2;
2134 rws
[regno
].written_by_fp
|= flags
.is_fp
;
2135 rws
[regno
].first_pred
= pred
;
2138 /* Handle an access to register REGNO of type FLAGS using predicate register
2139 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
2140 a dependency with an earlier instruction in the same group. */
2143 rws_access_reg (regno
, flags
, pred
)
2145 struct reg_flags flags
;
2148 int need_barrier
= 0;
2149 int is_predicate_reg
;
2151 if (regno
>= NUM_REGS
)
2156 /* One insn writes same reg multiple times? */
2157 if (rws_insn
[regno
].write_count
> 0)
2160 /* Update info for current instruction. */
2161 rws_update (rws_insn
, regno
, flags
, pred
);
2163 /* ??? This is necessary because predicate regs require two hard
2164 registers. However, this should be using HARD_REGNO_NREGS so that
2165 it works for all multi-reg hard registers, instead of only for
2166 predicate registers. */
2167 is_predicate_reg
= REGNO_REG_CLASS (regno
) == PR_REGS
;
2168 if (is_predicate_reg
)
2169 rws_update (rws_insn
, regno
+ 1, flags
, pred
);
2171 switch (rws_sum
[regno
].write_count
)
2174 /* The register has not been written yet. */
2175 rws_update (rws_sum
, regno
, flags
, pred
);
2176 if (is_predicate_reg
)
2177 rws_update (rws_sum
, regno
+ 1, flags
, pred
);
2181 /* The register has been written via a predicate. If this is
2182 not a complementary predicate, then we need a barrier. */
2183 /* ??? This assumes that P and P+1 are always complementary
2184 predicates for P even. */
2185 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
2187 rws_update (rws_sum
, regno
, flags
, pred
);
2188 if (is_predicate_reg
)
2189 rws_update (rws_sum
, regno
+ 1, flags
, pred
);
2193 /* The register has been unconditionally written already. We
2204 if (flags
.is_branch
)
2206 /* Branches have several RAW exceptions that allow to avoid
2209 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== REG_AR_PFS
)
2210 /* RAW dependencies on branch regs are permissible as long
2211 as the writer is a non-branch instruction. Since we
2212 never generate code that uses a branch register written
2213 by a branch instruction, handling this case is
2215 /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop,
2216 br.wexit, br.wtop. This is true currently. */
2219 if (REGNO_REG_CLASS (regno
) == PR_REGS
2220 && ! rws_sum
[regno
].written_by_fp
)
2221 /* The predicates of a branch are available within the
2222 same insn group as long as the predicate was written by
2223 something other than a floating-point instruction. */
2227 switch (rws_sum
[regno
].write_count
)
2230 /* The register has not been written yet. */
2234 /* The register has been written via a predicate. If this is
2235 not a complementary predicate, then we need a barrier. */
2236 /* ??? This assumes that P and P+1 are always complementary
2237 predicates for P even. */
2238 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
2243 /* The register has been unconditionally written already. We
2253 return need_barrier
;
2256 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
2257 Return 1 if this access creates a dependency with an earlier instruction
2258 in the same group. */
2261 rtx_needs_barrier (x
, flags
, pred
)
2263 struct reg_flags flags
;
2267 int is_complemented
= 0;
2268 int need_barrier
= 0;
2269 const char *format_ptr
;
2270 struct reg_flags new_flags
;
2279 switch (GET_CODE (x
))
2283 switch (GET_CODE (src
))
2286 /* We don't need to worry about the result registers that
2287 get written by subroutine call. */
2288 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
2289 return need_barrier
;
2292 if (SET_DEST (x
) == pc_rtx
)
2294 /* X is a conditional branch. */
2295 /* ??? This seems redundant, as the caller sets this bit for
2297 new_flags
.is_branch
= 1;
2298 need_barrier
= rtx_needs_barrier (src
, new_flags
, pred
);
2299 return need_barrier
;
2303 /* X is a conditional move. */
2304 cond
= XEXP (src
, 0);
2305 if (GET_CODE (cond
) == EQ
)
2306 is_complemented
= 1;
2307 cond
= XEXP (cond
, 0);
2308 if (GET_CODE (cond
) != REG
2309 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
2312 if (XEXP (src
, 1) == SET_DEST (x
)
2313 || XEXP (src
, 2) == SET_DEST (x
))
2315 /* X is a conditional move that conditionally writes the
2318 /* We need another complement in this case. */
2319 if (XEXP (src
, 1) == SET_DEST (x
))
2320 is_complemented
= ! is_complemented
;
2322 pred
= REGNO (cond
);
2323 if (is_complemented
)
2327 /* ??? If this is a conditional write to the dest, then this
2328 instruction does not actually read one source. This probably
2329 doesn't matter, because that source is also the dest. */
2330 /* ??? Multiple writes to predicate registers are allowed
2331 if they are all AND type compares, or if they are all OR
2332 type compares. We do not generate such instructions
2335 /* ... fall through ... */
2338 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
2339 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
2340 /* Set new_flags.is_fp to 1 so that we know we're dealing
2341 with a floating point comparison when processing the
2342 destination of the SET. */
2343 new_flags
.is_fp
= 1;
2346 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
2347 /* This instruction unconditionally uses a predicate register. */
2349 need_barrier
|= rws_access_reg (REGNO (cond
), flags
, 0);
2352 if (GET_CODE (dst
) == ZERO_EXTRACT
)
2354 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
2355 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
2356 dst
= XEXP (dst
, 0);
2358 new_flags
.is_write
= 1;
2359 need_barrier
|= rtx_needs_barrier (dst
, new_flags
, pred
);
2363 new_flags
.is_write
= 0;
2364 /* ??? Why is this here? It seems unnecessary. */
2365 need_barrier
|= rws_access_reg (REG_GP
, new_flags
, pred
);
2366 need_barrier
|= rws_access_reg (REG_AR_EC
, new_flags
, pred
);
2368 /* Avoid multiple register writes, in case this is a pattern with
2369 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2370 /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM,
2371 and that we don't have predicated calls/returns. */
2372 if (! rws_insn
[REG_AR_CFM
].write_count
)
2374 new_flags
.is_write
= 1;
2375 need_barrier
|= rws_access_reg (REG_RP
, new_flags
, pred
);
2376 need_barrier
|= rws_access_reg (REG_AR_PFS
, new_flags
, pred
);
2377 need_barrier
|= rws_access_reg (REG_AR_CFM
, new_flags
, pred
);
2384 /* We must handle USE here in case it occurs within a PARALLEL.
2385 For instance, the mov ar.pfs= instruction has a USE which requires
2386 a barrier between it and an immediately preceding alloc. */
2388 /* Clobber & use are for earlier compiler-phases only. */
2393 /* We always emit stop bits for traditional asms. We emit stop bits
2394 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
2395 if (GET_CODE (x
) != ASM_OPERANDS
2396 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
2398 /* Avoid writing the register multiple times if we have multiple
2399 asm outputs. This avoids an abort in rws_access_reg. */
2400 if (! rws_insn
[REG_VOLATILE
].write_count
)
2402 new_flags
.is_write
= 1;
2403 rws_access_reg (REG_VOLATILE
, new_flags
, pred
);
2408 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
2409 We can not just fall through here since then we would be confused
2410 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
2411 traditional asms unlike their normal usage. */
2413 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
2414 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
2419 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
2420 if (rtx_needs_barrier (XVECEXP (x
, 0, i
), flags
, pred
))
2428 need_barrier
= rws_access_reg (REGNO (x
), flags
, pred
);
2432 /* Find the regs used in memory address computation. */
2433 new_flags
.is_write
= 0;
2434 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
2437 case CONST_INT
: case CONST_DOUBLE
:
2438 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
2441 /* Operators with side-effects. */
2442 case POST_INC
: case POST_DEC
:
2443 if (GET_CODE (XEXP (x
, 0)) != REG
)
2446 new_flags
.is_write
= 0;
2447 need_barrier
= rws_access_reg (REGNO (XEXP (x
, 0)), new_flags
, pred
);
2448 new_flags
.is_write
= 1;
2449 need_barrier
|= rws_access_reg (REGNO (XEXP (x
, 0)), new_flags
, pred
);
2452 /* Handle common unary and binary ops for efficiency. */
2453 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
2454 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
2455 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
2456 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
2457 case NE
: case EQ
: case GE
: case GT
: case LE
:
2458 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
2459 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
2460 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
2463 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
2464 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
2465 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
2466 case SQRT
: case FFS
:
2467 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
2471 switch (XINT (x
, 1))
2473 /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat
2474 dependencies as long as we don't have both a spill and fill in
2475 the same instruction group. We need to check for that. */
2476 case 1: /* st8.spill */
2477 case 2: /* ld8.fill */
2478 case 3: /* stf.spill */
2479 case 4: /* ldf.spill */
2480 case 8: /* popcnt */
2481 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
2484 case 5: /* mov =pr */
2485 /* This reads all predicate registers. */
2486 for (i
= PR_REG (1); i
< PR_REG (64); i
++)
2487 need_barrier
|= rws_access_reg (i
, flags
, pred
);
2494 /* ??? Should track unat reads and writes. */
2495 case 9: /* mov =ar.unat */
2496 case 10: /* mov ar.unat= */
2498 case 11: /* mov ar.ccv= */
2502 case 13: /* cmpxchg_acq */
2504 case 14: /* val_compare_and_swap */
2506 case 15: /* lock_release */
2508 case 16: /* lock_test_and_set */
2510 case 17: /* _and_fetch */
2512 case 18: /* fetch_and_ */
2514 case 19: /* fetchadd_acq */
2516 case 20: /* mov = ar.bsp */
2524 case UNSPEC_VOLATILE
:
2525 switch (XINT (x
, 1))
2528 /* Alloc must always be the first instruction. Currently, we
2529 only emit it at the function start, so we don't need to worry
2530 about emitting a stop bit before it. */
2531 need_barrier
= rws_access_reg (REG_AR_PFS
, flags
, pred
);
2533 new_flags
.is_write
= 1;
2534 need_barrier
|= rws_access_reg (REG_AR_CFM
, new_flags
, pred
);
2535 return need_barrier
;
2537 case 1: /* blockage */
2538 case 2: /* insn group barrier */
2541 case 3: /* flush_cache */
2542 return rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
2544 case 4: /* mov ar.pfs= */
2545 new_flags
.is_write
= 1;
2546 need_barrier
= rws_access_reg (REG_AR_PFS
, new_flags
, pred
);
2549 case 6: /* mov pr= */
2550 /* This writes all predicate registers. */
2551 new_flags
.is_write
= 1;
2552 /* We need to skip by two, because rws_access_reg always writes
2553 to two predicate registers at a time. */
2554 /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */
2555 for (i
= PR_REG (0); i
< PR_REG (64); i
+= 2)
2556 need_barrier
|= rws_access_reg (i
, new_flags
, pred
);
2559 case 5: /* set_bsp */
2569 new_flags
.is_write
= 0;
2570 need_barrier
= rws_access_reg (REG_RP
, flags
, pred
);
2571 need_barrier
|= rws_access_reg (REG_AR_PFS
, flags
, pred
);
2573 new_flags
.is_write
= 1;
2574 need_barrier
|= rws_access_reg (REG_AR_EC
, new_flags
, pred
);
2575 need_barrier
|= rws_access_reg (REG_AR_CFM
, new_flags
, pred
);
2579 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
2580 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
2581 switch (format_ptr
[i
])
2583 case '0': /* unused field */
2584 case 'i': /* integer */
2585 case 'n': /* note */
2586 case 'w': /* wide integer */
2587 case 's': /* pointer to string */
2588 case 'S': /* optional pointer to string */
2592 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
2597 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
2598 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
2606 return need_barrier
;
2609 /* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
2610    as necessary to eliminate dependencies.  */
2613 emit_insn_group_barriers (insns
)
/* Cumulative barrier state for the current instruction group.  */
2616 int need_barrier
 = 0;
2617 int exception_nesting
;
2618 struct reg_flags flags
;
2619 rtx insn
, prev_insn
;
/* Clear the cumulative register read/write summary before scanning.  */
2621 memset (rws_sum
, 0, sizeof (rws_sum
));
/* Walk the whole insn chain in order.  */
2624 for (insn
 = insns
; insn
; insn
 = NEXT_INSN (insn
))
2626 memset (&flags
, 0, sizeof (flags
));
2627 switch (GET_CODE (insn
))
/* Notes: track exception-region nesting via the EH region notes.  */
2630 switch (NOTE_LINE_NUMBER (insn
))
2632 case NOTE_INSN_EH_REGION_BEG
:
2633 exception_nesting
++;
2636 case NOTE_INSN_EH_REGION_END
:
2637 exception_nesting
--;
2640 case NOTE_INSN_EPILOGUE_BEG
:
/* Mark branch-class insns so register accesses get branch semantics.  */
2650 flags
.is_branch
 = 1;
2652 if (GET_CODE (PATTERN (insn
)) == USE
)
2653 /* Don't care about USE "insns"---those are used to
2654    indicate to the optimizer that it shouldn't get rid of
2655    certain operations.  */
/* Reset the per-insn write tracking, then scan this insn's pattern
   against the accumulated group state.  */
2659 memset (rws_insn
, 0, sizeof (rws_insn
));
2660 need_barrier
 = rtx_needs_barrier (PATTERN (insn
), flags
, 0);
2662 /* Check to see if the previous instruction was a volatile
2665 need_barrier
 = rws_access_reg (REG_VOLATILE
, flags
, 0);
2669 /* PREV_INSN null can happen if the very first insn is a
/* A barrier is needed: emit a stop bit after the previous insn and
   restart the group summary from this insn's accesses alone.  */
2672 emit_insn_after (gen_insn_group_barrier (), prev_insn
);
2673 memcpy (rws_sum
, rws_insn
, sizeof (rws_sum
));
2681 /* A barrier doesn't imply an instruction group boundary.  */
2685 /* Leave prev_insn alone so the barrier gets generated in front
2686    of the label, if one is needed.  */
2695 /* Perform machine dependent operations on the rtl chain INSNS. */
2701 emit_insn_group_barriers (insns
);
2704 /* Return true if REGNO is used by the epilogue.  */
2707 ia64_epilogue_uses (regno
)
2710 /* For functions defined with the syscall_linkage attribute, all input
2711    registers are marked as live at all function exits.  This prevents the
2712    register allocator from using the input registers, which in turn makes it
2713    possible to restart a system call after an interrupt without having to
2714    save/restore the input registers.  */
/* REGNO qualifies only if it is an actually-used input register of a
   function carrying the "syscall_linkage" type attribute.  */
2716 if (IN_REGNO_P (regno
)
2717 && (regno
 < IN_REG (current_function_args_info
.words
))
2718 && lookup_attribute ("syscall_linkage",
2719 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2725 /* Return true if IDENTIFIER is a valid attribute for TYPE.
     The only attribute accepted is "syscall_linkage", on function or
     method types, and it takes no arguments.  */
2728 ia64_valid_type_attribute (type
, attributes
, identifier
, args
)
2730 tree attributes ATTRIBUTE_UNUSED
;
2734 /* We only support an attribute for function calls.  */
2736 if (TREE_CODE (type
) != FUNCTION_TYPE
2737 && TREE_CODE (type
) != METHOD_TYPE
)
2740 /* The "syscall_linkage" attribute says the callee is a system call entry
2741    point.  This affects ia64_epilogue_uses.  */
/* Valid only when no arguments were given to the attribute.  */
2743 if (is_attribute_p ("syscall_linkage", identifier
))
2744 return args
 == NULL_TREE
;
2749 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
2751 We add @ to the name if this goes in small data/bss. We can only put
2752 a variable in small data/bss if it is defined in this module or a module
2753 that we are statically linked with. We can't check the second condition,
2754 but TREE_STATIC gives us the first one. */
2756 /* ??? If we had IPA, we could check the second condition. We could support
2757 programmer added section attributes if the variable is not defined in this
2760 /* ??? See the v850 port for a cleaner way to do this. */
2762 /* ??? We could also support own long data here. Generating movl/add/ld8
2763 instead of addl,ld8/ld8. This makes the code bigger, but should make the
2764 code faster because there is one less load. This also includes incomplete
2765 types which can't go in sdata/sbss. */
2767 /* ??? See select_section. We must put short own readonly variables in
2768 sdata/sbss instead of the more natural rodata, because we can't perform
2769 the DECL_READONLY_SECTION test here. */
2771 extern struct obstack
* saveable_obstack
;
2774 ia64_encode_section_info (decl
)
2777 if (TREE_CODE (decl
) == FUNCTION_DECL
)
2778 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
2779 /* We assume that -fpic is used only to create a shared library (dso).
2780 With -fpic, no global data can ever be sdata.
2781 Without -fpic, global common uninitialized data can never be sdata, since
2782 it can unify with a real definition in a dso. */
2783 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
2784 to access them. The linker may then be able to do linker relaxation to
2785 optimize references to them. Currently sdata implies use of gprel. */
2786 else if (! TARGET_NO_SDATA
2787 && TREE_CODE (decl
) == VAR_DECL
2788 && TREE_STATIC (decl
)
2789 && ! (DECL_ONE_ONLY (decl
) || DECL_WEAK (decl
))
2790 && ! (TREE_PUBLIC (decl
)
2792 || (DECL_COMMON (decl
)
2793 && (DECL_INITIAL (decl
) == 0
2794 || DECL_INITIAL (decl
) == error_mark_node
))))
2795 /* Either the variable must be declared without a section attribute,
2796 or the section must be sdata or sbss. */
2797 && (DECL_SECTION_NAME (decl
) == 0
2798 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
2800 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
2803 int size
= int_size_in_bytes (TREE_TYPE (decl
));
2804 char *str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2807 /* ??? We should redeclare CTOR_LIST, DTOR_END so that we don't have to
2808 special case them here. Currently we put them in ctor/dtors sections
2809 behind the compiler's back. We should use section attributes
2811 if (! strcmp (str
, "__CTOR_LIST__")
2812 || ! strcmp (str
, "__DTOR_END__"))
2815 /* If this is an incomplete type with size 0, then we can't put it in
2816 sdata because it might be too big when completed. */
2817 else if (size
> 0 && size
<= ia64_section_threshold
2818 && str
[0] != SDATA_NAME_FLAG_CHAR
)
2820 int len
= strlen (str
);
2821 char *newstr
= obstack_alloc (saveable_obstack
, len
+ 2);
2823 strcpy (newstr
+ 1, str
);
2824 *newstr
= SDATA_NAME_FLAG_CHAR
;
2825 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = newstr
;
2828 /* This decl is marked as being in small data/bss but it shouldn't
2829 be; one likely explanation for this is that the decl has been
2830 moved into a different section from the one it was in when
2831 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
2832 else if (TREE_CODE (decl
) == VAR_DECL
2833 && (XSTR (XEXP (DECL_RTL (decl
), 0), 0)[0]
2834 == SDATA_NAME_FLAG_CHAR
))
2836 char *str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2837 int len
= strlen (str
);
2838 char *newstr
= obstack_alloc (saveable_obstack
, len
);
2840 strcpy (newstr
, str
+ 1);
2841 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = newstr
;
2845 /* Output assembly directives for prologue regions.  */
/* State shared by process_set across calls while scanning one prologue.  */
/* Offset (from the spill base register) of the next register spill;
   set when a frame-pointer-relative offset computation is seen.  */
2847 static int spill_offset
;
/* Stack-pointer-relative offset used for .savesp directives.  */
2848 static int sp_offset
;
/* Nonzero once the .spill directive has been emitted (starts "emitted"
   so nothing is printed until a spill offset is actually recorded).  */
2849 static int spill_offset_emitted
 = 1;
/* Temp register into which a special register (pr, ar.unat, br1-br5)
   was copied, pending its store to memory.  */
2850 static rtx tmp_reg
 = NULL_RTX
;
/* Which special value tmp_reg holds (unspec code or BR regno); -1 if none.  */
2851 static int tmp_saved
 = -1;
2854 /* This function processes a SET pattern looking for specific patterns
2855 which result in emitting an assembly directive required for unwinding. */
2857 process_set (asm_out_file
, pat
)
2861 rtx src
= SET_SRC (pat
);
2862 rtx dest
= SET_DEST (pat
);
2863 static rtx frame_reg
= NULL_RTX
;
2864 static int frame_size
= 0;
2866 /* Look for the ALLOC insn. reg = alloc .... */
2867 if (GET_CODE (src
) == UNSPEC_VOLATILE
&& XINT (src
, 1) == 0
2868 && GET_CODE (dest
) == REG
&& GR_REGNO_P (REGNO (dest
)))
2870 /* Assume this is a stack allocate insn. */
2871 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
2872 REGNO (dest
) + ia64_input_regs
);
2876 /* look for SP = .... */
2877 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
2879 if (GET_CODE (src
) == PLUS
)
2881 rtx op0
= XEXP (src
, 0);
2882 rtx op1
= XEXP (src
, 1);
2883 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
2885 fprintf (asm_out_file
, "\t.fframe %d\n", -INTVAL (op1
));
2886 frame_size
= INTVAL (op1
);
2890 if (op0
== dest
&& GET_CODE (op1
) == REG
)
2892 fprintf (asm_out_file
, "\t.vframe r%d\n", REGNO (op1
));
2898 /* Look for a frame offset. */
2899 if (GET_CODE (dest
) == REG
)
2901 if (GET_CODE (src
) == PLUS
)
2903 rtx op0
= XEXP (src
, 0);
2904 rtx op1
= XEXP (src
, 1);
2905 if (GET_CODE (op0
) == REG
&& REGNO (op0
) == FRAME_POINTER_REGNUM
2906 && GET_CODE (op1
) == CONST_INT
)
2908 sp_offset
= -frame_size
+ INTVAL (op1
);
2909 spill_offset
= INTVAL (op1
);
2910 spill_offset_emitted
= 0;
2912 /* We delay issuing the spill offset since we might
2913 be saving non-spill things off this register,
2914 thus adjusting its offset before a spill is seen. */
2920 /* Register move we need to look at. */
2921 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
2923 int regno
= REGNO (src
);
2924 if (BR_REGNO_P (regno
))
2926 /* Saving return address pointer. */
2927 if (regno
== BR_REG (0))
2929 fprintf (asm_out_file
, "\t.save rp, r%d\n",
2930 REGNO (dest
) + ia64_input_regs
);
2933 /* If its br1 to br5, we copy them to temp regs, then save the
2934 temp reg to memory next. */
2935 if (regno
>= BR_REG (1) && regno
<= BR_REG (5))
2943 /* Search for special reg moves. */
2944 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == UNSPEC
)
2946 int unspec_code
= XINT (src
, 1);
2947 /* Copied to a temp register, save it until we see the temp
2949 if (unspec_code
== 5 || unspec_code
== 9)
2952 tmp_saved
= unspec_code
;
2956 if (GET_CODE (dest
) == MEM
&& GET_CODE (XEXP (dest
, 0)) == POST_INC
2957 && GET_CODE (XEXP (XEXP (dest
, 0), 0)) == REG
)
2959 int spill_unspec
= 0;
2960 /* We adjust the spill_offset early, so we dont miss it later. */
2963 if (GET_CODE (src
) == UNSPEC
)
2965 spill_unspec
= XINT (src
, 1);
2966 /* 1 and 3 are unspecs for the GR and FR spills. */
2967 if (spill_unspec
!= 1 && spill_unspec
!= 3)
2970 /* ST8 or st8.spill insn. */
2971 if ((GET_CODE (src
) == REG
) || spill_unspec
!= 0)
2974 if (spill_unspec
!= 0)
2976 regno
= REGNO (XVECEXP (src
, 0, 0));
2977 if (!spill_offset_emitted
)
2979 fprintf (asm_out_file
, "\t.spill %d\n",
2980 /* (frame_size + 16 - spill_offset ) / 4); */
2981 (-(spill_offset
- 8) + 16) / 4);
2982 spill_offset_emitted
= 1;
2986 regno
= REGNO (src
);
2988 if (GR_REGNO_P (regno
))
2990 if (regno
>= GR_REG (4) && regno
<= GR_REG (7))
2991 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
2992 1 << (regno
- GR_REG (4)));
2993 else if (tmp_reg
!= NULL_RTX
&& regno
== REGNO (tmp_reg
))
2995 /* We saved a special reg to a temp reg, and now we're
2996 dumping it to memory. */
2998 /* register 9 is ar.unat. */
3000 fprintf (asm_out_file
, "\t.savesp ar.unat, %d\n",
3001 (sp_offset
- 8) / 4);
3002 else if (tmp_saved
== 5)
3003 fprintf (asm_out_file
, "\t.savesp pr, %d\n",
3004 (sp_offset
- 8) / 4);
3005 else if (tmp_saved
>= BR_REG (1) && tmp_saved
<= BR_REG (5))
3007 /* BR regs are saved this way too. */
3008 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
3009 1 << (tmp_saved
- BR_REG (1)));
3015 if (FR_REGNO_P (regno
))
3017 if (regno
>= FR_REG (2) && regno
<= FR_REG (5))
3018 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
3019 1 << (regno
- FR_REG (2)));
3021 if (regno
>= FR_REG (16) && regno
<= FR_REG (31))
3022 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
3023 1 << (regno
- FR_REG (12)));
3034 /* This function looks at a single insn and emits any directives
3035    required to unwind this insn.
     Only frame-related insns are considered, and only when unwind
     tables (or table-based exceptions) are enabled.  A SET pattern is
     handed to process_set directly; a PARALLEL has each SET member
     processed in turn.  */
3037 process_for_unwind_directive (asm_out_file
, insn
)
3041 if ((flag_unwind_tables
3042 || (flag_exceptions
 && !exceptions_via_longjmp
))
3043 && RTX_FRAME_RELATED_P (insn
))
3046 pat
 = PATTERN (insn
);
3048 switch (GET_CODE (pat
))
/* Single assignment: emit its unwind directive(s).  */
3052 process_set (asm_out_file
, pat
);
/* PARALLEL: scan every element, processing each SET found.  */
3058 int limit
 = XVECLEN (pat
, 0);
3059 for (par_index
 = 0; par_index
 < limit
; par_index
++)
3061 rtx x
 = XVECEXP (pat
, 0, par_index
);
3062 if (GET_CODE (x
) == SET
)
3063 process_set (asm_out_file
, x
);
/* Register one machine-specific builtin with the front end.  */
3071 #define def_builtin(name, type, code) \
3072   builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
/* Table entry describing one ia64 builtin: the insn to emit, the
   builtin's enum code, and a comparison code (unused by the 2-arg
   sync builtins below, always 0 there).
   NOTE(review): the entry's name field is not visible in this
   fragment, but the initializers below and the d->name uses in
   ia64_init_builtins imply a string member between icode and code --
   confirm against the full source.  */
3074 struct builtin_description
3076 enum insn_code icode
;
3078 enum ia64_builtins code
;
3079 enum rtx_code comparison
;
3083 /* All 32 bit intrinsics that take 2 arguments. */
3084 static struct builtin_description bdesc_2argsi
[] =
3086 { CODE_FOR_fetch_and_add_si
, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI
, 0, 0 },
3087 { CODE_FOR_fetch_and_sub_si
, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI
, 0, 0 },
3088 { CODE_FOR_fetch_and_or_si
, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI
, 0, 0 },
3089 { CODE_FOR_fetch_and_and_si
, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI
, 0, 0 },
3090 { CODE_FOR_fetch_and_xor_si
, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI
, 0, 0 },
3091 { CODE_FOR_fetch_and_nand_si
, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI
, 0, 0 },
3092 { CODE_FOR_add_and_fetch_si
, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI
, 0, 0 },
3093 { CODE_FOR_sub_and_fetch_si
, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI
, 0, 0 },
3094 { CODE_FOR_or_and_fetch_si
, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI
, 0, 0 },
3095 { CODE_FOR_and_and_fetch_si
, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI
, 0, 0 },
3096 { CODE_FOR_xor_and_fetch_si
, "__sync_xor_and_fetch_si", IA64_BUILTIN_XOR_AND_FETCH_SI
, 0, 0 },
3097 { CODE_FOR_nand_and_fetch_si
, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI
, 0, 0 }
3100 /* All 64 bit intrinsics that take 2 arguments. */
3101 static struct builtin_description bdesc_2argdi
[] =
3103 { CODE_FOR_fetch_and_add_di
, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI
, 0, 0 },
3104 { CODE_FOR_fetch_and_sub_di
, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI
, 0, 0 },
3105 { CODE_FOR_fetch_and_or_di
, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI
, 0, 0 },
3106 { CODE_FOR_fetch_and_and_di
, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI
, 0, 0 },
3107 { CODE_FOR_fetch_and_xor_di
, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI
, 0, 0 },
3108 { CODE_FOR_fetch_and_nand_di
, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI
, 0, 0 },
3109 { CODE_FOR_add_and_fetch_di
, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI
, 0, 0 },
3110 { CODE_FOR_sub_and_fetch_di
, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI
, 0, 0 },
3111 { CODE_FOR_or_and_fetch_di
, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI
, 0, 0 },
3112 { CODE_FOR_and_and_fetch_di
, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI
, 0, 0 },
3113 { CODE_FOR_xor_and_fetch_di
, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI
, 0, 0 },
3114 { CODE_FOR_nand_and_fetch_di
, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI
, 0, 0 }
3118 ia64_init_builtins ()
3121 struct builtin_description
*d
;
3123 tree psi_type_node
= build_pointer_type (integer_type_node
);
3124 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
3125 tree endlink
= tree_cons (NULL_TREE
, void_type_node
, NULL_TREE
);
3128 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
3129 tree si_ftype_psi_si_si
3130 = build_function_type (integer_type_node
,
3131 tree_cons (NULL_TREE
, psi_type_node
,
3132 tree_cons (NULL_TREE
, integer_type_node
,
3133 tree_cons (NULL_TREE
, integer_type_node
,
3136 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
3137 tree di_ftype_pdi_di_di
3138 = build_function_type (long_integer_type_node
,
3139 tree_cons (NULL_TREE
, pdi_type_node
,
3140 tree_cons (NULL_TREE
, long_integer_type_node
,
3141 tree_cons (NULL_TREE
, long_integer_type_node
,
3143 /* __sync_synchronize */
3144 tree void_ftype_void
3145 = build_function_type (void_type_node
, endlink
);
3147 /* __sync_lock_test_and_set_si */
3148 tree si_ftype_psi_si
3149 = build_function_type (integer_type_node
,
3150 tree_cons (NULL_TREE
, psi_type_node
,
3151 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
3153 /* __sync_lock_test_and_set_di */
3154 tree di_ftype_pdi_di
3155 = build_function_type (long_integer_type_node
,
3156 tree_cons (NULL_TREE
, pdi_type_node
,
3157 tree_cons (NULL_TREE
, long_integer_type_node
, endlink
)));
3159 /* __sync_lock_release_si */
3161 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
, endlink
));
3163 /* __sync_lock_release_di */
3165 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
, endlink
));
3167 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
3169 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
3171 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
3173 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
3175 def_builtin ("__sync_synchronize", void_ftype_void
, IA64_BUILTIN_SYNCHRONIZE
);
3177 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
, IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
3179 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
, IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
3181 def_builtin ("__sync_lock_release_si", void_ftype_psi
, IA64_BUILTIN_LOCK_RELEASE_SI
);
3183 def_builtin ("__sync_lock_release_di", void_ftype_pdi
, IA64_BUILTIN_LOCK_RELEASE_DI
);
3185 /* Add all builtins that are operations on two args. */
3186 for (i
=0, d
= bdesc_2argsi
; i
< sizeof(bdesc_2argsi
) / sizeof *d
; i
++, d
++)
3187 def_builtin (d
->name
, si_ftype_psi_si
, d
->code
);
3188 for (i
=0, d
= bdesc_2argdi
; i
< sizeof(bdesc_2argdi
) / sizeof *d
; i
++, d
++)
3189 def_builtin (d
->name
, di_ftype_pdi_di
, d
->code
);
3192 /* Expand fetch_and_op intrinsics. The basic code sequence is:
3195 ldsz return = [ptr];
3201 cmpxchgsz.acq tmp = [ptr], tmp
3202 cmpxchgsz.acq tmp = [ptr], tmp
3203 } while (tmp != oldval)
3206 ia64_expand_fetch_and_op (code
, mode
, operands
)
3207 enum fetchop_code code
;
3208 enum machine_mode mode
;
3211 rtx oldval
, newlabel
;
3212 rtx tmp_reg
= gen_rtx_REG (mode
, GR_REG(0));
3213 rtx mfreg
= gen_rtx_MEM (BLKmode
, tmp_reg
);
3214 RTX_UNCHANGING_P (mfreg
) = 1;
3215 emit_insn (gen_mf (mfreg
));
3216 tmp_reg
= gen_reg_rtx (mode
);
3217 oldval
= gen_reg_rtx (mode
);
3221 emit_insn (gen_movsi (operands
[0], operands
[1]));
3222 emit_insn (gen_movsi (tmp_reg
, operands
[0]));
3226 emit_insn (gen_movdi (operands
[0], operands
[1]));
3227 emit_insn (gen_movdi (tmp_reg
, operands
[0]));
3230 newlabel
= gen_label_rtx ();
3231 emit_label (newlabel
);
3234 emit_insn (gen_movsi (oldval
, tmp_reg
));
3235 emit_insn (gen_ccv_restore_si (tmp_reg
));
3239 emit_insn (gen_movdi (oldval
, tmp_reg
));
3240 emit_insn (gen_ccv_restore_di (tmp_reg
));
3243 /* Perform the specific operation. */
3249 if (GET_CODE (operands
[2]) == CONST_INT
)
3250 reg
= gen_reg_rtx (mode
);
3255 if (reg
!= operands
[2])
3256 emit_insn (gen_movsi (reg
, operands
[2]));
3257 emit_insn (gen_addsi3 (tmp_reg
, tmp_reg
, reg
));
3261 if (reg
!= operands
[2])
3262 emit_insn (gen_movdi (reg
, operands
[2]));
3263 emit_insn (gen_adddi3 (tmp_reg
, tmp_reg
, reg
));
3270 emit_insn (gen_subsi3 (tmp_reg
, tmp_reg
, operands
[2]));
3272 emit_insn (gen_subdi3 (tmp_reg
, tmp_reg
, operands
[2]));
3276 emit_insn (gen_iordi3 (tmp_reg
, tmp_reg
, operands
[2]));
3280 emit_insn (gen_anddi3 (tmp_reg
, tmp_reg
, operands
[2]));
3284 emit_insn (gen_xordi3 (tmp_reg
, tmp_reg
, operands
[2]));
3288 emit_insn (gen_anddi3 (tmp_reg
, tmp_reg
, operands
[2]));
3290 emit_insn (gen_one_cmplsi2 (tmp_reg
, operands
[0]));
3292 emit_insn (gen_one_cmpldi2 (tmp_reg
, operands
[0]));
3300 emit_insn (gen_cmpxchg_acq_si (tmp_reg
, operands
[1], tmp_reg
));
3302 emit_insn (gen_cmpxchg_acq_di (tmp_reg
, operands
[1], tmp_reg
));
3304 emit_cmp_and_jump_insns (tmp_reg
, oldval
, NE
, 0, mode
, 1, 0, newlabel
);
3307 /* Expand op_and_fetch intrinsics. The basic code sequence is:
3310 ldsz return = [ptr];
3314 return = tmp + value;
3315 cmpxchgsz.acq tmp = [ptr], return
3316 } while (tmp != oldval)
3319 ia64_expand_op_and_fetch (code
, mode
, operands
)
3320 enum fetchop_code code
;
3321 enum machine_mode mode
;
3324 rtx oldval
, newlabel
;
3325 rtx tmp_reg
, tmp2_reg
= gen_rtx_REG (mode
, GR_REG(0));
3326 rtx mfreg
= gen_rtx_MEM (BLKmode
, tmp2_reg
);
3327 RTX_UNCHANGING_P (mfreg
) = 1;
3329 emit_insn (gen_mf (mfreg
));
3330 tmp_reg
= gen_reg_rtx (mode
);
3332 emit_insn (gen_movsi (tmp_reg
, operands
[1]));
3334 emit_insn (gen_movdi (tmp_reg
, operands
[1]));
3336 newlabel
= gen_label_rtx ();
3337 emit_label (newlabel
);
3338 oldval
= gen_reg_rtx (mode
);
3341 emit_insn (gen_movsi (oldval
, tmp_reg
));
3342 emit_insn (gen_ccv_restore_si (tmp_reg
));
3346 emit_insn (gen_movdi (oldval
, tmp_reg
));
3347 emit_insn (gen_ccv_restore_di (tmp_reg
));
3350 /* Perform the specific operation. */
3355 emit_insn (gen_addsi3 (operands
[0], tmp_reg
, operands
[2]));
3357 emit_insn (gen_adddi3 (operands
[0], tmp_reg
, operands
[2]));
3362 emit_insn (gen_subsi3 (operands
[0], tmp_reg
, operands
[2]));
3364 emit_insn (gen_subdi3 (operands
[0], tmp_reg
, operands
[2]));
3368 emit_insn (gen_iordi3 (operands
[0], tmp_reg
, operands
[2]));
3372 emit_insn (gen_anddi3 (operands
[0], tmp_reg
, operands
[2]));
3376 emit_insn (gen_xordi3 (operands
[0], tmp_reg
, operands
[2]));
3380 emit_insn (gen_anddi3 (operands
[0], tmp_reg
, operands
[2]));
3382 emit_insn (gen_one_cmplsi2 (operands
[0], operands
[0]));
3384 emit_insn (gen_one_cmpldi2 (operands
[0], operands
[0]));
3392 emit_insn (gen_cmpxchg_acq_si (tmp_reg
, operands
[1], operands
[0]));
3394 emit_insn (gen_cmpxchg_acq_di (tmp_reg
, operands
[1], operands
[0]));
3396 emit_cmp_and_jump_insns (tmp_reg
, oldval
, NE
, 0, mode
, 1, 0, newlabel
);
3399 /* Expand val_ and bool_compare_and_swap. For val_ we want:
3403 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
3406 For bool_ it's the same except return ret == oldval.
3409 ia64_expand_compare_and_swap (icode
, arglist
, target
, boolcode
)
3410 enum insn_code icode
;
3415 tree arg0
, arg1
, arg2
;
3416 rtx newlabel
, newlabel2
, op0
, op1
, op2
, pat
;
3417 enum machine_mode tmode
, mode0
, mode1
, mode2
;
3419 arg0
= TREE_VALUE (arglist
);
3420 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
3421 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
3422 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3423 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
3424 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
3425 tmode
= insn_data
[icode
].operand
[0].mode
;
3426 mode0
= insn_data
[icode
].operand
[1].mode
;
3427 mode1
= insn_data
[icode
].operand
[2].mode
;
3428 mode2
= insn_data
[icode
].operand
[3].mode
;
3430 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
3431 RTX_UNCHANGING_P (op0
) = 1;
3432 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
3433 op1
= copy_to_mode_reg (mode1
, op1
);
3434 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
3435 op2
= copy_to_mode_reg (mode2
, op2
);
3437 || GET_MODE (target
) != tmode
3438 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
3439 target
= gen_reg_rtx (tmode
);
3441 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
3447 if (tmode
== SImode
)
3449 emit_insn (gen_cmpsi (target
, op1
));
3450 emit_insn (gen_seq (gen_lowpart (DImode
, target
)));
3454 emit_insn (gen_cmpdi (target
, op1
));
3455 emit_insn (gen_seq (target
));
3461 /* Expand all intrinsics that take 2 arguments. */
3463 ia64_expand_binop_builtin (icode
, arglist
, target
)
3464 enum insn_code icode
;
3469 tree arg0
= TREE_VALUE (arglist
);
3470 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
3471 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3472 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
3473 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
3474 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
3475 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
3478 || GET_MODE (target
) != tmode
3479 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
3480 target
= gen_reg_rtx (tmode
);
3482 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
3483 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
3484 op1
= copy_to_mode_reg (mode1
, op1
);
3486 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
3494 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
3498 enum machine_mode mode
;
3501 rtx op0
, op1
, op2
, op3
, pat
;
3503 rtx newlabel
, newlabel2
;
3504 tree arg0
, arg1
, arg2
, arg3
;
3505 tree arglist
= TREE_OPERAND (exp
, 1);
3506 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
3507 int fcode
= DECL_FUNCTION_CODE (fndecl
);
3508 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
;
3509 enum insn_code icode
;
3512 struct builtin_description
*d
;
3516 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
3517 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si
, arglist
, target
, 1);
3518 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
3519 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si
, arglist
, target
, 0);
3520 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
3521 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di
, arglist
, target
, 1);
3522 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
3523 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di
, arglist
, target
, 0);
3524 case IA64_BUILTIN_SYNCHRONIZE
:
3525 /* Pass a volatile memory operand. */
3526 tmp_reg
= gen_rtx_REG (DImode
, GR_REG(0));
3527 target
= gen_rtx_MEM (BLKmode
, tmp_reg
);
3528 emit_insn (gen_mf (target
));
3531 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
3532 icode
= CODE_FOR_lock_test_and_set_si
;
3533 arg0
= TREE_VALUE (arglist
);
3534 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
3535 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3536 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
3537 tmode
= insn_data
[icode
].operand
[0].mode
;
3538 mode0
= insn_data
[icode
].operand
[1].mode
;
3539 mode1
= insn_data
[icode
].operand
[2].mode
;
3540 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
3541 RTX_UNCHANGING_P (op0
) = 1;
3542 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
3543 op1
= copy_to_mode_reg (mode1
, op1
);
3545 || GET_MODE (target
) != tmode
3546 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
3547 target
= gen_reg_rtx (tmode
);
3548 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
3554 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
3555 icode
= CODE_FOR_lock_test_and_set_di
;
3556 arg0
= TREE_VALUE (arglist
);
3557 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
3558 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3559 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
3560 tmode
= insn_data
[icode
].operand
[0].mode
;
3561 mode0
= insn_data
[icode
].operand
[1].mode
;
3562 mode1
= insn_data
[icode
].operand
[2].mode
;
3563 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
3564 RTX_UNCHANGING_P (op0
) = 1;
3565 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
3566 op1
= copy_to_mode_reg (mode1
, op1
);
3568 || GET_MODE (target
) != tmode
3569 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
3570 target
= gen_reg_rtx (tmode
);
3571 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
3577 case IA64_BUILTIN_LOCK_RELEASE_SI
:
3578 arg0
= TREE_VALUE (arglist
);
3579 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3580 op0
= gen_rtx_MEM (SImode
, copy_to_mode_reg (Pmode
, op0
));
3581 MEM_VOLATILE_P (op0
) = 1;
3582 emit_insn (gen_movsi (op0
, GEN_INT(0)));
3585 case IA64_BUILTIN_LOCK_RELEASE_DI
:
3586 arg0
= TREE_VALUE (arglist
);
3587 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
3588 op0
= gen_rtx_MEM (DImode
, copy_to_mode_reg (Pmode
, op0
));
3589 MEM_VOLATILE_P (op0
) = 1;
3590 emit_insn (gen_movdi (op0
, GEN_INT(0)));
3597 /* Expand all 32 bit intrinsics that take 2 arguments. */
3598 for (i
=0, d
= bdesc_2argsi
; i
< sizeof (bdesc_2argsi
) / sizeof *d
; i
++, d
++)
3599 if (d
->code
== fcode
)
3600 return ia64_expand_binop_builtin (d
->icode
, arglist
, target
);
3602 /* Expand all 64 bit intrinsics that take 2 arguments. */
3603 for (i
=0, d
= bdesc_2argdi
; i
< sizeof (bdesc_2argdi
) / sizeof *d
; i
++, d
++)
3604 if (d
->code
== fcode
)
3605 return ia64_expand_binop_builtin (d
->icode
, arglist
, target
);