1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
43 #include "sched-int.h"
46 #include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  Nonzero while a label (as opposed to a mere
   reference to one) is being output.  */
int ia64_asm_output_label = 0;
/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  Both operands are recorded here so
   the branch/scc expanders can reconstruct the comparison later.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  Maps the 96 stacked general
   registers (r32..r127) to their assembler names, indexed from zero.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  Assembler names
   for the eight incoming argument registers.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  Assembler names
   for the 80 local stacked registers.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* ??? These strings could be shared with REGISTER_NAMES.  Assembler names
   for the eight outgoing argument registers.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  Default is a 22-bit (addl) offset.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   section.  NOTE(review): the closing words of this comment were lost in
   extraction -- confirm wording against the original.  */
unsigned int ia64_section_threshold;
113 static rtx gen_tls_get_addr
PARAMS ((void));
114 static rtx gen_thread_pointer
PARAMS ((void));
115 static int find_gr_spill
PARAMS ((int));
116 static int next_scratch_gr_reg
PARAMS ((void));
117 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
118 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
119 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
120 static void finish_spill_pointers
PARAMS ((void));
121 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
122 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
123 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
124 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
125 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
126 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
128 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
129 static void fix_range
PARAMS ((const char *));
130 static struct machine_function
* ia64_init_machine_status
PARAMS ((void));
131 static void emit_insn_group_barriers
PARAMS ((FILE *, rtx
));
132 static void emit_all_insn_group_barriers
PARAMS ((FILE *, rtx
));
133 static void emit_predicate_relation_info
PARAMS ((void));
134 static bool ia64_in_small_data_p
PARAMS ((tree
));
135 static void ia64_encode_section_info
PARAMS ((tree
, int));
136 static const char *ia64_strip_name_encoding
PARAMS ((const char *));
137 static void process_epilogue
PARAMS ((void));
138 static int process_set
PARAMS ((FILE *, rtx
));
140 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
142 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
144 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
, int,
146 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
148 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
149 static bool ia64_assemble_integer
PARAMS ((rtx
, unsigned int, int));
150 static void ia64_output_function_prologue
PARAMS ((FILE *, HOST_WIDE_INT
));
151 static void ia64_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
152 static void ia64_output_function_end_prologue
PARAMS ((FILE *));
154 static int ia64_issue_rate
PARAMS ((void));
155 static int ia64_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
156 static void ia64_sched_init
PARAMS ((FILE *, int, int));
157 static void ia64_sched_finish
PARAMS ((FILE *, int));
158 static int ia64_internal_sched_reorder
PARAMS ((FILE *, int, rtx
*,
160 static int ia64_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
161 static int ia64_sched_reorder2
PARAMS ((FILE *, int, rtx
*, int *, int));
162 static int ia64_variable_issue
PARAMS ((FILE *, int, rtx
, int));
164 static void ia64_select_rtx_section
PARAMS ((enum machine_mode
, rtx
,
165 unsigned HOST_WIDE_INT
));
166 static void ia64_aix_select_section
PARAMS ((tree
, int,
167 unsigned HOST_WIDE_INT
))
169 static void ia64_aix_unique_section
PARAMS ((tree
, int))
171 static void ia64_aix_select_rtx_section
PARAMS ((enum machine_mode
, rtx
,
172 unsigned HOST_WIDE_INT
))
175 /* Table of valid machine attributes. */
176 static const struct attribute_spec ia64_attribute_table
[] =
178 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
179 { "syscall_linkage", 0, 0, false, true, true, NULL
},
180 { NULL
, 0, 0, false, false, false, NULL
}
183 /* Initialize the GCC target structure. */
184 #undef TARGET_ATTRIBUTE_TABLE
185 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
187 #undef TARGET_INIT_BUILTINS
188 #define TARGET_INIT_BUILTINS ia64_init_builtins
190 #undef TARGET_EXPAND_BUILTIN
191 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
193 #undef TARGET_ASM_BYTE_OP
194 #define TARGET_ASM_BYTE_OP "\tdata1\t"
195 #undef TARGET_ASM_ALIGNED_HI_OP
196 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
197 #undef TARGET_ASM_ALIGNED_SI_OP
198 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
199 #undef TARGET_ASM_ALIGNED_DI_OP
200 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
201 #undef TARGET_ASM_UNALIGNED_HI_OP
202 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
203 #undef TARGET_ASM_UNALIGNED_SI_OP
204 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
205 #undef TARGET_ASM_UNALIGNED_DI_OP
206 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
207 #undef TARGET_ASM_INTEGER
208 #define TARGET_ASM_INTEGER ia64_assemble_integer
210 #undef TARGET_ASM_FUNCTION_PROLOGUE
211 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
212 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
213 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
214 #undef TARGET_ASM_FUNCTION_EPILOGUE
215 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
217 #undef TARGET_IN_SMALL_DATA_P
218 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
219 #undef TARGET_ENCODE_SECTION_INFO
220 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
221 #undef TARGET_STRIP_NAME_ENCODING
222 #define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
224 #undef TARGET_SCHED_ADJUST_COST
225 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
226 #undef TARGET_SCHED_ISSUE_RATE
227 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
228 #undef TARGET_SCHED_VARIABLE_ISSUE
229 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
230 #undef TARGET_SCHED_INIT
231 #define TARGET_SCHED_INIT ia64_sched_init
232 #undef TARGET_SCHED_FINISH
233 #define TARGET_SCHED_FINISH ia64_sched_finish
234 #undef TARGET_SCHED_REORDER
235 #define TARGET_SCHED_REORDER ia64_sched_reorder
236 #undef TARGET_SCHED_REORDER2
237 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
240 #undef TARGET_HAVE_TLS
241 #define TARGET_HAVE_TLS true
244 struct gcc_target targetm
= TARGET_INITIALIZER
;
246 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
249 call_operand (op
, mode
)
251 enum machine_mode mode
;
253 if (mode
!= GET_MODE (op
))
256 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
257 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
260 /* Return 1 if OP refers to a symbol in the sdata section. */
263 sdata_symbolic_operand (op
, mode
)
265 enum machine_mode mode ATTRIBUTE_UNUSED
;
267 switch (GET_CODE (op
))
270 if (GET_CODE (XEXP (op
, 0)) != PLUS
271 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
273 op
= XEXP (XEXP (op
, 0), 0);
277 if (CONSTANT_POOL_ADDRESS_P (op
))
278 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
281 const char *str
= XSTR (op
, 0);
282 return (str
[0] == ENCODE_SECTION_INFO_CHAR
&& str
[1] == 's');
292 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
295 got_symbolic_operand (op
, mode
)
297 enum machine_mode mode ATTRIBUTE_UNUSED
;
299 switch (GET_CODE (op
))
303 if (GET_CODE (op
) != PLUS
)
305 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
308 if (GET_CODE (op
) != CONST_INT
)
313 /* Ok if we're not using GOT entries at all. */
314 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
317 /* "Ok" while emitting rtl, since otherwise we won't be provided
318 with the entire offset during emission, which makes it very
319 hard to split the offset into high and low parts. */
320 if (rtx_equal_function_value_matters
)
323 /* Force the low 14 bits of the constant to zero so that we do not
324 use up so many GOT entries. */
325 return (INTVAL (op
) & 0x3fff) == 0;
337 /* Return 1 if OP refers to a symbol. */
340 symbolic_operand (op
, mode
)
342 enum machine_mode mode ATTRIBUTE_UNUSED
;
344 switch (GET_CODE (op
))
357 /* Return tls_model if OP refers to a TLS symbol. */
360 tls_symbolic_operand (op
, mode
)
362 enum machine_mode mode ATTRIBUTE_UNUSED
;
366 if (GET_CODE (op
) != SYMBOL_REF
)
369 if (str
[0] != ENCODE_SECTION_INFO_CHAR
)
374 return TLS_MODEL_GLOBAL_DYNAMIC
;
376 return TLS_MODEL_LOCAL_DYNAMIC
;
378 return TLS_MODEL_INITIAL_EXEC
;
380 return TLS_MODEL_LOCAL_EXEC
;
386 /* Return 1 if OP refers to a function. */
389 function_operand (op
, mode
)
391 enum machine_mode mode ATTRIBUTE_UNUSED
;
393 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
399 /* Return 1 if OP is setjmp or a similar function. */
401 /* ??? This is an unsatisfying solution. Should rethink. */
404 setjmp_operand (op
, mode
)
406 enum machine_mode mode ATTRIBUTE_UNUSED
;
411 if (GET_CODE (op
) != SYMBOL_REF
)
416 /* The following code is borrowed from special_function_p in calls.c. */
418 /* Disregard prefix _, __ or __x. */
421 if (name
[1] == '_' && name
[2] == 'x')
423 else if (name
[1] == '_')
433 && (! strcmp (name
, "setjmp")
434 || ! strcmp (name
, "setjmp_syscall")))
436 && ! strcmp (name
, "sigsetjmp"))
438 && ! strcmp (name
, "savectx")));
440 else if ((name
[0] == 'q' && name
[1] == 's'
441 && ! strcmp (name
, "qsetjmp"))
442 || (name
[0] == 'v' && name
[1] == 'f'
443 && ! strcmp (name
, "vfork")))
449 /* Return 1 if OP is a general operand, but when pic exclude symbolic
452 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
453 from PREDICATE_CODES. */
456 move_operand (op
, mode
)
458 enum machine_mode mode
;
460 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
463 return general_operand (op
, mode
);
466 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
469 gr_register_operand (op
, mode
)
471 enum machine_mode mode
;
473 if (! register_operand (op
, mode
))
475 if (GET_CODE (op
) == SUBREG
)
476 op
= SUBREG_REG (op
);
477 if (GET_CODE (op
) == REG
)
479 unsigned int regno
= REGNO (op
);
480 if (regno
< FIRST_PSEUDO_REGISTER
)
481 return GENERAL_REGNO_P (regno
);
486 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
489 fr_register_operand (op
, mode
)
491 enum machine_mode mode
;
493 if (! register_operand (op
, mode
))
495 if (GET_CODE (op
) == SUBREG
)
496 op
= SUBREG_REG (op
);
497 if (GET_CODE (op
) == REG
)
499 unsigned int regno
= REGNO (op
);
500 if (regno
< FIRST_PSEUDO_REGISTER
)
501 return FR_REGNO_P (regno
);
506 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
509 grfr_register_operand (op
, mode
)
511 enum machine_mode mode
;
513 if (! register_operand (op
, mode
))
515 if (GET_CODE (op
) == SUBREG
)
516 op
= SUBREG_REG (op
);
517 if (GET_CODE (op
) == REG
)
519 unsigned int regno
= REGNO (op
);
520 if (regno
< FIRST_PSEUDO_REGISTER
)
521 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
526 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
529 gr_nonimmediate_operand (op
, mode
)
531 enum machine_mode mode
;
533 if (! nonimmediate_operand (op
, mode
))
535 if (GET_CODE (op
) == SUBREG
)
536 op
= SUBREG_REG (op
);
537 if (GET_CODE (op
) == REG
)
539 unsigned int regno
= REGNO (op
);
540 if (regno
< FIRST_PSEUDO_REGISTER
)
541 return GENERAL_REGNO_P (regno
);
546 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
549 fr_nonimmediate_operand (op
, mode
)
551 enum machine_mode mode
;
553 if (! nonimmediate_operand (op
, mode
))
555 if (GET_CODE (op
) == SUBREG
)
556 op
= SUBREG_REG (op
);
557 if (GET_CODE (op
) == REG
)
559 unsigned int regno
= REGNO (op
);
560 if (regno
< FIRST_PSEUDO_REGISTER
)
561 return FR_REGNO_P (regno
);
566 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
569 grfr_nonimmediate_operand (op
, mode
)
571 enum machine_mode mode
;
573 if (! nonimmediate_operand (op
, mode
))
575 if (GET_CODE (op
) == SUBREG
)
576 op
= SUBREG_REG (op
);
577 if (GET_CODE (op
) == REG
)
579 unsigned int regno
= REGNO (op
);
580 if (regno
< FIRST_PSEUDO_REGISTER
)
581 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
586 /* Return 1 if OP is a GR register operand, or zero. */
589 gr_reg_or_0_operand (op
, mode
)
591 enum machine_mode mode
;
593 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
596 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
599 gr_reg_or_5bit_operand (op
, mode
)
601 enum machine_mode mode
;
603 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
604 || GET_CODE (op
) == CONSTANT_P_RTX
605 || gr_register_operand (op
, mode
));
608 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
611 gr_reg_or_6bit_operand (op
, mode
)
613 enum machine_mode mode
;
615 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
616 || GET_CODE (op
) == CONSTANT_P_RTX
617 || gr_register_operand (op
, mode
));
620 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
623 gr_reg_or_8bit_operand (op
, mode
)
625 enum machine_mode mode
;
627 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
628 || GET_CODE (op
) == CONSTANT_P_RTX
629 || gr_register_operand (op
, mode
));
632 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
635 grfr_reg_or_8bit_operand (op
, mode
)
637 enum machine_mode mode
;
639 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
640 || GET_CODE (op
) == CONSTANT_P_RTX
641 || grfr_register_operand (op
, mode
));
644 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
648 gr_reg_or_8bit_adjusted_operand (op
, mode
)
650 enum machine_mode mode
;
652 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
653 || GET_CODE (op
) == CONSTANT_P_RTX
654 || gr_register_operand (op
, mode
));
657 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
658 immediate and an 8 bit adjusted immediate operand. This is necessary
659 because when we emit a compare, we don't know what the condition will be,
660 so we need the union of the immediates accepted by GT and LT. */
663 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
665 enum machine_mode mode
;
667 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
668 && CONST_OK_FOR_L (INTVAL (op
)))
669 || GET_CODE (op
) == CONSTANT_P_RTX
670 || gr_register_operand (op
, mode
));
673 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
676 gr_reg_or_14bit_operand (op
, mode
)
678 enum machine_mode mode
;
680 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
681 || GET_CODE (op
) == CONSTANT_P_RTX
682 || gr_register_operand (op
, mode
));
685 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
688 gr_reg_or_22bit_operand (op
, mode
)
690 enum machine_mode mode
;
692 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
693 || GET_CODE (op
) == CONSTANT_P_RTX
694 || gr_register_operand (op
, mode
));
697 /* Return 1 if OP is a 6 bit immediate operand. */
700 shift_count_operand (op
, mode
)
702 enum machine_mode mode ATTRIBUTE_UNUSED
;
704 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
705 || GET_CODE (op
) == CONSTANT_P_RTX
);
708 /* Return 1 if OP is a 5 bit immediate operand. */
711 shift_32bit_count_operand (op
, mode
)
713 enum machine_mode mode ATTRIBUTE_UNUSED
;
715 return ((GET_CODE (op
) == CONST_INT
716 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
717 || GET_CODE (op
) == CONSTANT_P_RTX
);
720 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
723 shladd_operand (op
, mode
)
725 enum machine_mode mode ATTRIBUTE_UNUSED
;
727 return (GET_CODE (op
) == CONST_INT
728 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
729 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
732 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
735 fetchadd_operand (op
, mode
)
737 enum machine_mode mode ATTRIBUTE_UNUSED
;
739 return (GET_CODE (op
) == CONST_INT
740 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
741 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
742 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
743 INTVAL (op
) == 8 || INTVAL (op
) == 16));
746 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
749 fr_reg_or_fp01_operand (op
, mode
)
751 enum machine_mode mode
;
753 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
754 || fr_register_operand (op
, mode
));
757 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
758 POST_MODIFY with a REG as displacement. */
761 destination_operand (op
, mode
)
763 enum machine_mode mode
;
765 if (! nonimmediate_operand (op
, mode
))
767 if (GET_CODE (op
) == MEM
768 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
769 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
774 /* Like memory_operand, but don't allow post-increments. */
777 not_postinc_memory_operand (op
, mode
)
779 enum machine_mode mode
;
781 return (memory_operand (op
, mode
)
782 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
785 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
786 signed immediate operand. */
789 normal_comparison_operator (op
, mode
)
791 enum machine_mode mode
;
793 enum rtx_code code
= GET_CODE (op
);
794 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
795 && (code
== EQ
|| code
== NE
796 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
799 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
800 signed immediate operand. */
803 adjusted_comparison_operator (op
, mode
)
805 enum machine_mode mode
;
807 enum rtx_code code
= GET_CODE (op
);
808 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
809 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
812 /* Return 1 if this is a signed inequality operator. */
815 signed_inequality_operator (op
, mode
)
817 enum machine_mode mode
;
819 enum rtx_code code
= GET_CODE (op
);
820 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
821 && (code
== GE
|| code
== GT
822 || code
== LE
|| code
== LT
));
825 /* Return 1 if this operator is valid for predication. */
828 predicate_operator (op
, mode
)
830 enum machine_mode mode
;
832 enum rtx_code code
= GET_CODE (op
);
833 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
834 && (code
== EQ
|| code
== NE
));
837 /* Return 1 if this operator can be used in a conditional operation. */
840 condop_operator (op
, mode
)
842 enum machine_mode mode
;
844 enum rtx_code code
= GET_CODE (op
);
845 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
846 && (code
== PLUS
|| code
== MINUS
|| code
== AND
847 || code
== IOR
|| code
== XOR
));
850 /* Return 1 if this is the ar.lc register. */
853 ar_lc_reg_operand (op
, mode
)
855 enum machine_mode mode
;
857 return (GET_MODE (op
) == DImode
858 && (mode
== DImode
|| mode
== VOIDmode
)
859 && GET_CODE (op
) == REG
860 && REGNO (op
) == AR_LC_REGNUM
);
863 /* Return 1 if this is the ar.ccv register. */
866 ar_ccv_reg_operand (op
, mode
)
868 enum machine_mode mode
;
870 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
871 && GET_CODE (op
) == REG
872 && REGNO (op
) == AR_CCV_REGNUM
);
875 /* Return 1 if this is the ar.pfs register. */
878 ar_pfs_reg_operand (op
, mode
)
880 enum machine_mode mode
;
882 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
883 && GET_CODE (op
) == REG
884 && REGNO (op
) == AR_PFS_REGNUM
);
887 /* Like general_operand, but don't allow (mem (addressof)). */
890 general_tfmode_operand (op
, mode
)
892 enum machine_mode mode
;
894 if (! general_operand (op
, mode
))
896 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
904 destination_tfmode_operand (op
, mode
)
906 enum machine_mode mode
;
908 if (! destination_operand (op
, mode
))
910 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
918 tfreg_or_fp01_operand (op
, mode
)
920 enum machine_mode mode
;
922 if (GET_CODE (op
) == SUBREG
)
924 return fr_reg_or_fp01_operand (op
, mode
);
927 /* Return 1 if OP is valid as a base register in a reg + offset address. */
930 basereg_operand (op
, mode
)
932 enum machine_mode mode
;
934 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
935 checks from pa.c basereg_operand as well? Seems to be OK without them
938 return (register_operand (op
, mode
) &&
939 REG_POINTER ((GET_CODE (op
) == SUBREG
) ? SUBREG_REG (op
) : op
));
942 /* Return 1 if the operands of a move are ok. */
945 ia64_move_ok (dst
, src
)
948 /* If we're under init_recog_no_volatile, we'll not be able to use
949 memory_operand. So check the code directly and don't worry about
950 the validity of the underlying address, which should have been
951 checked elsewhere anyway. */
952 if (GET_CODE (dst
) != MEM
)
954 if (GET_CODE (src
) == MEM
)
956 if (register_operand (src
, VOIDmode
))
959 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
960 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
961 return src
== const0_rtx
;
963 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
966 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
967 Return the length of the field, or <= 0 on failure. */
970 ia64_depz_field_mask (rop
, rshift
)
973 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
974 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
976 /* Get rid of the zero bits we're shifting in. */
979 /* We must now have a solid block of 1's at bit 0. */
980 return exact_log2 (op
+ 1);
983 /* Expand a symbolic constant load. */
984 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
987 ia64_expand_load_address (dest
, src
, scratch
)
988 rtx dest
, src
, scratch
;
992 /* The destination could be a MEM during initial rtl generation,
993 which isn't a valid destination for the PIC load address patterns. */
994 if (! register_operand (dest
, DImode
))
995 temp
= gen_reg_rtx (DImode
);
999 if (tls_symbolic_operand (src
, Pmode
))
1002 if (TARGET_AUTO_PIC
)
1003 emit_insn (gen_load_gprel64 (temp
, src
));
1004 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
1005 emit_insn (gen_load_fptr (temp
, src
));
1006 else if (sdata_symbolic_operand (src
, DImode
))
1007 emit_insn (gen_load_gprel (temp
, src
));
1008 else if (GET_CODE (src
) == CONST
1009 && GET_CODE (XEXP (src
, 0)) == PLUS
1010 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
1011 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
1013 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
1014 rtx sym
= XEXP (XEXP (src
, 0), 0);
1015 HOST_WIDE_INT ofs
, hi
, lo
;
1017 /* Split the offset into a sign extended 14-bit low part
1018 and a complementary high part. */
1019 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
1020 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
1024 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
1026 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
1028 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
1034 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
1036 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
1037 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
1041 emit_move_insn (dest
, temp
);
1044 static GTY(()) rtx gen_tls_tga
;
1050 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
1055 static GTY(()) rtx thread_pointer_rtx
;
1057 gen_thread_pointer ()
1059 if (!thread_pointer_rtx
)
1061 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1062 RTX_UNCHANGING_P (thread_pointer_rtx
);
1068 ia64_expand_move (op0
, op1
)
1071 enum machine_mode mode
= GET_MODE (op0
);
1073 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
1074 op1
= force_reg (mode
, op1
);
1078 enum tls_model tls_kind
;
1079 if ((tls_kind
= tls_symbolic_operand (op1
, Pmode
)))
1081 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
1085 case TLS_MODEL_GLOBAL_DYNAMIC
:
1088 tga_op1
= gen_reg_rtx (Pmode
);
1089 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
1090 tga_op1
= gen_rtx_MEM (Pmode
, tga_op1
);
1091 RTX_UNCHANGING_P (tga_op1
) = 1;
1093 tga_op2
= gen_reg_rtx (Pmode
);
1094 emit_insn (gen_load_ltoff_dtprel (tga_op2
, op1
));
1095 tga_op2
= gen_rtx_MEM (Pmode
, tga_op2
);
1096 RTX_UNCHANGING_P (tga_op2
) = 1;
1098 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1099 LCT_CONST
, Pmode
, 2, tga_op1
,
1100 Pmode
, tga_op2
, Pmode
);
1102 insns
= get_insns ();
1105 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
1108 case TLS_MODEL_LOCAL_DYNAMIC
:
1109 /* ??? This isn't the completely proper way to do local-dynamic
1110 If the call to __tls_get_addr is used only by a single symbol,
1111 then we should (somehow) move the dtprel to the second arg
1112 to avoid the extra add. */
1115 tga_op1
= gen_reg_rtx (Pmode
);
1116 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
1117 tga_op1
= gen_rtx_MEM (Pmode
, tga_op1
);
1118 RTX_UNCHANGING_P (tga_op1
) = 1;
1120 tga_op2
= const0_rtx
;
1122 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1123 LCT_CONST
, Pmode
, 2, tga_op1
,
1124 Pmode
, tga_op2
, Pmode
);
1126 insns
= get_insns ();
1129 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1131 tmp
= gen_reg_rtx (Pmode
);
1132 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
1134 if (register_operand (op0
, Pmode
))
1137 tga_ret
= gen_reg_rtx (Pmode
);
1140 emit_insn (gen_load_dtprel (tga_ret
, op1
));
1141 emit_insn (gen_adddi3 (tga_ret
, tmp
, tga_ret
));
1144 emit_insn (gen_add_dtprel (tga_ret
, tmp
, op1
));
1150 case TLS_MODEL_INITIAL_EXEC
:
1151 tmp
= gen_reg_rtx (Pmode
);
1152 emit_insn (gen_load_ltoff_tprel (tmp
, op1
));
1153 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1154 RTX_UNCHANGING_P (tmp
) = 1;
1155 tmp
= force_reg (Pmode
, tmp
);
1157 if (register_operand (op0
, Pmode
))
1160 op1
= gen_reg_rtx (Pmode
);
1161 emit_insn (gen_adddi3 (op1
, tmp
, gen_thread_pointer ()));
1166 case TLS_MODEL_LOCAL_EXEC
:
1167 if (register_operand (op0
, Pmode
))
1170 tmp
= gen_reg_rtx (Pmode
);
1173 emit_insn (gen_load_tprel (tmp
, op1
));
1174 emit_insn (gen_adddi3 (tmp
, gen_thread_pointer (), tmp
));
1177 emit_insn (gen_add_tprel (tmp
, gen_thread_pointer (), op1
));
1187 else if (!TARGET_NO_PIC
&& symbolic_operand (op1
, DImode
))
1189 /* Before optimization starts, delay committing to any particular
1190 type of PIC address load. If this function gets deferred, we
1191 may acquire information that changes the value of the
1192 sdata_symbolic_operand predicate.
1194 But don't delay for function pointers. Loading a function address
1195 actually loads the address of the descriptor not the function.
1196 If we represent these as SYMBOL_REFs, then they get cse'd with
1197 calls, and we end up with calls to the descriptor address instead
1198 of calls to the function address. Functions are not candidates
1201 Don't delay for LABEL_REF because the splitter loses REG_LABEL
1202 notes. Don't delay for pool addresses on general principals;
1203 they'll never become non-local behind our back. */
1205 if (rtx_equal_function_value_matters
1206 && GET_CODE (op1
) != LABEL_REF
1207 && ! (GET_CODE (op1
) == SYMBOL_REF
1208 && (SYMBOL_REF_FLAG (op1
)
1209 || CONSTANT_POOL_ADDRESS_P (op1
)
1210 || STRING_POOL_ADDRESS_P (op1
))))
1211 emit_insn (gen_movdi_symbolic (op0
, op1
));
1213 ia64_expand_load_address (op0
, op1
, NULL_RTX
);
1222 ia64_gp_save_reg (setjmp_p
)
1225 rtx save
= cfun
->machine
->ia64_gp_save
;
1229 /* We can't save GP in a pseudo if we are calling setjmp, because
1230 pseudos won't be restored by longjmp. For now, we save it in r4. */
1231 /* ??? It would be more efficient to save this directly into a stack
1232 slot. Unfortunately, the stack slot address gets cse'd across
1233 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
1236 /* ??? Get the barf bag, Virginia. We've got to replace this thing
1237 in place, since this rtx is used in exception handling receivers.
1238 Moreover, we must get this rtx out of regno_reg_rtx or reload
1239 will do the wrong thing. */
1240 unsigned int old_regno
= REGNO (save
);
1241 if (setjmp_p
&& old_regno
!= GR_REG (4))
1243 REGNO (save
) = GR_REG (4);
1244 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
1250 save
= gen_rtx_REG (DImode
, GR_REG (4));
1251 else if (! optimize
)
1252 save
= gen_rtx_REG (DImode
, LOC_REG (0));
1254 save
= gen_reg_rtx (DImode
);
1255 cfun
->machine
->ia64_gp_save
= save
;
1261 /* Split a post-reload TImode reference into two DImode components. */
1264 ia64_split_timode (out
, in
, scratch
)
1268 switch (GET_CODE (in
))
1271 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
1272 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1277 rtx base
= XEXP (in
, 0);
1279 switch (GET_CODE (base
))
1282 out
[0] = adjust_address (in
, DImode
, 0);
1285 base
= XEXP (base
, 0);
1286 out
[0] = adjust_address (in
, DImode
, 0);
1289 /* Since we're changing the mode, we need to change to POST_MODIFY
1290 as well to preserve the size of the increment. Either that or
1291 do the update in two steps, but we've already got this scratch
1292 register handy so let's use it. */
1294 base
= XEXP (base
, 0);
1296 = change_address (in
, DImode
,
1298 (Pmode
, base
, plus_constant (base
, 16)));
1301 base
= XEXP (base
, 0);
1303 = change_address (in
, DImode
,
1305 (Pmode
, base
, plus_constant (base
, -16)));
1311 if (scratch
== NULL_RTX
)
1313 out
[1] = change_address (in
, DImode
, scratch
);
1314 return gen_adddi3 (scratch
, base
, GEN_INT (8));
1319 split_double (in
, &out
[0], &out
[1]);
1327 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1328 through memory plus an extra GR scratch register. Except that you can
1329 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1330 SECONDARY_RELOAD_CLASS, but not both.
1332 We got into problems in the first place by allowing a construct like
1333 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1334 This solution attempts to prevent this situation from occurring. When
1335 we see something like the above, we spill the inner register to memory. */
1338 spill_tfmode_operand (in
, force
)
1342 if (GET_CODE (in
) == SUBREG
1343 && GET_MODE (SUBREG_REG (in
)) == TImode
1344 && GET_CODE (SUBREG_REG (in
)) == REG
)
1346 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
1347 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1349 else if (force
&& GET_CODE (in
) == REG
)
1351 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
1352 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1354 else if (GET_CODE (in
) == MEM
1355 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
1356 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
1361 /* Emit comparison instruction if necessary, returning the expression
1362 that holds the compare result in the proper mode. */
1365 ia64_expand_compare (code
, mode
)
1367 enum machine_mode mode
;
1369 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1372 /* If we have a BImode input, then we already have a compare result, and
1373 do not need to emit another comparison. */
1374 if (GET_MODE (op0
) == BImode
)
1376 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1383 cmp
= gen_reg_rtx (BImode
);
1384 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1385 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1389 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1392 /* Emit the appropriate sequence for a call. */
1395 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1401 rtx insn
, b0
, pfs
, gp_save
, narg_rtx
, dest
;
1405 addr
= XEXP (addr
, 0);
1406 b0
= gen_rtx_REG (DImode
, R_BR (0));
1407 pfs
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
1411 else if (IN_REGNO_P (REGNO (nextarg
)))
1412 narg
= REGNO (nextarg
) - IN_REG (0);
1414 narg
= REGNO (nextarg
) - OUT_REG (0);
1415 narg_rtx
= GEN_INT (narg
);
1417 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1420 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
, pfs
);
1422 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1424 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1425 emit_call_insn (insn
);
1429 indirect_p
= ! symbolic_operand (addr
, VOIDmode
);
1431 if (sibcall_p
|| (TARGET_CONST_GP
&& !indirect_p
))
1434 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1437 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1439 /* If this is an indirect call, then we have the address of a descriptor. */
1442 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1443 emit_move_insn (pic_offset_table_rtx
,
1444 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1450 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
, pfs
);
1452 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1454 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1455 emit_call_insn (insn
);
1458 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1461 /* Begin the assembly file. */
1464 emit_safe_across_calls (f
)
1467 unsigned int rs
, re
;
1474 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1478 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1482 fputs ("\t.pred.safe_across_calls ", f
);
1488 fprintf (f
, "p%u", rs
);
1490 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1498 /* Structure to be filled in by ia64_compute_frame_size with register
1499 save masks and offsets for the current function. */
1501 struct ia64_frame_info
1503 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
1504 the caller's scratch area. */
1505 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
1506 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
1507 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
1508 HARD_REG_SET mask
; /* mask of saved registers. */
1509 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
1510 registers or long-term scratches. */
1511 int n_spilled
; /* number of spilled registers. */
1512 int reg_fp
; /* register for fp. */
1513 int reg_save_b0
; /* save register for b0. */
1514 int reg_save_pr
; /* save register for prs. */
1515 int reg_save_ar_pfs
; /* save register for ar.pfs. */
1516 int reg_save_ar_unat
; /* save register for ar.unat. */
1517 int reg_save_ar_lc
; /* save register for ar.lc. */
1518 int n_input_regs
; /* number of input registers used. */
1519 int n_local_regs
; /* number of local registers used. */
1520 int n_output_regs
; /* number of output registers used. */
1521 int n_rotate_regs
; /* number of rotating registers used. */
1523 char need_regstk
; /* true if a .regstk directive needed. */
1524 char initialized
; /* true if the data is finalized. */
1527 /* Current frame information calculated by ia64_compute_frame_size. */
1528 static struct ia64_frame_info current_frame_info
;
1530 /* Helper function for ia64_compute_frame_size: find an appropriate general
1531 register to spill some special register to. SPECIAL_SPILL_MASK contains
1532 bits in GR0 to GR31 that have already been allocated by this routine.
1533 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1536 find_gr_spill (try_locals
)
1541 /* If this is a leaf function, first try an otherwise unused
1542 call-clobbered register. */
1543 if (current_function_is_leaf
)
1545 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1546 if (! regs_ever_live
[regno
]
1547 && call_used_regs
[regno
]
1548 && ! fixed_regs
[regno
]
1549 && ! global_regs
[regno
]
1550 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1552 current_frame_info
.gr_used_mask
|= 1 << regno
;
1559 regno
= current_frame_info
.n_local_regs
;
1560 /* If there is a frame pointer, then we can't use loc79, because
1561 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1562 reg_name switching code in ia64_expand_prologue. */
1563 if (regno
< (80 - frame_pointer_needed
))
1565 current_frame_info
.n_local_regs
= regno
+ 1;
1566 return LOC_REG (0) + regno
;
1570 /* Failed to find a general register to spill to. Must use stack. */
1574 /* In order to make for nice schedules, we try to allocate every temporary
1575 to a different register. We must of course stay away from call-saved,
1576 fixed, and global registers. We must also stay away from registers
1577 allocated in current_frame_info.gr_used_mask, since those include regs
1578 used all through the prologue.
1580 Any register allocated here must be used immediately. The idea is to
1581 aid scheduling, not to solve data flow problems. */
1583 static int last_scratch_gr_reg
;
1586 next_scratch_gr_reg ()
1590 for (i
= 0; i
< 32; ++i
)
1592 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1593 if (call_used_regs
[regno
]
1594 && ! fixed_regs
[regno
]
1595 && ! global_regs
[regno
]
1596 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1598 last_scratch_gr_reg
= regno
;
1603 /* There must be _something_ available. */
1607 /* Helper function for ia64_compute_frame_size, called through
1608 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1611 mark_reg_gr_used_mask (reg
, data
)
1613 void *data ATTRIBUTE_UNUSED
;
1615 unsigned int regno
= REGNO (reg
);
1618 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1619 for (i
= 0; i
< n
; ++i
)
1620 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1624 /* Returns the number of bytes offset between the frame pointer and the stack
1625 pointer for the current function. SIZE is the number of bytes of space
1626 needed for local variables. */
1629 ia64_compute_frame_size (size
)
1632 HOST_WIDE_INT total_size
;
1633 HOST_WIDE_INT spill_size
= 0;
1634 HOST_WIDE_INT extra_spill_size
= 0;
1635 HOST_WIDE_INT pretend_args_size
;
1638 int spilled_gr_p
= 0;
1639 int spilled_fr_p
= 0;
1643 if (current_frame_info
.initialized
)
1646 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1647 CLEAR_HARD_REG_SET (mask
);
1649 /* Don't allocate scratches to the return register. */
1650 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1652 /* Don't allocate scratches to the EH scratch registers. */
1653 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1654 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1655 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1656 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1658 /* Find the size of the register stack frame. We have only 80 local
1659 registers, because we reserve 8 for the inputs and 8 for the
1662 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1663 since we'll be adjusting that down later. */
1664 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1665 for (; regno
>= LOC_REG (0); regno
--)
1666 if (regs_ever_live
[regno
])
1668 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1670 /* For functions marked with the syscall_linkage attribute, we must mark
1671 all eight input registers as in use, so that locals aren't visible to
1674 if (cfun
->machine
->n_varargs
> 0
1675 || lookup_attribute ("syscall_linkage",
1676 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1677 current_frame_info
.n_input_regs
= 8;
1680 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1681 if (regs_ever_live
[regno
])
1683 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1686 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1687 if (regs_ever_live
[regno
])
1689 i
= regno
- OUT_REG (0) + 1;
1691 /* When -p profiling, we need one output register for the mcount argument.
1692 Likwise for -a profiling for the bb_init_func argument. For -ax
1693 profiling, we need two output registers for the two bb_init_trace_func
1695 if (current_function_profile
)
1697 current_frame_info
.n_output_regs
= i
;
1699 /* ??? No rotating register support yet. */
1700 current_frame_info
.n_rotate_regs
= 0;
1702 /* Discover which registers need spilling, and how much room that
1703 will take. Begin with floating point and general registers,
1704 which will always wind up on the stack. */
1706 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1707 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1709 SET_HARD_REG_BIT (mask
, regno
);
1715 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1716 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1718 SET_HARD_REG_BIT (mask
, regno
);
1724 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1725 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1727 SET_HARD_REG_BIT (mask
, regno
);
1732 /* Now come all special registers that might get saved in other
1733 general registers. */
1735 if (frame_pointer_needed
)
1737 current_frame_info
.reg_fp
= find_gr_spill (1);
1738 /* If we did not get a register, then we take LOC79. This is guaranteed
1739 to be free, even if regs_ever_live is already set, because this is
1740 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1741 as we don't count loc79 above. */
1742 if (current_frame_info
.reg_fp
== 0)
1744 current_frame_info
.reg_fp
= LOC_REG (79);
1745 current_frame_info
.n_local_regs
++;
1749 if (! current_function_is_leaf
)
1751 /* Emit a save of BR0 if we call other functions. Do this even
1752 if this function doesn't return, as EH depends on this to be
1753 able to unwind the stack. */
1754 SET_HARD_REG_BIT (mask
, BR_REG (0));
1756 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1757 if (current_frame_info
.reg_save_b0
== 0)
1763 /* Similarly for ar.pfs. */
1764 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1765 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1766 if (current_frame_info
.reg_save_ar_pfs
== 0)
1768 extra_spill_size
+= 8;
1774 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1776 SET_HARD_REG_BIT (mask
, BR_REG (0));
1782 /* Unwind descriptor hackery: things are most efficient if we allocate
1783 consecutive GR save registers for RP, PFS, FP in that order. However,
1784 it is absolutely critical that FP get the only hard register that's
1785 guaranteed to be free, so we allocated it first. If all three did
1786 happen to be allocated hard regs, and are consecutive, rearrange them
1787 into the preferred order now. */
1788 if (current_frame_info
.reg_fp
!= 0
1789 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1790 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1792 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1793 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1794 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1797 /* See if we need to store the predicate register block. */
1798 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1799 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1801 if (regno
<= PR_REG (63))
1803 SET_HARD_REG_BIT (mask
, PR_REG (0));
1804 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1805 if (current_frame_info
.reg_save_pr
== 0)
1807 extra_spill_size
+= 8;
1811 /* ??? Mark them all as used so that register renaming and such
1812 are free to use them. */
1813 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1814 regs_ever_live
[regno
] = 1;
1817 /* If we're forced to use st8.spill, we're forced to save and restore
1819 if (spilled_gr_p
|| cfun
->machine
->n_varargs
)
1821 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1822 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1823 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1824 if (current_frame_info
.reg_save_ar_unat
== 0)
1826 extra_spill_size
+= 8;
1831 if (regs_ever_live
[AR_LC_REGNUM
])
1833 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1834 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1835 if (current_frame_info
.reg_save_ar_lc
== 0)
1837 extra_spill_size
+= 8;
1842 /* If we have an odd number of words of pretend arguments written to
1843 the stack, then the FR save area will be unaligned. We round the
1844 size of this area up to keep things 16 byte aligned. */
1846 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1848 pretend_args_size
= current_function_pretend_args_size
;
1850 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1851 + current_function_outgoing_args_size
);
1852 total_size
= IA64_STACK_ALIGN (total_size
);
1854 /* We always use the 16-byte scratch area provided by the caller, but
1855 if we are a leaf function, there's no one to which we need to provide
1857 if (current_function_is_leaf
)
1858 total_size
= MAX (0, total_size
- 16);
1860 current_frame_info
.total_size
= total_size
;
1861 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1862 current_frame_info
.spill_size
= spill_size
;
1863 current_frame_info
.extra_spill_size
= extra_spill_size
;
1864 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1865 current_frame_info
.n_spilled
= n_spilled
;
1866 current_frame_info
.initialized
= reload_completed
;
1869 /* Compute the initial difference between the specified pair of registers. */
1872 ia64_initial_elimination_offset (from
, to
)
1875 HOST_WIDE_INT offset
;
1877 ia64_compute_frame_size (get_frame_size ());
1880 case FRAME_POINTER_REGNUM
:
1881 if (to
== HARD_FRAME_POINTER_REGNUM
)
1883 if (current_function_is_leaf
)
1884 offset
= -current_frame_info
.total_size
;
1886 offset
= -(current_frame_info
.total_size
1887 - current_function_outgoing_args_size
- 16);
1889 else if (to
== STACK_POINTER_REGNUM
)
1891 if (current_function_is_leaf
)
1894 offset
= 16 + current_function_outgoing_args_size
;
1900 case ARG_POINTER_REGNUM
:
1901 /* Arguments start above the 16 byte save area, unless stdarg
1902 in which case we store through the 16 byte save area. */
1903 if (to
== HARD_FRAME_POINTER_REGNUM
)
1904 offset
= 16 - current_function_pretend_args_size
;
1905 else if (to
== STACK_POINTER_REGNUM
)
1906 offset
= (current_frame_info
.total_size
1907 + 16 - current_function_pretend_args_size
);
1912 case RETURN_ADDRESS_POINTER_REGNUM
:
1923 /* If there are more than a trivial number of register spills, we use
1924 two interleaved iterators so that we can get two memory references
1927 In order to simplify things in the prologue and epilogue expanders,
1928 we use helper functions to fix up the memory references after the
1929 fact with the appropriate offsets to a POST_MODIFY memory mode.
1930 The following data structure tracks the state of the two iterators
1931 while insns are being emitted. */
1933 struct spill_fill_data
1935 rtx init_after
; /* point at which to emit initializations */
1936 rtx init_reg
[2]; /* initial base register */
1937 rtx iter_reg
[2]; /* the iterator registers */
1938 rtx
*prev_addr
[2]; /* address of last memory use */
1939 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
1940 HOST_WIDE_INT prev_off
[2]; /* last offset */
1941 int n_iter
; /* number of iterators in use */
1942 int next_iter
; /* next iterator to use */
1943 unsigned int save_gr_used_mask
;
1946 static struct spill_fill_data spill_fill_data
;
1949 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
1952 HOST_WIDE_INT cfa_off
;
1956 spill_fill_data
.init_after
= get_last_insn ();
1957 spill_fill_data
.init_reg
[0] = init_reg
;
1958 spill_fill_data
.init_reg
[1] = init_reg
;
1959 spill_fill_data
.prev_addr
[0] = NULL
;
1960 spill_fill_data
.prev_addr
[1] = NULL
;
1961 spill_fill_data
.prev_insn
[0] = NULL
;
1962 spill_fill_data
.prev_insn
[1] = NULL
;
1963 spill_fill_data
.prev_off
[0] = cfa_off
;
1964 spill_fill_data
.prev_off
[1] = cfa_off
;
1965 spill_fill_data
.next_iter
= 0;
1966 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1968 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1969 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1971 int regno
= next_scratch_gr_reg ();
1972 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1973 current_frame_info
.gr_used_mask
|= 1 << regno
;
1978 finish_spill_pointers ()
1980 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1984 spill_restore_mem (reg
, cfa_off
)
1986 HOST_WIDE_INT cfa_off
;
1988 int iter
= spill_fill_data
.next_iter
;
1989 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1990 rtx disp_rtx
= GEN_INT (disp
);
1993 if (spill_fill_data
.prev_addr
[iter
])
1995 if (CONST_OK_FOR_N (disp
))
1997 *spill_fill_data
.prev_addr
[iter
]
1998 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1999 gen_rtx_PLUS (DImode
,
2000 spill_fill_data
.iter_reg
[iter
],
2002 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
2003 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
2004 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
2008 /* ??? Could use register post_modify for loads. */
2009 if (! CONST_OK_FOR_I (disp
))
2011 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2012 emit_move_insn (tmp
, disp_rtx
);
2015 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2016 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
2019 /* Micro-optimization: if we've created a frame pointer, it's at
2020 CFA 0, which may allow the real iterator to be initialized lower,
2021 slightly increasing parallelism. Also, if there are few saves
2022 it may eliminate the iterator entirely. */
2024 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
2025 && frame_pointer_needed
)
2027 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
2028 set_mem_alias_set (mem
, get_varargs_alias_set ());
2036 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
2037 spill_fill_data
.init_reg
[iter
]);
2042 if (! CONST_OK_FOR_I (disp
))
2044 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2045 emit_move_insn (tmp
, disp_rtx
);
2049 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2050 spill_fill_data
.init_reg
[iter
],
2053 seq
= gen_sequence ();
2057 /* Careful for being the first insn in a sequence. */
2058 if (spill_fill_data
.init_after
)
2059 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
2062 rtx first
= get_insns ();
2064 insn
= emit_insn_before (seq
, first
);
2066 insn
= emit_insn (seq
);
2068 spill_fill_data
.init_after
= insn
;
2070 /* If DISP is 0, we may or may not have a further adjustment
2071 afterward. If we do, then the load/store insn may be modified
2072 to be a post-modify. If we don't, then this copy may be
2073 eliminated by copyprop_hardreg_forward, which makes this
2074 insn garbage, which runs afoul of the sanity check in
2075 propagate_one_insn. So mark this insn as legal to delete. */
2077 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
2081 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
2083 /* ??? Not all of the spills are for varargs, but some of them are.
2084 The rest of the spills belong in an alias set of their own. But
2085 it doesn't actually hurt to include them here. */
2086 set_mem_alias_set (mem
, get_varargs_alias_set ());
2088 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
2089 spill_fill_data
.prev_off
[iter
] = cfa_off
;
2091 if (++iter
>= spill_fill_data
.n_iter
)
2093 spill_fill_data
.next_iter
= iter
;
2099 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
2100 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
2102 HOST_WIDE_INT cfa_off
;
2104 int iter
= spill_fill_data
.next_iter
;
2107 mem
= spill_restore_mem (reg
, cfa_off
);
2108 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
2109 spill_fill_data
.prev_insn
[iter
] = insn
;
2116 RTX_FRAME_RELATED_P (insn
) = 1;
2118 /* Don't even pretend that the unwind code can intuit its way
2119 through a pair of interleaved post_modify iterators. Just
2120 provide the correct answer. */
2122 if (frame_pointer_needed
)
2124 base
= hard_frame_pointer_rtx
;
2129 base
= stack_pointer_rtx
;
2130 off
= current_frame_info
.total_size
- cfa_off
;
2134 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2135 gen_rtx_SET (VOIDmode
,
2136 gen_rtx_MEM (GET_MODE (reg
),
2137 plus_constant (base
, off
)),
2144 do_restore (move_fn
, reg
, cfa_off
)
2145 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
2147 HOST_WIDE_INT cfa_off
;
2149 int iter
= spill_fill_data
.next_iter
;
2152 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
2153 GEN_INT (cfa_off
)));
2154 spill_fill_data
.prev_insn
[iter
] = insn
;
2157 /* Wrapper functions that discards the CONST_INT spill offset. These
2158 exist so that we can give gr_spill/gr_fill the offset they need and
2159 use a consistant function interface. */
2162 gen_movdi_x (dest
, src
, offset
)
2164 rtx offset ATTRIBUTE_UNUSED
;
2166 return gen_movdi (dest
, src
);
2170 gen_fr_spill_x (dest
, src
, offset
)
2172 rtx offset ATTRIBUTE_UNUSED
;
2174 return gen_fr_spill (dest
, src
);
2178 gen_fr_restore_x (dest
, src
, offset
)
2180 rtx offset ATTRIBUTE_UNUSED
;
2182 return gen_fr_restore (dest
, src
);
2185 /* Called after register allocation to add any instructions needed for the
2186 prologue. Using a prologue insn is favored compared to putting all of the
2187 instructions in output_function_prologue(), since it allows the scheduler
2188 to intermix instructions with the saves of the caller saved registers. In
2189 some cases, it might be necessary to emit a barrier instruction as the last
2190 insn to prevent such scheduling.
2192 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2193 so that the debug info generation code can handle them properly.
2195 The register save area is layed out like so:
2197 [ varargs spill area ]
2198 [ fr register spill area ]
2199 [ br register spill area ]
2200 [ ar register spill area ]
2201 [ pr register spill area ]
2202 [ gr register spill area ] */
2204 /* ??? Get inefficient code when the frame size is larger than can fit in an
2205 adds instruction. */
2208 ia64_expand_prologue ()
2210 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2211 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2214 ia64_compute_frame_size (get_frame_size ());
2215 last_scratch_gr_reg
= 15;
2217 /* If there is no epilogue, then we don't need some prologue insns.
2218 We need to avoid emitting the dead prologue insns, because flow
2219 will complain about them. */
2224 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
2225 if ((e
->flags
& EDGE_FAKE
) == 0
2226 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2228 epilogue_p
= (e
!= NULL
);
2233 /* Set the local, input, and output register names. We need to do this
2234 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2235 half. If we use in/loc/out register names, then we get assembler errors
2236 in crtn.S because there is no alloc insn or regstk directive in there. */
2237 if (! TARGET_REG_NAMES
)
2239 int inputs
= current_frame_info
.n_input_regs
;
2240 int locals
= current_frame_info
.n_local_regs
;
2241 int outputs
= current_frame_info
.n_output_regs
;
2243 for (i
= 0; i
< inputs
; i
++)
2244 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2245 for (i
= 0; i
< locals
; i
++)
2246 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2247 for (i
= 0; i
< outputs
; i
++)
2248 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2251 /* Set the frame pointer register name. The regnum is logically loc79,
2252 but of course we'll not have allocated that many locals. Rather than
2253 worrying about renumbering the existing rtxs, we adjust the name. */
2254 /* ??? This code means that we can never use one local register when
2255 there is a frame pointer. loc79 gets wasted in this case, as it is
2256 renamed to a register that will never be used. See also the try_locals
2257 code in find_gr_spill. */
2258 if (current_frame_info
.reg_fp
)
2260 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2261 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2262 = reg_names
[current_frame_info
.reg_fp
];
2263 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2266 /* Fix up the return address placeholder. */
2267 /* ??? We can fail if __builtin_return_address is used, and we didn't
2268 allocate a register in which to save b0. I can't think of a way to
2269 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2270 then be sure that I got the right one. Further, reload doesn't seem
2271 to care if an eliminable register isn't used, and "eliminates" it
2273 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
2274 && current_frame_info
.reg_save_b0
!= 0)
2275 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
2277 /* We don't need an alloc instruction if we've used no outputs or locals. */
2278 if (current_frame_info
.n_local_regs
== 0
2279 && current_frame_info
.n_output_regs
== 0
2280 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
)
2282 /* If there is no alloc, but there are input registers used, then we
2283 need a .regstk directive. */
2284 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2285 ar_pfs_save_reg
= NULL_RTX
;
2289 current_frame_info
.need_regstk
= 0;
2291 if (current_frame_info
.reg_save_ar_pfs
)
2292 regno
= current_frame_info
.reg_save_ar_pfs
;
2294 regno
= next_scratch_gr_reg ();
2295 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2297 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2298 GEN_INT (current_frame_info
.n_input_regs
),
2299 GEN_INT (current_frame_info
.n_local_regs
),
2300 GEN_INT (current_frame_info
.n_output_regs
),
2301 GEN_INT (current_frame_info
.n_rotate_regs
)));
2302 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2305 /* Set up frame pointer, stack pointer, and spill iterators. */
2307 n_varargs
= cfun
->machine
->n_varargs
;
2308 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2309 stack_pointer_rtx
, 0);
2311 if (frame_pointer_needed
)
2313 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2314 RTX_FRAME_RELATED_P (insn
) = 1;
2317 if (current_frame_info
.total_size
!= 0)
2319 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2322 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2323 offset
= frame_size_rtx
;
2326 regno
= next_scratch_gr_reg ();
2327 offset
= gen_rtx_REG (DImode
, regno
);
2328 emit_move_insn (offset
, frame_size_rtx
);
2331 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2332 stack_pointer_rtx
, offset
));
2334 if (! frame_pointer_needed
)
2336 RTX_FRAME_RELATED_P (insn
) = 1;
2337 if (GET_CODE (offset
) != CONST_INT
)
2340 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2341 gen_rtx_SET (VOIDmode
,
2343 gen_rtx_PLUS (DImode
,
2350 /* ??? At this point we must generate a magic insn that appears to
2351 modify the stack pointer, the frame pointer, and all spill
2352 iterators. This would allow the most scheduling freedom. For
2353 now, just hard stop. */
2354 emit_insn (gen_blockage ());
2357 /* Must copy out ar.unat before doing any integer spills. */
2358 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2360 if (current_frame_info
.reg_save_ar_unat
)
2362 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2365 alt_regno
= next_scratch_gr_reg ();
2366 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2367 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2370 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2371 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2372 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2374 /* Even if we're not going to generate an epilogue, we still
2375 need to save the register so that EH works. */
2376 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2377 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2380 ar_unat_save_reg
= NULL_RTX
;
2382 /* Spill all varargs registers. Do this before spilling any GR registers,
2383 since we want the UNAT bits for the GR registers to override the UNAT
2384 bits from varargs, which we don't care about. */
2387 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2389 reg
= gen_rtx_REG (DImode
, regno
);
2390 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2393 /* Locate the bottom of the register save area. */
2394 cfa_off
= (current_frame_info
.spill_cfa_off
2395 + current_frame_info
.spill_size
2396 + current_frame_info
.extra_spill_size
);
2398 /* Save the predicate register block either in a register or in memory. */
2399 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2401 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2402 if (current_frame_info
.reg_save_pr
!= 0)
2404 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2405 insn
= emit_move_insn (alt_reg
, reg
);
2407 /* ??? Denote pr spill/fill by a DImode move that modifies all
2408 64 hard registers. */
2409 RTX_FRAME_RELATED_P (insn
) = 1;
2411 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2412 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2415 /* Even if we're not going to generate an epilogue, we still
2416 need to save the register so that EH works. */
2418 emit_insn (gen_prologue_use (alt_reg
));
2422 alt_regno
= next_scratch_gr_reg ();
2423 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2424 insn
= emit_move_insn (alt_reg
, reg
);
2425 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2430 /* Handle AR regs in numerical order. All of them get special handling. */
2431 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2432 && current_frame_info
.reg_save_ar_unat
== 0)
2434 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2435 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2439 /* The alloc insn already copied ar.pfs into a general register. The
2440 only thing we have to do now is copy that register to a stack slot
2441 if we'd not allocated a local register for the job. */
2442 if (current_frame_info
.reg_save_ar_pfs
== 0
2443 && ! current_function_is_leaf
)
2445 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2446 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2450 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2452 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2453 if (current_frame_info
.reg_save_ar_lc
!= 0)
2455 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2456 insn
= emit_move_insn (alt_reg
, reg
);
2457 RTX_FRAME_RELATED_P (insn
) = 1;
2459 /* Even if we're not going to generate an epilogue, we still
2460 need to save the register so that EH works. */
2462 emit_insn (gen_prologue_use (alt_reg
));
2466 alt_regno
= next_scratch_gr_reg ();
2467 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2468 emit_move_insn (alt_reg
, reg
);
2469 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2474 /* We should now be at the base of the gr/br/fr spill area. */
2475 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2476 + current_frame_info
.spill_size
))
2479 /* Spill all general registers. */
2480 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2481 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2483 reg
= gen_rtx_REG (DImode
, regno
);
2484 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2488 /* Handle BR0 specially -- it may be getting stored permanently in
2489 some GR register. */
2490 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2492 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2493 if (current_frame_info
.reg_save_b0
!= 0)
2495 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2496 insn
= emit_move_insn (alt_reg
, reg
);
2497 RTX_FRAME_RELATED_P (insn
) = 1;
2499 /* Even if we're not going to generate an epilogue, we still
2500 need to save the register so that EH works. */
2502 emit_insn (gen_prologue_use (alt_reg
));
2506 alt_regno
= next_scratch_gr_reg ();
2507 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2508 emit_move_insn (alt_reg
, reg
);
2509 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2514 /* Spill the rest of the BR registers. */
2515 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2516 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2518 alt_regno
= next_scratch_gr_reg ();
2519 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2520 reg
= gen_rtx_REG (DImode
, regno
);
2521 emit_move_insn (alt_reg
, reg
);
2522 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2526 /* Align the frame and spill all FR registers. */
2527 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2528 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2532 reg
= gen_rtx_REG (TFmode
, regno
);
2533 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2537 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2540 finish_spill_pointers ();
2543 /* Called after register allocation to add any instructions needed for the
2544 epilogue. Using an epilogue insn is favored compared to putting all of the
2545 instructions in output_function_prologue(), since it allows the scheduler
2546 to intermix instructions with the saves of the caller saved registers. In
2547 some cases, it might be necessary to emit a barrier instruction as the last
2548 insn to prevent such scheduling. */
2551 ia64_expand_epilogue (sibcall_p
)
2554 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2555 int regno
, alt_regno
, cfa_off
;
2557 ia64_compute_frame_size (get_frame_size ());
2559 /* If there is a frame pointer, then we use it instead of the stack
2560 pointer, so that the stack pointer does not need to be valid when
2561 the epilogue starts. See EXIT_IGNORE_STACK. */
2562 if (frame_pointer_needed
)
2563 setup_spill_pointers (current_frame_info
.n_spilled
,
2564 hard_frame_pointer_rtx
, 0);
2566 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2567 current_frame_info
.total_size
);
2569 if (current_frame_info
.total_size
!= 0)
2571 /* ??? At this point we must generate a magic insn that appears to
2572 modify the spill iterators and the frame pointer. This would
2573 allow the most scheduling freedom. For now, just hard stop. */
2574 emit_insn (gen_blockage ());
2577 /* Locate the bottom of the register save area. */
2578 cfa_off
= (current_frame_info
.spill_cfa_off
2579 + current_frame_info
.spill_size
2580 + current_frame_info
.extra_spill_size
);
2582 /* Restore the predicate registers. */
2583 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2585 if (current_frame_info
.reg_save_pr
!= 0)
2586 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2589 alt_regno
= next_scratch_gr_reg ();
2590 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2591 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2594 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2595 emit_move_insn (reg
, alt_reg
);
2598 /* Restore the application registers. */
2600 /* Load the saved unat from the stack, but do not restore it until
2601 after the GRs have been restored. */
2602 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2604 if (current_frame_info
.reg_save_ar_unat
!= 0)
2606 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2609 alt_regno
= next_scratch_gr_reg ();
2610 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2611 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2612 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2617 ar_unat_save_reg
= NULL_RTX
;
2619 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2621 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2622 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2623 emit_move_insn (reg
, alt_reg
);
2625 else if (! current_function_is_leaf
)
2627 alt_regno
= next_scratch_gr_reg ();
2628 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2629 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2631 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2632 emit_move_insn (reg
, alt_reg
);
2635 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2637 if (current_frame_info
.reg_save_ar_lc
!= 0)
2638 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2641 alt_regno
= next_scratch_gr_reg ();
2642 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2643 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2646 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2647 emit_move_insn (reg
, alt_reg
);
2650 /* We should now be at the base of the gr/br/fr spill area. */
2651 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2652 + current_frame_info
.spill_size
))
2655 /* Restore all general registers. */
2656 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2657 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2659 reg
= gen_rtx_REG (DImode
, regno
);
2660 do_restore (gen_gr_restore
, reg
, cfa_off
);
2664 /* Restore the branch registers. Handle B0 specially, as it may
2665 have gotten stored in some GR register. */
2666 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2668 if (current_frame_info
.reg_save_b0
!= 0)
2669 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2672 alt_regno
= next_scratch_gr_reg ();
2673 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2674 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2677 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2678 emit_move_insn (reg
, alt_reg
);
2681 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2682 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2684 alt_regno
= next_scratch_gr_reg ();
2685 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2686 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2688 reg
= gen_rtx_REG (DImode
, regno
);
2689 emit_move_insn (reg
, alt_reg
);
2692 /* Restore floating point registers. */
2693 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2694 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2698 reg
= gen_rtx_REG (TFmode
, regno
);
2699 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2703 /* Restore ar.unat for real. */
2704 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2706 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2707 emit_move_insn (reg
, ar_unat_save_reg
);
2710 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2713 finish_spill_pointers ();
2715 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2717 /* ??? At this point we must generate a magic insn that appears to
2718 modify the spill iterators, the stack pointer, and the frame
2719 pointer. This would allow the most scheduling freedom. For now,
2721 emit_insn (gen_blockage ());
2724 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2725 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2726 else if (frame_pointer_needed
)
2728 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2729 RTX_FRAME_RELATED_P (insn
) = 1;
2731 else if (current_frame_info
.total_size
)
2733 rtx offset
, frame_size_rtx
;
2735 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2736 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2737 offset
= frame_size_rtx
;
2740 regno
= next_scratch_gr_reg ();
2741 offset
= gen_rtx_REG (DImode
, regno
);
2742 emit_move_insn (offset
, frame_size_rtx
);
2745 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2748 RTX_FRAME_RELATED_P (insn
) = 1;
2749 if (GET_CODE (offset
) != CONST_INT
)
2752 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2753 gen_rtx_SET (VOIDmode
,
2755 gen_rtx_PLUS (DImode
,
2762 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2763 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
2766 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
2769 int fp
= GR_REG (2);
2770 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2771 first available call clobbered register. If there was a frame_pointer
2772 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2773 so we have to make sure we're using the string "r2" when emitting
2774 the register name for the assmbler. */
2775 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
2776 fp
= HARD_FRAME_POINTER_REGNUM
;
2778 /* We must emit an alloc to force the input registers to become output
2779 registers. Otherwise, if the callee tries to pass its parameters
2780 through to another call without an intervening alloc, then these
2782 /* ??? We don't need to preserve all input registers. We only need to
2783 preserve those input registers used as arguments to the sibling call.
2784 It is unclear how to compute that number here. */
2785 if (current_frame_info
.n_input_regs
!= 0)
2786 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
2787 GEN_INT (0), GEN_INT (0),
2788 GEN_INT (current_frame_info
.n_input_regs
),
2793 /* Return 1 if br.ret can do all the work required to return from a
2797 ia64_direct_return ()
2799 if (reload_completed
&& ! frame_pointer_needed
)
2801 ia64_compute_frame_size (get_frame_size ());
2803 return (current_frame_info
.total_size
== 0
2804 && current_frame_info
.n_spilled
== 0
2805 && current_frame_info
.reg_save_b0
== 0
2806 && current_frame_info
.reg_save_pr
== 0
2807 && current_frame_info
.reg_save_ar_pfs
== 0
2808 && current_frame_info
.reg_save_ar_unat
== 0
2809 && current_frame_info
.reg_save_ar_lc
== 0);
2815 ia64_hard_regno_rename_ok (from
, to
)
2819 /* Don't clobber any of the registers we reserved for the prologue. */
2820 if (to
== current_frame_info
.reg_fp
2821 || to
== current_frame_info
.reg_save_b0
2822 || to
== current_frame_info
.reg_save_pr
2823 || to
== current_frame_info
.reg_save_ar_pfs
2824 || to
== current_frame_info
.reg_save_ar_unat
2825 || to
== current_frame_info
.reg_save_ar_lc
)
2828 if (from
== current_frame_info
.reg_fp
2829 || from
== current_frame_info
.reg_save_b0
2830 || from
== current_frame_info
.reg_save_pr
2831 || from
== current_frame_info
.reg_save_ar_pfs
2832 || from
== current_frame_info
.reg_save_ar_unat
2833 || from
== current_frame_info
.reg_save_ar_lc
)
2836 /* Don't use output registers outside the register frame. */
2837 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2840 /* Retain even/oddness on predicate register pairs. */
2841 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2842 return (from
& 1) == (to
& 1);
2844 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2845 if (from
== GR_REG (4) && current_function_calls_setjmp
)
2851 /* Target hook for assembling integer objects. Handle word-sized
2852 aligned objects and detect the cases when @fptr is needed. */
2855 ia64_assemble_integer (x
, size
, aligned_p
)
2860 if (size
== UNITS_PER_WORD
&& aligned_p
2861 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2862 && GET_CODE (x
) == SYMBOL_REF
2863 && SYMBOL_REF_FLAG (x
))
2865 fputs ("\tdata8\t@fptr(", asm_out_file
);
2866 output_addr_const (asm_out_file
, x
);
2867 fputs (")\n", asm_out_file
);
2870 return default_assemble_integer (x
, size
, aligned_p
);
2873 /* Emit the function prologue. */
2876 ia64_output_function_prologue (file
, size
)
2878 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2880 int mask
, grsave
, grsave_prev
;
2882 if (current_frame_info
.need_regstk
)
2883 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2884 current_frame_info
.n_input_regs
,
2885 current_frame_info
.n_local_regs
,
2886 current_frame_info
.n_output_regs
,
2887 current_frame_info
.n_rotate_regs
);
2889 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2892 /* Emit the .prologue directive. */
2895 grsave
= grsave_prev
= 0;
2896 if (current_frame_info
.reg_save_b0
!= 0)
2899 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2901 if (current_frame_info
.reg_save_ar_pfs
!= 0
2902 && (grsave_prev
== 0
2903 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2906 if (grsave_prev
== 0)
2907 grsave
= current_frame_info
.reg_save_ar_pfs
;
2908 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2910 if (current_frame_info
.reg_fp
!= 0
2911 && (grsave_prev
== 0
2912 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2915 if (grsave_prev
== 0)
2916 grsave
= HARD_FRAME_POINTER_REGNUM
;
2917 grsave_prev
= current_frame_info
.reg_fp
;
2919 if (current_frame_info
.reg_save_pr
!= 0
2920 && (grsave_prev
== 0
2921 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2924 if (grsave_prev
== 0)
2925 grsave
= current_frame_info
.reg_save_pr
;
2929 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2930 ia64_dbx_register_number (grsave
));
2932 fputs ("\t.prologue\n", file
);
2934 /* Emit a .spill directive, if necessary, to relocate the base of
2935 the register spill area. */
2936 if (current_frame_info
.spill_cfa_off
!= -16)
2937 fprintf (file
, "\t.spill %ld\n",
2938 (long) (current_frame_info
.spill_cfa_off
2939 + current_frame_info
.spill_size
));
2942 /* Emit the .body directive at the scheduled end of the prologue. */
2945 ia64_output_function_end_prologue (file
)
2948 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2951 fputs ("\t.body\n", file
);
2954 /* Emit the function epilogue. */
2957 ia64_output_function_epilogue (file
, size
)
2958 FILE *file ATTRIBUTE_UNUSED
;
2959 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2963 /* Reset from the function's potential modifications. */
2964 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
2966 if (current_frame_info
.reg_fp
)
2968 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2969 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2970 = reg_names
[current_frame_info
.reg_fp
];
2971 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2973 if (! TARGET_REG_NAMES
)
2975 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2976 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2977 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2978 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2979 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2980 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
2983 current_frame_info
.initialized
= 0;
2987 ia64_dbx_register_number (regno
)
2990 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2991 from its home at loc79 to something inside the register frame. We
2992 must perform the same renumbering here for the debug info. */
2993 if (current_frame_info
.reg_fp
)
2995 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2996 regno
= current_frame_info
.reg_fp
;
2997 else if (regno
== current_frame_info
.reg_fp
)
2998 regno
= HARD_FRAME_POINTER_REGNUM
;
3001 if (IN_REGNO_P (regno
))
3002 return 32 + regno
- IN_REG (0);
3003 else if (LOC_REGNO_P (regno
))
3004 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3005 else if (OUT_REGNO_P (regno
))
3006 return (32 + current_frame_info
.n_input_regs
3007 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3013 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
3014 rtx addr
, fnaddr
, static_chain
;
3016 rtx addr_reg
, eight
= GEN_INT (8);
3018 /* Load up our iterator. */
3019 addr_reg
= gen_reg_rtx (Pmode
);
3020 emit_move_insn (addr_reg
, addr
);
3022 /* The first two words are the fake descriptor:
3023 __ia64_trampoline, ADDR+16. */
3024 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3025 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
3026 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3028 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3029 copy_to_reg (plus_constant (addr
, 16)));
3030 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3032 /* The third word is the target descriptor. */
3033 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
3034 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3036 /* The fourth word is the static chain. */
3037 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
3040 /* Do any needed setup for a variadic function. CUM has not been updated
3041 for the last named argument which has type TYPE and mode MODE.
3043 We generate the actual spill instructions during prologue generation. */
3046 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
3047 CUMULATIVE_ARGS cum
;
3051 int second_time ATTRIBUTE_UNUSED
;
3053 /* If this is a stdarg function, then skip the current argument. */
3054 if (! current_function_varargs
)
3055 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
3057 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
3059 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
3060 *pretend_size
= n
* UNITS_PER_WORD
;
3061 cfun
->machine
->n_varargs
= n
;
3065 /* Check whether TYPE is a homogeneous floating point aggregate. If
3066 it is, return the mode of the floating point type that appears
3067 in all leafs. If it is not, return VOIDmode.
3069 An aggregate is a homogeneous floating point aggregate is if all
3070 fields/elements in it have the same floating point type (e.g,
3071 SFmode). 128-bit quad-precision floats are excluded. */
3073 static enum machine_mode
3074 hfa_element_mode (type
, nested
)
3078 enum machine_mode element_mode
= VOIDmode
;
3079 enum machine_mode mode
;
3080 enum tree_code code
= TREE_CODE (type
);
3081 int know_element_mode
= 0;
3086 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
3087 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
3088 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
3089 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
3093 /* Fortran complex types are supposed to be HFAs, so we need to handle
3094 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3097 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
3098 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
3099 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
3104 /* ??? Should exclude 128-bit long double here. */
3105 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3106 mode if this is contained within an aggregate. */
3108 return TYPE_MODE (type
);
3113 return hfa_element_mode (TREE_TYPE (type
), 1);
3117 case QUAL_UNION_TYPE
:
3118 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3120 if (TREE_CODE (t
) != FIELD_DECL
)
3123 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3124 if (know_element_mode
)
3126 if (mode
!= element_mode
)
3129 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3133 know_element_mode
= 1;
3134 element_mode
= mode
;
3137 return element_mode
;
3140 /* If we reach here, we probably have some front-end specific type
3141 that the backend doesn't know about. This can happen via the
3142 aggregate_value_p call in init_function_start. All we can do is
3143 ignore unknown tree types. */
3150 /* Return rtx for register where argument is passed, or zero if it is passed
3153 /* ??? 128-bit quad-precision floats are always passed in general
3157 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
3158 CUMULATIVE_ARGS
*cum
;
3159 enum machine_mode mode
;
3164 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
3165 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3166 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3169 enum machine_mode hfa_mode
= VOIDmode
;
3171 /* Integer and float arguments larger than 8 bytes start at the next even
3172 boundary. Aggregates larger than 8 bytes start at the next even boundary
3173 if the aggregate has 16 byte alignment. Net effect is that types with
3174 alignment greater than 8 start at the next even boundary. */
3175 /* ??? The ABI does not specify how to handle aggregates with alignment from
3176 9 to 15 bytes, or greater than 16. We handle them all as if they had
3177 16 byte alignment. Such aggregates can occur only if gcc extensions are
3179 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3181 && (cum
->words
& 1))
3184 /* If all argument slots are used, then it must go on the stack. */
3185 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3188 /* Check for and handle homogeneous FP aggregates. */
3190 hfa_mode
= hfa_element_mode (type
, 0);
3192 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3193 and unprototyped hfas are passed specially. */
3194 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3198 int fp_regs
= cum
->fp_regs
;
3199 int int_regs
= cum
->words
+ offset
;
3200 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3204 /* If prototyped, pass it in FR regs then GR regs.
3205 If not prototyped, pass it in both FR and GR regs.
3207 If this is an SFmode aggregate, then it is possible to run out of
3208 FR regs while GR regs are still left. In that case, we pass the
3209 remaining part in the GR regs. */
3211 /* Fill the FP regs. We do this always. We stop if we reach the end
3212 of the argument, the last FP register, or the last argument slot. */
3214 byte_size
= ((mode
== BLKmode
)
3215 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3216 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3218 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3219 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
3221 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3222 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
3226 args_byte_size
+= hfa_size
;
3230 /* If no prototype, then the whole thing must go in GR regs. */
3231 if (! cum
->prototype
)
3233 /* If this is an SFmode aggregate, then we might have some left over
3234 that needs to go in GR regs. */
3235 else if (byte_size
!= offset
)
3236 int_regs
+= offset
/ UNITS_PER_WORD
;
3238 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3240 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3242 enum machine_mode gr_mode
= DImode
;
3244 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3245 then this goes in a GR reg left adjusted/little endian, right
3246 adjusted/big endian. */
3247 /* ??? Currently this is handled wrong, because 4-byte hunks are
3248 always right adjusted/little endian. */
3251 /* If we have an even 4 byte hunk because the aggregate is a
3252 multiple of 4 bytes in size, then this goes in a GR reg right
3253 adjusted/little endian. */
3254 else if (byte_size
- offset
== 4)
3256 /* Complex floats need to have float mode. */
3257 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3260 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3261 gen_rtx_REG (gr_mode
, (basereg
3264 offset
+= GET_MODE_SIZE (gr_mode
);
3265 int_regs
+= GET_MODE_SIZE (gr_mode
) <= UNITS_PER_WORD
3266 ? 1 : GET_MODE_SIZE (gr_mode
) / UNITS_PER_WORD
;
3269 /* If we ended up using just one location, just return that one loc. */
3271 return XEXP (loc
[0], 0);
3273 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3276 /* Integral and aggregates go in general registers. If we have run out of
3277 FR registers, then FP values must also go in general registers. This can
3278 happen when we have a SFmode HFA. */
3279 else if (((mode
== TFmode
) && ! INTEL_EXTENDED_IEEE_FORMAT
)
3280 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3281 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3283 /* If there is a prototype, then FP values go in a FR register when
3284 named, and in a GR registeer when unnamed. */
3285 else if (cum
->prototype
)
3288 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3290 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3292 /* If there is no prototype, then FP values go in both FR and GR
3296 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3297 gen_rtx_REG (mode
, (FR_ARG_FIRST
3300 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3302 (basereg
+ cum
->words
3306 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
3310 /* Return number of words, at the beginning of the argument, that must be
3311 put in registers. 0 is the argument is entirely in registers or entirely
3315 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
3316 CUMULATIVE_ARGS
*cum
;
3317 enum machine_mode mode
;
3319 int named ATTRIBUTE_UNUSED
;
3321 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3322 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3326 /* Arguments with alignment larger than 8 bytes start at the next even
3328 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3330 && (cum
->words
& 1))
3333 /* If all argument slots are used, then it must go on the stack. */
3334 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3337 /* It doesn't matter whether the argument goes in FR or GR regs. If
3338 it fits within the 8 argument slots, then it goes entirely in
3339 registers. If it extends past the last argument slot, then the rest
3340 goes on the stack. */
3342 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3345 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
3348 /* Update CUM to point after this argument. This is patterned after
3349 ia64_function_arg. */
3352 ia64_function_arg_advance (cum
, mode
, type
, named
)
3353 CUMULATIVE_ARGS
*cum
;
3354 enum machine_mode mode
;
3358 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3359 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3362 enum machine_mode hfa_mode
= VOIDmode
;
3364 /* If all arg slots are already full, then there is nothing to do. */
3365 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3368 /* Arguments with alignment larger than 8 bytes start at the next even
3370 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3372 && (cum
->words
& 1))
3375 cum
->words
+= words
+ offset
;
3377 /* Check for and handle homogeneous FP aggregates. */
3379 hfa_mode
= hfa_element_mode (type
, 0);
3381 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3382 and unprototyped hfas are passed specially. */
3383 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3385 int fp_regs
= cum
->fp_regs
;
3386 /* This is the original value of cum->words + offset. */
3387 int int_regs
= cum
->words
- words
;
3388 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3392 /* If prototyped, pass it in FR regs then GR regs.
3393 If not prototyped, pass it in both FR and GR regs.
3395 If this is an SFmode aggregate, then it is possible to run out of
3396 FR regs while GR regs are still left. In that case, we pass the
3397 remaining part in the GR regs. */
3399 /* Fill the FP regs. We do this always. We stop if we reach the end
3400 of the argument, the last FP register, or the last argument slot. */
3402 byte_size
= ((mode
== BLKmode
)
3403 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3404 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3406 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3407 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3410 args_byte_size
+= hfa_size
;
3414 cum
->fp_regs
= fp_regs
;
3417 /* Integral and aggregates go in general registers. If we have run out of
3418 FR registers, then FP values must also go in general registers. This can
3419 happen when we have a SFmode HFA. */
3420 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3421 cum
->int_regs
= cum
->words
;
3423 /* If there is a prototype, then FP values go in a FR register when
3424 named, and in a GR registeer when unnamed. */
3425 else if (cum
->prototype
)
3428 cum
->int_regs
= cum
->words
;
3430 /* ??? Complex types should not reach here. */
3431 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3433 /* If there is no prototype, then FP values go in both FR and GR
3437 /* ??? Complex types should not reach here. */
3438 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3439 cum
->int_regs
= cum
->words
;
3443 /* Variable sized types are passed by reference. */
3444 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3447 ia64_function_arg_pass_by_reference (cum
, mode
, type
, named
)
3448 CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
;
3449 enum machine_mode mode ATTRIBUTE_UNUSED
;
3451 int named ATTRIBUTE_UNUSED
;
3453 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3456 /* Implement va_start. */
3459 ia64_va_start (stdarg_p
, valist
, nextarg
)
3467 arg_words
= current_function_args_info
.words
;
3472 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3474 nextarg
= plus_constant (nextarg
, ofs
);
3475 std_expand_builtin_va_start (1, valist
, nextarg
);
3478 /* Implement va_arg. */
3481 ia64_va_arg (valist
, type
)
3486 /* Variable sized types are passed by reference. */
3487 if (TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
3489 rtx addr
= std_expand_builtin_va_arg (valist
, build_pointer_type (type
));
3490 return gen_rtx_MEM (ptr_mode
, force_reg (Pmode
, addr
));
3493 /* Arguments with alignment larger than 8 bytes start at the next even
3495 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3497 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3498 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3499 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3500 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3501 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3502 TREE_SIDE_EFFECTS (t
) = 1;
3503 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3506 return std_expand_builtin_va_arg (valist
, type
);
3509 /* Return 1 if function return value returned in memory. Return 0 if it is
3513 ia64_return_in_memory (valtype
)
3516 enum machine_mode mode
;
3517 enum machine_mode hfa_mode
;
3518 HOST_WIDE_INT byte_size
;
3520 mode
= TYPE_MODE (valtype
);
3521 byte_size
= GET_MODE_SIZE (mode
);
3522 if (mode
== BLKmode
)
3524 byte_size
= int_size_in_bytes (valtype
);
3529 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3531 hfa_mode
= hfa_element_mode (valtype
, 0);
3532 if (hfa_mode
!= VOIDmode
)
3534 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3536 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3541 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3547 /* Return rtx for register that holds the function return value. */
3550 ia64_function_value (valtype
, func
)
3552 tree func ATTRIBUTE_UNUSED
;
3554 enum machine_mode mode
;
3555 enum machine_mode hfa_mode
;
3557 mode
= TYPE_MODE (valtype
);
3558 hfa_mode
= hfa_element_mode (valtype
, 0);
3560 if (hfa_mode
!= VOIDmode
)
3568 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3569 byte_size
= ((mode
== BLKmode
)
3570 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3572 for (i
= 0; offset
< byte_size
; i
++)
3574 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3575 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3581 return XEXP (loc
[0], 0);
3583 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3585 else if (FLOAT_TYPE_P (valtype
) &&
3586 ((mode
!= TFmode
) || INTEL_EXTENDED_IEEE_FORMAT
))
3587 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3589 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3592 /* Print a memory address as an operand to reference that memory location. */
3594 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3595 also call this from ia64_print_operand for memory addresses. */
3598 ia64_print_operand_address (stream
, address
)
3599 FILE * stream ATTRIBUTE_UNUSED
;
3600 rtx address ATTRIBUTE_UNUSED
;
3604 /* Print an operand to an assembler instruction.
3605 C Swap and print a comparison operator.
3606 D Print an FP comparison operator.
3607 E Print 32 - constant, for SImode shifts as extract.
3608 e Print 64 - constant, for DImode rotates.
3609 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3610 a floating point register emitted normally.
3611 I Invert a predicate register by adding 1.
3612 J Select the proper predicate register for a condition.
3613 j Select the inverse predicate register for a condition.
3614 O Append .acq for volatile load.
3615 P Postincrement of a MEM.
3616 Q Append .rel for volatile store.
3617 S Shift amount for shladd instruction.
3618 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3619 for Intel assembler.
3620 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3621 for Intel assembler.
3622 r Print register name, or constant 0 as r0. HP compatibility for
3625 ia64_print_operand (file
, x
, code
)
3635 /* Handled below. */
3640 enum rtx_code c
= swap_condition (GET_CODE (x
));
3641 fputs (GET_RTX_NAME (c
), file
);
3646 switch (GET_CODE (x
))
3658 str
= GET_RTX_NAME (GET_CODE (x
));
3665 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3669 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3673 if (x
== CONST0_RTX (GET_MODE (x
)))
3674 str
= reg_names
[FR_REG (0)];
3675 else if (x
== CONST1_RTX (GET_MODE (x
)))
3676 str
= reg_names
[FR_REG (1)];
3677 else if (GET_CODE (x
) == REG
)
3678 str
= reg_names
[REGNO (x
)];
3685 fputs (reg_names
[REGNO (x
) + 1], file
);
3691 unsigned int regno
= REGNO (XEXP (x
, 0));
3692 if (GET_CODE (x
) == EQ
)
3696 fputs (reg_names
[regno
], file
);
3701 if (MEM_VOLATILE_P (x
))
3702 fputs(".acq", file
);
3707 HOST_WIDE_INT value
;
3709 switch (GET_CODE (XEXP (x
, 0)))
3715 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3716 if (GET_CODE (x
) == CONST_INT
)
3718 else if (GET_CODE (x
) == REG
)
3720 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3728 value
= GET_MODE_SIZE (GET_MODE (x
));
3732 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3738 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, value
);
3743 if (MEM_VOLATILE_P (x
))
3744 fputs(".rel", file
);
3748 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3752 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3754 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3760 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3762 const char *prefix
= "0x";
3763 if (INTVAL (x
) & 0x80000000)
3765 fprintf (file
, "0xffffffff");
3768 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3774 /* If this operand is the constant zero, write it as register zero.
3775 Any register, zero, or CONST_INT value is OK here. */
3776 if (GET_CODE (x
) == REG
)
3777 fputs (reg_names
[REGNO (x
)], file
);
3778 else if (x
== CONST0_RTX (GET_MODE (x
)))
3780 else if (GET_CODE (x
) == CONST_INT
)
3781 output_addr_const (file
, x
);
3783 output_operand_lossage ("invalid %%r value");
3790 /* For conditional branches, returns or calls, substitute
3791 sptk, dptk, dpnt, or spnt for %s. */
3792 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3795 int pred_val
= INTVAL (XEXP (x
, 0));
3797 /* Guess top and bottom 10% statically predicted. */
3798 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3800 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3802 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3807 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3812 fputs (which
, file
);
3817 x
= current_insn_predicate
;
3820 unsigned int regno
= REGNO (XEXP (x
, 0));
3821 if (GET_CODE (x
) == EQ
)
3823 fprintf (file
, "(%s) ", reg_names
[regno
]);
3828 output_operand_lossage ("ia64_print_operand: unknown code");
3832 switch (GET_CODE (x
))
3834 /* This happens for the spill/restore instructions. */
3839 /* ... fall through ... */
3842 fputs (reg_names
[REGNO (x
)], file
);
3847 rtx addr
= XEXP (x
, 0);
3848 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
3849 addr
= XEXP (addr
, 0);
3850 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3855 output_addr_const (file
, x
);
3862 /* Calulate the cost of moving data from a register in class FROM to
3863 one in class TO, using MODE. */
3866 ia64_register_move_cost (mode
, from
, to
)
3867 enum machine_mode mode
;
3868 enum reg_class from
, to
;
3870 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3871 if (to
== ADDL_REGS
)
3873 if (from
== ADDL_REGS
)
3876 /* All costs are symmetric, so reduce cases by putting the
3877 lower number class as the destination. */
3880 enum reg_class tmp
= to
;
3881 to
= from
, from
= tmp
;
3884 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3885 so that we get secondary memory reloads. Between FR_REGS,
3886 we have to make this at least as expensive as MEMORY_MOVE_COST
3887 to avoid spectacularly poor register class preferencing. */
3890 if (to
!= GR_REGS
|| from
!= GR_REGS
)
3891 return MEMORY_MOVE_COST (mode
, to
, 0);
3899 /* Moving between PR registers takes two insns. */
3900 if (from
== PR_REGS
)
3902 /* Moving between PR and anything but GR is impossible. */
3903 if (from
!= GR_REGS
)
3904 return MEMORY_MOVE_COST (mode
, to
, 0);
3908 /* Moving between BR and anything but GR is impossible. */
3909 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
3910 return MEMORY_MOVE_COST (mode
, to
, 0);
3915 /* Moving between AR and anything but GR is impossible. */
3916 if (from
!= GR_REGS
)
3917 return MEMORY_MOVE_COST (mode
, to
, 0);
3922 case GR_AND_FR_REGS
:
3923 case GR_AND_BR_REGS
:
3934 /* This function returns the register class required for a secondary
3935 register when copying between one of the registers in CLASS, and X,
3936 using MODE. A return value of NO_REGS means that no secondary register
3940 ia64_secondary_reload_class (class, mode
, x
)
3941 enum reg_class
class;
3942 enum machine_mode mode ATTRIBUTE_UNUSED
;
3947 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3948 regno
= true_regnum (x
);
3955 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3956 interaction. We end up with two pseudos with overlapping lifetimes
3957 both of which are equiv to the same constant, and both which need
3958 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3959 changes depending on the path length, which means the qty_first_reg
3960 check in make_regs_eqv can give different answers at different times.
3961 At some point I'll probably need a reload_indi pattern to handle
3964 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3965 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3966 non-general registers for good measure. */
3967 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
3970 /* This is needed if a pseudo used as a call_operand gets spilled to a
3972 if (GET_CODE (x
) == MEM
)
3977 /* Need to go through general regsters to get to other class regs. */
3978 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
3981 /* This can happen when a paradoxical subreg is an operand to the
3983 /* ??? This shouldn't be necessary after instruction scheduling is
3984 enabled, because paradoxical subregs are not accepted by
3985 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3986 stop the paradoxical subreg stupidity in the *_operand functions
3988 if (GET_CODE (x
) == MEM
3989 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3990 || GET_MODE (x
) == QImode
))
3993 /* This can happen because of the ior/and/etc patterns that accept FP
3994 registers as operands. If the third operand is a constant, then it
3995 needs to be reloaded into a FP register. */
3996 if (GET_CODE (x
) == CONST_INT
)
3999 /* This can happen because of register elimination in a muldi3 insn.
4000 E.g. `26107 * (unsigned long)&u'. */
4001 if (GET_CODE (x
) == PLUS
)
4006 /* ??? This happens if we cse/gcse a BImode value across a call,
4007 and the function has a nonlocal goto. This is because global
4008 does not allocate call crossing pseudos to hard registers when
4009 current_function_has_nonlocal_goto is true. This is relatively
4010 common for C++ programs that use exceptions. To reproduce,
4011 return NO_REGS and compile libstdc++. */
4012 if (GET_CODE (x
) == MEM
)
4015 /* This can happen when we take a BImode subreg of a DImode value,
4016 and that DImode value winds up in some non-GR register. */
4017 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
4022 /* Since we have no offsettable memory addresses, we need a temporary
4023 to hold the address of the second word. */
4036 /* Emit text to declare externally defined variables and functions, because
4037 the Intel assembler does not support undefined externals. */
4040 ia64_asm_output_external (file
, decl
, name
)
4045 int save_referenced
;
4047 /* GNU as does not need anything here. */
4051 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4052 the linker when we do this, so we need to be careful not to do this for
4053 builtin functions which have no library equivalent. Unfortunately, we
4054 can't tell here whether or not a function will actually be called by
4055 expand_expr, so we pull in library functions even if we may not need
4057 if (! strcmp (name
, "__builtin_next_arg")
4058 || ! strcmp (name
, "alloca")
4059 || ! strcmp (name
, "__builtin_constant_p")
4060 || ! strcmp (name
, "__builtin_args_info"))
4063 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4065 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
4066 if (TREE_CODE (decl
) == FUNCTION_DECL
)
4068 fprintf (file
, "%s", TYPE_ASM_OP
);
4069 assemble_name (file
, name
);
4071 fprintf (file
, TYPE_OPERAND_FMT
, "function");
4074 ASM_GLOBALIZE_LABEL (file
, name
);
4075 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
4078 /* Parse the -mfixed-range= option string. */
4081 fix_range (const_str
)
4082 const char *const_str
;
4085 char *str
, *dash
, *comma
;
4087 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4088 REG2 are either register names or register numbers. The effect
4089 of this option is to mark the registers in the range from REG1 to
4090 REG2 as ``fixed'' so they won't be used by the compiler. This is
4091 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4093 i
= strlen (const_str
);
4094 str
= (char *) alloca (i
+ 1);
4095 memcpy (str
, const_str
, i
+ 1);
4099 dash
= strchr (str
, '-');
4102 warning ("value of -mfixed-range must have form REG1-REG2");
4107 comma
= strchr (dash
+ 1, ',');
4111 first
= decode_reg_name (str
);
4114 warning ("unknown register name: %s", str
);
4118 last
= decode_reg_name (dash
+ 1);
4121 warning ("unknown register name: %s", dash
+ 1);
4129 warning ("%s-%s is an empty range", str
, dash
+ 1);
4133 for (i
= first
; i
<= last
; ++i
)
4134 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4144 static struct machine_function
*
4145 ia64_init_machine_status ()
4147 return ggc_alloc_cleared (sizeof (struct machine_function
));
4150 /* Handle TARGET_OPTIONS switches. */
4153 ia64_override_options ()
4155 if (TARGET_AUTO_PIC
)
4156 target_flags
|= MASK_CONST_GP
;
4158 if (TARGET_INLINE_DIV_LAT
&& TARGET_INLINE_DIV_THR
)
4160 warning ("cannot optimize division for both latency and throughput");
4161 target_flags
&= ~MASK_INLINE_DIV_THR
;
4164 if (ia64_fixed_range_string
)
4165 fix_range (ia64_fixed_range_string
);
4167 if (ia64_tls_size_string
)
4170 unsigned long tmp
= strtoul (ia64_tls_size_string
, &end
, 10);
4171 if (*end
|| (tmp
!= 14 && tmp
!= 22 && tmp
!= 64))
4172 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string
);
4174 ia64_tls_size
= tmp
;
4177 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
4178 flag_schedule_insns_after_reload
= 0;
4180 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
4182 init_machine_status
= ia64_init_machine_status
;
4185 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0
PARAMS((rtx
));
4186 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
4187 static enum attr_type ia64_safe_type
PARAMS((rtx
));
4189 static enum attr_itanium_requires_unit0
4190 ia64_safe_itanium_requires_unit0 (insn
)
4193 if (recog_memoized (insn
) >= 0)
4194 return get_attr_itanium_requires_unit0 (insn
);
4196 return ITANIUM_REQUIRES_UNIT0_NO
;
4199 static enum attr_itanium_class
4200 ia64_safe_itanium_class (insn
)
4203 if (recog_memoized (insn
) >= 0)
4204 return get_attr_itanium_class (insn
);
4206 return ITANIUM_CLASS_UNKNOWN
;
4209 static enum attr_type
4210 ia64_safe_type (insn
)
4213 if (recog_memoized (insn
) >= 0)
4214 return get_attr_type (insn
);
4216 return TYPE_UNKNOWN
;
4219 /* The following collection of routines emit instruction group stop bits as
4220 necessary to avoid dependencies. */
4222 /* Need to track some additional registers as far as serialization is
4223 concerned so we can properly handle br.call and br.ret. We could
4224 make these registers visible to gcc, but since these registers are
4225 never explicitly used in gcc generated code, it seems wasteful to
4226 do so (plus it would make the call and return patterns needlessly
   complex).  */
4228 #define REG_GP (GR_REG (1))
4229 #define REG_RP (BR_REG (0))
4230 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4231 /* This is used for volatile asms which may require a stop bit immediately
4232 before and after them. */
4233 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4234 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4235 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4237 /* For each register, we keep track of how it has been written in the
4238 current instruction group.
4240 If a register is written unconditionally (no qualifying predicate),
4241 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4243 If a register is written if its qualifying predicate P is true, we
4244 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4245 may be written again by the complement of P (P^1) and when this happens,
4246 WRITE_COUNT gets set to 2.
4248 The result of this is that whenever an insn attempts to write a register
4249 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4251 If a predicate register is written by a floating-point insn, we set
4252 WRITTEN_BY_FP to true.
4254 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4255 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
/* Per-register write-tracking state for the current instruction group;
   see the comment above for the meaning of write_count/first_pred.  */
struct reg_write_state
{
  unsigned int write_count : 2;	   /* 0 = unwritten, 1 = predicated write,
				      2 = unconditional (or both preds).  */
  unsigned int first_pred : 16;	   /* Qualifying predicate of first write.  */
  unsigned int written_by_fp : 1;  /* Written by a floating-point insn.  */
  unsigned int written_by_and : 1; /* Written by an and.orcm compare.  */
  unsigned int written_by_or : 1;  /* Written by an or.andcm compare.  */
};
4266 /* Cumulative info for the current instruction group. */
4267 struct reg_write_state rws_sum
[NUM_REGS
];
4268 /* Info for the current instruction. This gets copied to rws_sum after a
4269 stop bit is emitted. */
4270 struct reg_write_state rws_insn
[NUM_REGS
];
4272 /* Indicates whether this is the first instruction after a stop bit,
4273 in which case we don't need another stop bit. Without this, we hit
4274 the abort in ia64_variable_issue when scheduling an alloc. */
4275 static int first_instruction
;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
4289 static void rws_update
PARAMS ((struct reg_write_state
*, int,
4290 struct reg_flags
, int));
4291 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
4292 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
4293 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
4294 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
4295 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
4296 static void init_insn_group_barriers
PARAMS ((void));
4297 static int group_barrier_needed_p
PARAMS ((rtx
));
4298 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
4300 /* Update *RWS for REGNO, which is being written by the current instruction,
4301 with predicate PRED, and associated register flags in FLAGS. */
4304 rws_update (rws
, regno
, flags
, pred
)
4305 struct reg_write_state
*rws
;
4307 struct reg_flags flags
;
4311 rws
[regno
].write_count
++;
4313 rws
[regno
].write_count
= 2;
4314 rws
[regno
].written_by_fp
|= flags
.is_fp
;
4315 /* ??? Not tracking and/or across differing predicates. */
4316 rws
[regno
].written_by_and
= flags
.is_and
;
4317 rws
[regno
].written_by_or
= flags
.is_or
;
4318 rws
[regno
].first_pred
= pred
;
4321 /* Handle an access to register REGNO of type FLAGS using predicate register
4322 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4323 a dependency with an earlier instruction in the same group. */
4326 rws_access_regno (regno
, flags
, pred
)
4328 struct reg_flags flags
;
4331 int need_barrier
= 0;
4333 if (regno
>= NUM_REGS
)
4336 if (! PR_REGNO_P (regno
))
4337 flags
.is_and
= flags
.is_or
= 0;
4343 /* One insn writes same reg multiple times? */
4344 if (rws_insn
[regno
].write_count
> 0)
4347 /* Update info for current instruction. */
4348 rws_update (rws_insn
, regno
, flags
, pred
);
4349 write_count
= rws_sum
[regno
].write_count
;
4351 switch (write_count
)
4354 /* The register has not been written yet. */
4355 rws_update (rws_sum
, regno
, flags
, pred
);
4359 /* The register has been written via a predicate. If this is
4360 not a complementary predicate, then we need a barrier. */
4361 /* ??? This assumes that P and P+1 are always complementary
4362 predicates for P even. */
4363 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4365 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4367 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4369 rws_update (rws_sum
, regno
, flags
, pred
);
4373 /* The register has been unconditionally written already. We
4375 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4377 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4381 rws_sum
[regno
].written_by_and
= flags
.is_and
;
4382 rws_sum
[regno
].written_by_or
= flags
.is_or
;
4391 if (flags
.is_branch
)
4393 /* Branches have several RAW exceptions that allow to avoid
4396 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
4397 /* RAW dependencies on branch regs are permissible as long
4398 as the writer is a non-branch instruction. Since we
4399 never generate code that uses a branch register written
4400 by a branch instruction, handling this case is
4404 if (REGNO_REG_CLASS (regno
) == PR_REGS
4405 && ! rws_sum
[regno
].written_by_fp
)
4406 /* The predicates of a branch are available within the
4407 same insn group as long as the predicate was written by
4408 something other than a floating-point instruction. */
4412 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4414 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4417 switch (rws_sum
[regno
].write_count
)
4420 /* The register has not been written yet. */
4424 /* The register has been written via a predicate. If this is
4425 not a complementary predicate, then we need a barrier. */
4426 /* ??? This assumes that P and P+1 are always complementary
4427 predicates for P even. */
4428 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4433 /* The register has been unconditionally written already. We
4443 return need_barrier
;
4447 rws_access_reg (reg
, flags
, pred
)
4449 struct reg_flags flags
;
4452 int regno
= REGNO (reg
);
4453 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4456 return rws_access_regno (regno
, flags
, pred
);
4459 int need_barrier
= 0;
4461 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4462 return need_barrier
;
4466 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4467 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4470 update_set_flags (x
, pflags
, ppred
, pcond
)
4472 struct reg_flags
*pflags
;
4476 rtx src
= SET_SRC (x
);
4480 switch (GET_CODE (src
))
4486 if (SET_DEST (x
) == pc_rtx
)
4487 /* X is a conditional branch. */
4491 int is_complemented
= 0;
4493 /* X is a conditional move. */
4494 rtx cond
= XEXP (src
, 0);
4495 if (GET_CODE (cond
) == EQ
)
4496 is_complemented
= 1;
4497 cond
= XEXP (cond
, 0);
4498 if (GET_CODE (cond
) != REG
4499 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4502 if (XEXP (src
, 1) == SET_DEST (x
)
4503 || XEXP (src
, 2) == SET_DEST (x
))
4505 /* X is a conditional move that conditionally writes the
4508 /* We need another complement in this case. */
4509 if (XEXP (src
, 1) == SET_DEST (x
))
4510 is_complemented
= ! is_complemented
;
4512 *ppred
= REGNO (cond
);
4513 if (is_complemented
)
4517 /* ??? If this is a conditional write to the dest, then this
4518 instruction does not actually read one source. This probably
4519 doesn't matter, because that source is also the dest. */
4520 /* ??? Multiple writes to predicate registers are allowed
4521 if they are all AND type compares, or if they are all OR
4522 type compares. We do not generate such instructions
4525 /* ... fall through ... */
4528 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4529 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4530 /* Set pflags->is_fp to 1 so that we know we're dealing
4531 with a floating point comparison when processing the
4532 destination of the SET. */
4535 /* Discover if this is a parallel comparison. We only handle
4536 and.orcm and or.andcm at present, since we must retain a
4537 strict inverse on the predicate pair. */
4538 else if (GET_CODE (src
) == AND
)
4540 else if (GET_CODE (src
) == IOR
)
4547 /* Subroutine of rtx_needs_barrier; this function determines whether the
4548 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4549 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4553 set_src_needs_barrier (x
, flags
, pred
, cond
)
4555 struct reg_flags flags
;
4559 int need_barrier
= 0;
4561 rtx src
= SET_SRC (x
);
4563 if (GET_CODE (src
) == CALL
)
4564 /* We don't need to worry about the result registers that
4565 get written by subroutine call. */
4566 return rtx_needs_barrier (src
, flags
, pred
);
4567 else if (SET_DEST (x
) == pc_rtx
)
4569 /* X is a conditional branch. */
4570 /* ??? This seems redundant, as the caller sets this bit for
4572 flags
.is_branch
= 1;
4573 return rtx_needs_barrier (src
, flags
, pred
);
4576 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4578 /* This instruction unconditionally uses a predicate register. */
4580 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4583 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4585 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4586 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4587 dst
= XEXP (dst
, 0);
4589 return need_barrier
;
4592 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4593 Return 1 is this access creates a dependency with an earlier instruction
4594 in the same group. */
4597 rtx_needs_barrier (x
, flags
, pred
)
4599 struct reg_flags flags
;
4603 int is_complemented
= 0;
4604 int need_barrier
= 0;
4605 const char *format_ptr
;
4606 struct reg_flags new_flags
;
4614 switch (GET_CODE (x
))
4617 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4618 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4619 if (GET_CODE (SET_SRC (x
)) != CALL
)
4621 new_flags
.is_write
= 1;
4622 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4627 new_flags
.is_write
= 0;
4628 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4630 /* Avoid multiple register writes, in case this is a pattern with
4631 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4632 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4634 new_flags
.is_write
= 1;
4635 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4636 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4637 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4642 /* X is a predicated instruction. */
4644 cond
= COND_EXEC_TEST (x
);
4647 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4649 if (GET_CODE (cond
) == EQ
)
4650 is_complemented
= 1;
4651 cond
= XEXP (cond
, 0);
4652 if (GET_CODE (cond
) != REG
4653 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4655 pred
= REGNO (cond
);
4656 if (is_complemented
)
4659 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4660 return need_barrier
;
4664 /* Clobber & use are for earlier compiler-phases only. */
4669 /* We always emit stop bits for traditional asms. We emit stop bits
4670 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4671 if (GET_CODE (x
) != ASM_OPERANDS
4672 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4674 /* Avoid writing the register multiple times if we have multiple
4675 asm outputs. This avoids an abort in rws_access_reg. */
4676 if (! rws_insn
[REG_VOLATILE
].write_count
)
4678 new_flags
.is_write
= 1;
4679 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4684 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4685 We can not just fall through here since then we would be confused
4686 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4687 traditional asms unlike their normal usage. */
4689 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4690 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4695 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4697 rtx pat
= XVECEXP (x
, 0, i
);
4698 if (GET_CODE (pat
) == SET
)
4700 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4701 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4703 else if (GET_CODE (pat
) == USE
4704 || GET_CODE (pat
) == CALL
4705 || GET_CODE (pat
) == ASM_OPERANDS
)
4706 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4707 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4710 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4712 rtx pat
= XVECEXP (x
, 0, i
);
4713 if (GET_CODE (pat
) == SET
)
4715 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4717 new_flags
.is_write
= 1;
4718 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4722 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
4723 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4731 if (REGNO (x
) == AR_UNAT_REGNUM
)
4733 for (i
= 0; i
< 64; ++i
)
4734 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4737 need_barrier
= rws_access_reg (x
, flags
, pred
);
4741 /* Find the regs used in memory address computation. */
4742 new_flags
.is_write
= 0;
4743 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4746 case CONST_INT
: case CONST_DOUBLE
:
4747 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4750 /* Operators with side-effects. */
4751 case POST_INC
: case POST_DEC
:
4752 if (GET_CODE (XEXP (x
, 0)) != REG
)
4755 new_flags
.is_write
= 0;
4756 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4757 new_flags
.is_write
= 1;
4758 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4762 if (GET_CODE (XEXP (x
, 0)) != REG
)
4765 new_flags
.is_write
= 0;
4766 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4767 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4768 new_flags
.is_write
= 1;
4769 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4772 /* Handle common unary and binary ops for efficiency. */
4773 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4774 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4775 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4776 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4777 case NE
: case EQ
: case GE
: case GT
: case LE
:
4778 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4779 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4780 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4783 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4784 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4785 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4786 case SQRT
: case FFS
:
4787 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4791 switch (XINT (x
, 1))
4793 case UNSPEC_LTOFF_DTPMOD
:
4794 case UNSPEC_LTOFF_DTPREL
:
4796 case UNSPEC_LTOFF_TPREL
:
4798 case UNSPEC_PRED_REL_MUTEX
:
4799 case UNSPEC_PIC_CALL
:
4801 case UNSPEC_FETCHADD_ACQ
:
4802 case UNSPEC_BSP_VALUE
:
4803 case UNSPEC_FLUSHRS
:
4804 case UNSPEC_BUNDLE_SELECTOR
:
4807 case UNSPEC_GR_SPILL
:
4808 case UNSPEC_GR_RESTORE
:
4810 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4811 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4813 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4814 new_flags
.is_write
= (XINT (x
, 1) == 1);
4815 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4820 case UNSPEC_FR_SPILL
:
4821 case UNSPEC_FR_RESTORE
:
4823 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4827 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4830 case UNSPEC_FR_RECIP_APPROX
:
4831 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4832 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4835 case UNSPEC_CMPXCHG_ACQ
:
4836 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4837 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4845 case UNSPEC_VOLATILE
:
4846 switch (XINT (x
, 1))
4849 /* Alloc must always be the first instruction of a group.
4850 We force this by always returning true. */
4851 /* ??? We might get better scheduling if we explicitly check for
4852 input/local/output register dependencies, and modify the
4853 scheduler so that alloc is always reordered to the start of
4854 the current group. We could then eliminate all of the
4855 first_instruction code. */
4856 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4858 new_flags
.is_write
= 1;
4859 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4862 case UNSPECV_SET_BSP
:
4866 case UNSPECV_BLOCKAGE
:
4867 case UNSPECV_INSN_GROUP_BARRIER
:
4869 case UNSPECV_PSAC_ALL
:
4870 case UNSPECV_PSAC_NORMAL
:
4879 new_flags
.is_write
= 0;
4880 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4881 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4883 new_flags
.is_write
= 1;
4884 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4885 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4889 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4890 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4891 switch (format_ptr
[i
])
4893 case '0': /* unused field */
4894 case 'i': /* integer */
4895 case 'n': /* note */
4896 case 'w': /* wide integer */
4897 case 's': /* pointer to string */
4898 case 'S': /* optional pointer to string */
4902 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4907 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4908 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4917 return need_barrier
;
4920 /* Clear out the state for group_barrier_needed_p at the start of a
4921 sequence of insns. */
4924 init_insn_group_barriers ()
4926 memset (rws_sum
, 0, sizeof (rws_sum
));
4927 first_instruction
= 1;
4930 /* Given the current state, recorded by previous calls to this function,
4931 determine whether a group barrier (a stop bit) is necessary before INSN.
4932 Return nonzero if so. */
4935 group_barrier_needed_p (insn
)
4939 int need_barrier
= 0;
4940 struct reg_flags flags
;
4942 memset (&flags
, 0, sizeof (flags
));
4943 switch (GET_CODE (insn
))
4949 /* A barrier doesn't imply an instruction group boundary. */
4953 memset (rws_insn
, 0, sizeof (rws_insn
));
4957 flags
.is_branch
= 1;
4958 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4959 memset (rws_insn
, 0, sizeof (rws_insn
));
4961 /* Don't bundle a call following another call. */
4962 if ((pat
= prev_active_insn (insn
))
4963 && GET_CODE (pat
) == CALL_INSN
)
4969 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
4973 flags
.is_branch
= 1;
4975 /* Don't bundle a jump following a call. */
4976 if ((pat
= prev_active_insn (insn
))
4977 && GET_CODE (pat
) == CALL_INSN
)
4985 if (GET_CODE (PATTERN (insn
)) == USE
4986 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
4987 /* Don't care about USE and CLOBBER "insns"---those are used to
4988 indicate to the optimizer that it shouldn't get rid of
4989 certain operations. */
4992 pat
= PATTERN (insn
);
4994 /* Ug. Hack hacks hacked elsewhere. */
4995 switch (recog_memoized (insn
))
4997 /* We play dependency tricks with the epilogue in order
4998 to get proper schedules. Undo this for dv analysis. */
4999 case CODE_FOR_epilogue_deallocate_stack
:
5000 case CODE_FOR_prologue_allocate_stack
:
5001 pat
= XVECEXP (pat
, 0, 0);
5004 /* The pattern we use for br.cloop confuses the code above.
5005 The second element of the vector is representative. */
5006 case CODE_FOR_doloop_end_internal
:
5007 pat
= XVECEXP (pat
, 0, 1);
5010 /* Doesn't generate code. */
5011 case CODE_FOR_pred_rel_mutex
:
5012 case CODE_FOR_prologue_use
:
5019 memset (rws_insn
, 0, sizeof (rws_insn
));
5020 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
5022 /* Check to see if the previous instruction was a volatile
5025 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
5032 if (first_instruction
)
5035 first_instruction
= 0;
5038 return need_barrier
;
5041 /* Like group_barrier_needed_p, but do not clobber the current state. */
5044 safe_group_barrier_needed_p (insn
)
5047 struct reg_write_state rws_saved
[NUM_REGS
];
5048 int saved_first_instruction
;
5051 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
5052 saved_first_instruction
= first_instruction
;
5054 t
= group_barrier_needed_p (insn
);
5056 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
5057 first_instruction
= saved_first_instruction
;
5062 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
5063 as necessary to eliminate dependendencies. This function assumes that
5064 a final instruction scheduling pass has been run which has already
5065 inserted most of the necessary stop bits. This function only inserts
5066 new ones at basic block boundaries, since these are invisible to the
5070 emit_insn_group_barriers (dump
, insns
)
5076 int insns_since_last_label
= 0;
5078 init_insn_group_barriers ();
5080 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
5082 if (GET_CODE (insn
) == CODE_LABEL
)
5084 if (insns_since_last_label
)
5086 insns_since_last_label
= 0;
5088 else if (GET_CODE (insn
) == NOTE
5089 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
5091 if (insns_since_last_label
)
5093 insns_since_last_label
= 0;
5095 else if (GET_CODE (insn
) == INSN
5096 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
5097 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
5099 init_insn_group_barriers ();
5102 else if (INSN_P (insn
))
5104 insns_since_last_label
= 1;
5106 if (group_barrier_needed_p (insn
))
5111 fprintf (dump
, "Emitting stop before label %d\n",
5112 INSN_UID (last_label
));
5113 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
5116 init_insn_group_barriers ();
5124 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5125 This function has to emit all necessary group barriers. */
5128 emit_all_insn_group_barriers (dump
, insns
)
5129 FILE *dump ATTRIBUTE_UNUSED
;
5134 init_insn_group_barriers ();
5136 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
5138 if (GET_CODE (insn
) == BARRIER
)
5140 rtx last
= prev_active_insn (insn
);
5144 if (GET_CODE (last
) == JUMP_INSN
5145 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
5146 last
= prev_active_insn (last
);
5147 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
5148 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
5150 init_insn_group_barriers ();
5152 else if (INSN_P (insn
))
5154 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
5155 init_insn_group_barriers ();
5156 else if (group_barrier_needed_p (insn
))
5158 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5159 init_insn_group_barriers ();
5160 group_barrier_needed_p (insn
);
5166 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
5167 static void errata_emit_nops
PARAMS ((rtx
));
5168 static void fixup_errata
PARAMS ((void));
5170 /* This structure is used to track some details about the previous insns
5171 groups so we can determine if it may be necessary to insert NOPs to
5172 workaround hardware errata. */
5175 HARD_REG_SET p_reg_set
;
5176 HARD_REG_SET gr_reg_conditionally_set
;
5179 /* Index into the last_group array. */
5180 static int group_idx
;
5182 /* Called through for_each_rtx; determines if a hard register that was
5183 conditionally set in the previous group is used as an address register.
5184 It ensures that for_each_rtx returns 1 in that case. */
5186 errata_find_address_regs (xp
, data
)
5188 void *data ATTRIBUTE_UNUSED
;
5191 if (GET_CODE (x
) != MEM
)
5194 if (GET_CODE (x
) == POST_MODIFY
)
5196 if (GET_CODE (x
) == REG
)
5198 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5199 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
5207 /* Called for each insn; this function keeps track of the state in
5208 last_group and emits additional NOPs if necessary to work around
5209 an Itanium A/B step erratum. */
5211 errata_emit_nops (insn
)
5214 struct group
*this_group
= last_group
+ group_idx
;
5215 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5216 rtx pat
= PATTERN (insn
);
5217 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5218 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5219 enum attr_type type
;
5222 if (GET_CODE (real_pat
) == USE
5223 || GET_CODE (real_pat
) == CLOBBER
5224 || GET_CODE (real_pat
) == ASM_INPUT
5225 || GET_CODE (real_pat
) == ADDR_VEC
5226 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5227 || asm_noperands (PATTERN (insn
)) >= 0)
5230 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5233 if (GET_CODE (set
) == PARALLEL
)
5236 set
= XVECEXP (real_pat
, 0, 0);
5237 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5238 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5239 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5246 if (set
&& GET_CODE (set
) != SET
)
5249 type
= get_attr_type (insn
);
5252 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5253 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5255 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5256 && REG_P (SET_DEST (set
))
5257 && GET_CODE (SET_SRC (set
)) != PLUS
5258 && GET_CODE (SET_SRC (set
)) != MINUS
5259 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5260 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5261 && (GET_CODE (SET_SRC (set
)) != MEM
5262 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5263 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5265 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
5266 || ! REG_P (XEXP (cond
, 0)))
5269 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5270 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5272 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5274 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5275 emit_insn_before (gen_nop (), insn
);
5276 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5278 memset (last_group
, 0, sizeof last_group
);
5282 /* Emit extra nops if they are required to work around hardware errata. */
5289 if (! TARGET_B_STEP
)
5293 memset (last_group
, 0, sizeof last_group
);
5295 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5300 if (ia64_safe_type (insn
) == TYPE_S
)
5303 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5306 errata_emit_nops (insn
);
5310 /* Instruction scheduling support. */
5311 /* Describe one bundle. */
5315 /* Zero if there's no possibility of a stop in this bundle other than
5316 at the end, otherwise the position of the optional stop bit. */
5318 /* The types of the three slots. */
5319 enum attr_type t
[3];
5320 /* The pseudo op to be emitted into the assembler output. */
5324 #define NR_BUNDLES 10
5326 /* A list of all available bundles. */
5328 static const struct bundle bundle
[NR_BUNDLES
] =
5330 { 2, { TYPE_M
, TYPE_I
, TYPE_I
}, ".mii" },
5331 { 1, { TYPE_M
, TYPE_M
, TYPE_I
}, ".mmi" },
5332 { 0, { TYPE_M
, TYPE_F
, TYPE_I
}, ".mfi" },
5333 { 0, { TYPE_M
, TYPE_M
, TYPE_F
}, ".mmf" },
5334 #if NR_BUNDLES == 10
5335 { 0, { TYPE_B
, TYPE_B
, TYPE_B
}, ".bbb" },
5336 { 0, { TYPE_M
, TYPE_B
, TYPE_B
}, ".mbb" },
5338 { 0, { TYPE_M
, TYPE_I
, TYPE_B
}, ".mib" },
5339 { 0, { TYPE_M
, TYPE_M
, TYPE_B
}, ".mmb" },
5340 { 0, { TYPE_M
, TYPE_F
, TYPE_B
}, ".mfb" },
5341 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5342 it matches an L type insn. Otherwise we'll try to generate L type
5344 { 0, { TYPE_M
, TYPE_L
, TYPE_X
}, ".mlx" }
5347 /* Describe a packet of instructions. Packets consist of two bundles that
5348 are visible to the hardware in one scheduling window. */
5352 const struct bundle
*t1
, *t2
;
5353 /* Precomputed value of the first split issue in this packet if a cycle
5354 starts at its beginning. */
5356 /* For convenience, the insn types are replicated here so we don't have
5357 to go through T1 and T2 all the time. */
5358 enum attr_type t
[6];
5361 /* An array containing all possible packets. */
5362 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5363 static struct ia64_packet packets
[NR_PACKETS
];
5365 /* Map attr_type to a string with the name. */
5367 static const char *const type_names
[] =
5369 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5372 /* Nonzero if we should insert stop bits into the schedule. */
5373 int ia64_final_schedule
= 0;
5375 static int itanium_split_issue
PARAMS ((const struct ia64_packet
*, int));
5376 static rtx ia64_single_set
PARAMS ((rtx
));
5377 static int insn_matches_slot
PARAMS ((const struct ia64_packet
*, enum attr_type
, int, rtx
));
5378 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
5379 static void maybe_rotate
PARAMS ((FILE *));
5380 static void finish_last_head
PARAMS ((FILE *, int));
5381 static void rotate_one_bundle
PARAMS ((FILE *));
5382 static void rotate_two_bundles
PARAMS ((FILE *));
5383 static void nop_cycles_until
PARAMS ((int, FILE *));
5384 static void cycle_end_fill_slots
PARAMS ((FILE *));
5385 static int packet_matches_p
PARAMS ((const struct ia64_packet
*, int, int *));
5386 static int get_split
PARAMS ((const struct ia64_packet
*, int));
5387 static int find_best_insn
PARAMS ((rtx
*, enum attr_type
*, int,
5388 const struct ia64_packet
*, int));
5389 static void find_best_packet
PARAMS ((int *, const struct ia64_packet
**,
5390 rtx
*, enum attr_type
*, int));
5391 static int itanium_reorder
PARAMS ((FILE *, rtx
*, rtx
*, int));
5392 static void dump_current_packet
PARAMS ((FILE *));
5393 static void schedule_stop
PARAMS ((FILE *));
5394 static rtx gen_nop_type
PARAMS ((enum attr_type
));
5395 static void ia64_emit_nops
PARAMS ((void));
5397 /* Map a bundle number to its pseudo-op. */
5403 return bundle
[b
].name
;
5406 /* Compute the slot which will cause a split issue in packet P if the
5407 current cycle begins at slot BEGIN. */
5410 itanium_split_issue (p
, begin
)
5411 const struct ia64_packet
*p
;
5414 int type_count
[TYPE_S
];
5420 /* Always split before and after MMF. */
5421 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_M
&& p
->t
[2] == TYPE_F
)
5423 if (p
->t
[3] == TYPE_M
&& p
->t
[4] == TYPE_M
&& p
->t
[5] == TYPE_F
)
5425 /* Always split after MBB and BBB. */
5426 if (p
->t
[1] == TYPE_B
)
5428 /* Split after first bundle in MIB BBB combination. */
5429 if (p
->t
[2] == TYPE_B
&& p
->t
[3] == TYPE_B
)
5433 memset (type_count
, 0, sizeof type_count
);
5434 for (i
= begin
; i
< split
; i
++)
5436 enum attr_type t0
= p
->t
[i
];
5437 /* An MLX bundle reserves the same units as an MFI bundle. */
5438 enum attr_type t
= (t0
== TYPE_L
? TYPE_F
5439 : t0
== TYPE_X
? TYPE_I
5442 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5443 2 integer per cycle. */
5444 int max
= (t
== TYPE_B
? 3 : 2);
5445 if (type_count
[t
] == max
)
5453 /* Return the maximum number of instructions a cpu can issue. */
5461 /* Helper function - like single_set, but look inside COND_EXEC. */
5464 ia64_single_set (insn
)
5467 rtx x
= PATTERN (insn
), ret
;
5468 if (GET_CODE (x
) == COND_EXEC
)
5469 x
= COND_EXEC_CODE (x
);
5470 if (GET_CODE (x
) == SET
)
5473 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5474 Although they are not classical single set, the second set is there just
5475 to protect it from moving past FP-relative stack accesses. */
5476 switch (recog_memoized (insn
))
5478 case CODE_FOR_prologue_allocate_stack
:
5479 case CODE_FOR_epilogue_deallocate_stack
:
5480 ret
= XVECEXP (x
, 0, 0);
5484 ret
= single_set_2 (insn
, x
);
5491 /* Adjust the cost of a scheduling dependency. Return the new cost of
5492 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5495 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5496 rtx insn
, link
, dep_insn
;
5499 enum attr_type dep_type
;
5500 enum attr_itanium_class dep_class
;
5501 enum attr_itanium_class insn_class
;
5502 rtx dep_set
, set
, src
, addr
;
5504 if (GET_CODE (PATTERN (insn
)) == CLOBBER
5505 || GET_CODE (PATTERN (insn
)) == USE
5506 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
5507 || GET_CODE (PATTERN (dep_insn
)) == USE
5508 /* @@@ Not accurate for indirect calls. */
5509 || GET_CODE (insn
) == CALL_INSN
5510 || ia64_safe_type (insn
) == TYPE_S
)
5513 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
5514 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
5517 dep_type
= ia64_safe_type (dep_insn
);
5518 dep_class
= ia64_safe_itanium_class (dep_insn
);
5519 insn_class
= ia64_safe_itanium_class (insn
);
5521 /* Compares that feed a conditional branch can execute in the same
5523 dep_set
= ia64_single_set (dep_insn
);
5524 set
= ia64_single_set (insn
);
5526 if (dep_type
!= TYPE_F
5528 && GET_CODE (SET_DEST (dep_set
)) == REG
5529 && PR_REG (REGNO (SET_DEST (dep_set
)))
5530 && GET_CODE (insn
) == JUMP_INSN
)
5533 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
5535 /* ??? Can't find any information in the documenation about whether
5539 splits issue. Assume it doesn't. */
5543 src
= set
? SET_SRC (set
) : 0;
5547 if (GET_CODE (SET_DEST (set
)) == MEM
)
5548 addr
= XEXP (SET_DEST (set
), 0);
5549 else if (GET_CODE (SET_DEST (set
)) == SUBREG
5550 && GET_CODE (SUBREG_REG (SET_DEST (set
))) == MEM
)
5551 addr
= XEXP (SUBREG_REG (SET_DEST (set
)), 0);
5555 if (GET_CODE (addr
) == UNSPEC
&& XVECLEN (addr
, 0) > 0)
5556 addr
= XVECEXP (addr
, 0, 0);
5557 while (GET_CODE (addr
) == SUBREG
|| GET_CODE (addr
) == ZERO_EXTEND
)
5558 addr
= XEXP (addr
, 0);
5559 if (GET_CODE (addr
) == MEM
)
5560 addr
= XEXP (addr
, 0);
5566 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5567 addr
= XEXP (addr
, 0);
5569 set
= ia64_single_set (dep_insn
);
5571 if ((dep_class
== ITANIUM_CLASS_IALU
5572 || dep_class
== ITANIUM_CLASS_ILOG
5573 || dep_class
== ITANIUM_CLASS_LD
)
5574 && (insn_class
== ITANIUM_CLASS_LD
5575 || insn_class
== ITANIUM_CLASS_ST
))
5577 if (! addr
|| ! set
)
5579 /* This isn't completely correct - an IALU that feeds an address has
5580 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5581 otherwise. Unfortunately there's no good way to describe this. */
5582 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5586 if ((dep_class
== ITANIUM_CLASS_IALU
5587 || dep_class
== ITANIUM_CLASS_ILOG
5588 || dep_class
== ITANIUM_CLASS_LD
)
5589 && (insn_class
== ITANIUM_CLASS_MMMUL
5590 || insn_class
== ITANIUM_CLASS_MMSHF
5591 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5594 if (dep_class
== ITANIUM_CLASS_FMAC
5595 && (insn_class
== ITANIUM_CLASS_FMISC
5596 || insn_class
== ITANIUM_CLASS_FCVTFX
5597 || insn_class
== ITANIUM_CLASS_XMPY
))
5600 if ((dep_class
== ITANIUM_CLASS_FMAC
5601 || dep_class
== ITANIUM_CLASS_FMISC
5602 || dep_class
== ITANIUM_CLASS_FCVTFX
5603 || dep_class
== ITANIUM_CLASS_XMPY
)
5604 && insn_class
== ITANIUM_CLASS_STF
)
5607 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5608 but HP engineers say any non-MM operation. */
5609 if ((dep_class
== ITANIUM_CLASS_MMMUL
5610 || dep_class
== ITANIUM_CLASS_MMSHF
5611 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5612 && insn_class
!= ITANIUM_CLASS_MMMUL
5613 && insn_class
!= ITANIUM_CLASS_MMSHF
5614 && insn_class
!= ITANIUM_CLASS_MMSHFI
)
5620 /* Describe the current state of the Itanium pipeline. */
5623 /* The first slot that is used in the current cycle. */
5625 /* The next slot to fill. */
5627 /* The packet we have selected for the current issue window. */
5628 const struct ia64_packet
*packet
;
5629 /* The position of the split issue that occurs due to issue width
5630 limitations (6 if there's no split issue). */
5632 /* Record data about the insns scheduled so far in the same issue
5633 window. The elements up to but not including FIRST_SLOT belong
5634 to the previous cycle, the ones starting with FIRST_SLOT belong
5635 to the current cycle. */
5636 enum attr_type types
[6];
5639 /* Nonzero if we decided to schedule a stop bit. */
5643 /* Temporary arrays; they have enough elements to hold all insns that
5644 can be ready at the same time while scheduling of the current block.
5645 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5646 static rtx
*sched_ready
;
5647 static enum attr_type
*sched_types
;
5649 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5653 insn_matches_slot (p
, itype
, slot
, insn
)
5654 const struct ia64_packet
*p
;
5655 enum attr_type itype
;
5659 enum attr_itanium_requires_unit0 u0
;
5660 enum attr_type stype
= p
->t
[slot
];
5664 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5665 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5668 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5669 if (p
->t
[i
] == stype
5670 || (stype
== TYPE_F
&& p
->t
[i
] == TYPE_L
)
5671 || (stype
== TYPE_I
&& p
->t
[i
] == TYPE_X
))
5674 if (GET_CODE (insn
) == CALL_INSN
)
5676 /* Reject calls in multiway branch packets. We want to limit
5677 the number of multiway branches we generate (since the branch
5678 predictor is limited), and this seems to work fairly well.
5679 (If we didn't do this, we'd have to add another test here to
5680 force calls into the third slot of the bundle.) */
5683 if (p
->t
[1] == TYPE_B
)
5688 if (p
->t
[4] == TYPE_B
)
5696 if (itype
== TYPE_A
)
5697 return stype
== TYPE_M
|| stype
== TYPE_I
;
5701 /* Like emit_insn_before, but skip cycle_display notes.
5702 ??? When cycle display notes are implemented, update this. */
5705 ia64_emit_insn_before (insn
, before
)
5708 emit_insn_before (insn
, before
);
5711 /* When rotating a bundle out of the issue window, insert a bundle selector
5712 insn in front of it. DUMP is the scheduling dump file or NULL. START
5713 is either 0 or 3, depending on whether we want to emit a bundle selector
5714 for the first bundle or the second bundle in the current issue window.
5716 The selector insns are emitted this late because the selected packet can
5717 be changed until parts of it get rotated out. */
5720 finish_last_head (dump
, start
)
5724 const struct ia64_packet
*p
= sched_data
.packet
;
5725 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5726 int bundle_type
= b
- bundle
;
5730 if (! ia64_final_schedule
)
5733 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5736 insn
= sched_data
.insns
[i
];
5739 fprintf (dump
, "// Emitting template before %d: %s\n",
5740 INSN_UID (insn
), b
->name
);
5742 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5745 /* We can't schedule more insns this cycle. Fix up the scheduling state
5746 and advance FIRST_SLOT and CUR.
5747 We have to distribute the insns that are currently found between
5748 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5749 far, they are stored successively in the fields starting at FIRST_SLOT;
5750 now they must be moved to the correct slots.
5751 DUMP is the current scheduling dump file, or NULL. */
5754 cycle_end_fill_slots (dump
)
5757 const struct ia64_packet
*packet
= sched_data
.packet
;
5759 enum attr_type tmp_types
[6];
5762 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5763 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5765 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5767 enum attr_type t
= tmp_types
[i
];
5768 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5770 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5772 if (slot
> sched_data
.split
)
5775 fprintf (dump
, "// Packet needs %s, have %s\n",
5776 type_names
[packet
->t
[slot
]], type_names
[t
]);
5777 sched_data
.types
[slot
] = packet
->t
[slot
];
5778 sched_data
.insns
[slot
] = 0;
5779 sched_data
.stopbit
[slot
] = 0;
5781 /* ??? TYPE_L instructions always fill up two slots, but we don't
5782 support TYPE_L nops. */
5783 if (packet
->t
[slot
] == TYPE_L
)
5789 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5790 actual slot type later. */
5791 sched_data
.types
[slot
] = packet
->t
[slot
];
5792 sched_data
.insns
[slot
] = tmp_insns
[i
];
5793 sched_data
.stopbit
[slot
] = 0;
5796 /* TYPE_L instructions always fill up two slots. */
5799 sched_data
.types
[slot
] = packet
->t
[slot
];
5800 sched_data
.insns
[slot
] = 0;
5801 sched_data
.stopbit
[slot
] = 0;
5806 /* This isn't right - there's no need to pad out until the forced split;
5807 the CPU will automatically split if an insn isn't ready. */
5809 while (slot
< sched_data
.split
)
5811 sched_data
.types
[slot
] = packet
->t
[slot
];
5812 sched_data
.insns
[slot
] = 0;
5813 sched_data
.stopbit
[slot
] = 0;
5818 sched_data
.first_slot
= sched_data
.cur
= slot
;
5821 /* Bundle rotations, as described in the Itanium optimization manual.
5822 We can rotate either one or both bundles out of the issue window.
5823 DUMP is the current scheduling dump file, or NULL. */
5826 rotate_one_bundle (dump
)
5830 fprintf (dump
, "// Rotating one bundle.\n");
5832 finish_last_head (dump
, 0);
5833 if (sched_data
.cur
> 3)
5835 sched_data
.cur
-= 3;
5836 sched_data
.first_slot
-= 3;
5837 memmove (sched_data
.types
,
5838 sched_data
.types
+ 3,
5839 sched_data
.cur
* sizeof *sched_data
.types
);
5840 memmove (sched_data
.stopbit
,
5841 sched_data
.stopbit
+ 3,
5842 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5843 memmove (sched_data
.insns
,
5844 sched_data
.insns
+ 3,
5845 sched_data
.cur
* sizeof *sched_data
.insns
);
5847 = &packets
[(sched_data
.packet
->t2
- bundle
) * NR_BUNDLES
];
5852 sched_data
.first_slot
= 0;
5857 rotate_two_bundles (dump
)
5861 fprintf (dump
, "// Rotating two bundles.\n");
5863 if (sched_data
.cur
== 0)
5866 finish_last_head (dump
, 0);
5867 if (sched_data
.cur
> 3)
5868 finish_last_head (dump
, 3);
5870 sched_data
.first_slot
= 0;
5873 /* We're beginning a new block. Initialize data structures as necessary. */
5876 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5877 FILE *dump ATTRIBUTE_UNUSED
;
5878 int sched_verbose ATTRIBUTE_UNUSED
;
5881 static int initialized
= 0;
5889 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5891 const struct bundle
*t1
= bundle
+ b1
;
5892 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5894 const struct bundle
*t2
= bundle
+ b2
;
5900 for (i
= 0; i
< NR_PACKETS
; i
++)
5903 for (j
= 0; j
< 3; j
++)
5904 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5905 for (j
= 0; j
< 3; j
++)
5906 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5907 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5912 init_insn_group_barriers ();
5914 memset (&sched_data
, 0, sizeof sched_data
);
5915 sched_types
= (enum attr_type
*) xmalloc (max_ready
5916 * sizeof (enum attr_type
));
5917 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5920 /* See if the packet P can match the insns we have already scheduled. Return
5921 nonzero if so. In *PSLOT, we store the first slot that is available for
5922 more instructions if we choose this packet.
5923 SPLIT holds the last slot we can use, there's a split issue after it so
5924 scheduling beyond it would cause us to use more than one cycle. */
5927 packet_matches_p (p
, split
, pslot
)
5928 const struct ia64_packet
*p
;
5932 int filled
= sched_data
.cur
;
5933 int first
= sched_data
.first_slot
;
5936 /* First, check if the first of the two bundles must be a specific one (due
5938 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5940 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5943 for (i
= 0; i
< first
; i
++)
5944 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5945 sched_data
.insns
[i
]))
5947 for (i
= slot
= first
; i
< filled
; i
++)
5949 while (slot
< split
)
5951 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5952 sched_data
.insns
[i
]))
5966 /* A frontend for itanium_split_issue. For a packet P and a slot
5967 number FIRST that describes the start of the current clock cycle,
5968 return the slot number of the first split issue. This function
5969 uses the cached number found in P if possible. */
5972 get_split (p
, first
)
5973 const struct ia64_packet
*p
;
5977 return p
->first_split
;
5978 return itanium_split_issue (p
, first
);
5981 /* Given N_READY insns in the array READY, whose types are found in the
5982 corresponding array TYPES, return the insn that is best suited to be
5983 scheduled in slot SLOT of packet P. */
5986 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5988 enum attr_type
*types
;
5990 const struct ia64_packet
*p
;
5995 while (n_ready
-- > 0)
5997 rtx insn
= ready
[n_ready
];
6000 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
6002 /* If we have equally good insns, one of which has a stricter
6003 slot requirement, prefer the one with the stricter requirement. */
6004 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
6006 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
6009 best_pri
= INSN_PRIORITY (ready
[best
]);
6011 /* If there's no way we could get a stricter requirement, stop
6013 if (types
[n_ready
] != TYPE_A
6014 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
6022 /* Select the best packet to use given the current scheduler state and the
6024 READY is an array holding N_READY ready insns; TYPES is a corresponding
6025 array that holds their types. Store the best packet in *PPACKET and the
6026 number of insns that can be scheduled in the current cycle in *PBEST. */
6029 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
6031 const struct ia64_packet
**ppacket
;
6033 enum attr_type
*types
;
6036 int first
= sched_data
.first_slot
;
6039 const struct ia64_packet
*best_packet
= NULL
;
6042 for (i
= 0; i
< NR_PACKETS
; i
++)
6044 const struct ia64_packet
*p
= packets
+ i
;
6046 int split
= get_split (p
, first
);
6048 int first_slot
, last_slot
;
6051 if (! packet_matches_p (p
, split
, &first_slot
))
6054 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
6058 for (slot
= first_slot
; slot
< split
; slot
++)
6062 /* Disallow a degenerate case where the first bundle doesn't
6063 contain anything but NOPs! */
6064 if (first_slot
== 0 && win
== 0 && slot
== 3)
6070 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
6073 sched_ready
[insn_nr
] = 0;
6077 else if (p
->t
[slot
] == TYPE_B
)
6080 /* We must disallow MBB/BBB packets if any of their B slots would be
6081 filled with nops. */
6084 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
6089 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
6094 || (win
== best
&& last_slot
< lowest_end
))
6097 lowest_end
= last_slot
;
6102 *ppacket
= best_packet
;
6105 /* Reorder the ready list so that the insns that can be issued in this cycle
6106 are found in the correct order at the end of the list.
6107 DUMP is the scheduling dump file, or NULL. READY points to the start,
6108 E_READY to the end of the ready list. MAY_FAIL determines what should be
6109 done if no insns can be scheduled in this cycle: if it is zero, we abort,
6110 otherwise we return 0.
6111 Return 1 if any insns can be scheduled in this cycle. */
6114 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
6120 const struct ia64_packet
*best_packet
;
6121 int n_ready
= e_ready
- ready
;
6122 int first
= sched_data
.first_slot
;
6123 int i
, best
, best_split
, filled
;
6125 for (i
= 0; i
< n_ready
; i
++)
6126 sched_types
[i
] = ia64_safe_type (ready
[i
]);
6128 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
6139 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
6140 best_packet
->t1
->name
,
6141 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
6144 best_split
= itanium_split_issue (best_packet
, first
);
6145 packet_matches_p (best_packet
, best_split
, &filled
);
6147 for (i
= filled
; i
< best_split
; i
++)
6151 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
6154 rtx insn
= ready
[insn_nr
];
6155 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
6156 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
6157 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
6158 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
6159 ready
[--n_ready
] = insn
;
6163 sched_data
.packet
= best_packet
;
6164 sched_data
.split
= best_split
;
6168 /* Dump information about the current scheduling state to file DUMP. */
6171 dump_current_packet (dump
)
6175 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
6176 for (i
= 0; i
< sched_data
.first_slot
; i
++)
6178 rtx insn
= sched_data
.insns
[i
];
6179 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
6181 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
6182 if (sched_data
.stopbit
[i
])
6183 fprintf (dump
, " ;;");
6185 fprintf (dump
, " :::");
6186 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
6188 rtx insn
= sched_data
.insns
[i
];
6189 enum attr_type t
= ia64_safe_type (insn
);
6190 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
6192 fprintf (dump
, "\n");
6195 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
6199 schedule_stop (dump
)
6202 const struct ia64_packet
*best
= sched_data
.packet
;
6207 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
6209 if (sched_data
.cur
== 0)
6212 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
6214 rotate_two_bundles (NULL
);
6218 for (i
= -1; i
< NR_PACKETS
; i
++)
6220 /* This is a slight hack to give the current packet the first chance.
6221 This is done to avoid e.g. switching from MIB to MBB bundles. */
6222 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
6223 int split
= get_split (p
, sched_data
.first_slot
);
6224 const struct bundle
*compare
;
6227 if (! packet_matches_p (p
, split
, &next
))
6230 compare
= next
> 3 ? p
->t2
: p
->t1
;
6233 if (compare
->possible_stop
)
6234 stoppos
= compare
->possible_stop
;
6238 if (stoppos
< next
|| stoppos
>= best_stop
)
6240 if (compare
->possible_stop
== 0)
6242 stoppos
= (next
> 3 ? 6 : 3);
6244 if (stoppos
< next
|| stoppos
>= best_stop
)
6248 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
6249 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
6252 best_stop
= stoppos
;
6256 sched_data
.packet
= best
;
6257 cycle_end_fill_slots (dump
);
6258 while (sched_data
.cur
< best_stop
)
6260 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
6261 sched_data
.insns
[sched_data
.cur
] = 0;
6262 sched_data
.stopbit
[sched_data
.cur
] = 0;
6265 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
6266 sched_data
.first_slot
= best_stop
;
6269 dump_current_packet (dump
);
6272 /* If necessary, perform one or two rotations on the scheduling state.
6273 This should only be called if we are starting a new cycle. */
6279 cycle_end_fill_slots (dump
);
6280 if (sched_data
.cur
== 6)
6281 rotate_two_bundles (dump
);
6282 else if (sched_data
.cur
>= 3)
6283 rotate_one_bundle (dump
);
6284 sched_data
.first_slot
= sched_data
.cur
;
6287 /* The clock cycle when ia64_sched_reorder was last called. */
6288 static int prev_cycle
;
6290 /* The first insn scheduled in the previous cycle. This is the saved
6291 value of sched_data.first_slot. */
6292 static int prev_first
;
6294 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6295 pad out the delay between MM (shifts, etc.) and integer operations. */
6298 nop_cycles_until (clock_var
, dump
)
6302 int prev_clock
= prev_cycle
;
6303 int cycles_left
= clock_var
- prev_clock
;
6304 bool did_stop
= false;
6306 /* Finish the previous cycle; pad it out with NOPs. */
6307 if (sched_data
.cur
== 3)
6309 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6311 maybe_rotate (dump
);
6313 else if (sched_data
.cur
> 0)
6316 int split
= itanium_split_issue (sched_data
.packet
, prev_first
);
6318 if (sched_data
.cur
< 3 && split
> 3)
6324 if (split
> sched_data
.cur
)
6327 for (i
= sched_data
.cur
; i
< split
; i
++)
6329 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6330 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6331 sched_data
.insns
[i
] = t
;
6332 sched_data
.stopbit
[i
] = 0;
6334 sched_data
.cur
= split
;
6337 if (! need_stop
&& sched_data
.cur
> 0 && sched_data
.cur
< 6
6341 for (i
= sched_data
.cur
; i
< 6; i
++)
6343 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6344 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6345 sched_data
.insns
[i
] = t
;
6346 sched_data
.stopbit
[i
] = 0;
6353 if (need_stop
|| sched_data
.cur
== 6)
6355 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6358 maybe_rotate (dump
);
6362 while (cycles_left
> 0)
6364 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6365 sched_emit_insn (gen_nop_type (TYPE_M
));
6366 sched_emit_insn (gen_nop_type (TYPE_I
));
6367 if (cycles_left
> 1)
6369 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6372 sched_emit_insn (gen_nop_type (TYPE_I
));
6373 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6379 init_insn_group_barriers ();
6382 /* We are about to being issuing insns for this clock cycle.
6383 Override the default sort algorithm to better slot instructions. */
6386 ia64_internal_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6387 reorder_type
, clock_var
)
6388 FILE *dump ATTRIBUTE_UNUSED
;
6389 int sched_verbose ATTRIBUTE_UNUSED
;
6392 int reorder_type
, clock_var
;
6395 int n_ready
= *pn_ready
;
6396 rtx
*e_ready
= ready
+ n_ready
;
6401 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
6402 dump_current_packet (dump
);
6405 /* Work around the pipeline flush that will occurr if the results of
6406 an MM instruction are accessed before the result is ready. Intel
6407 documentation says this only happens with IALU, ISHF, ILOG, LD,
6408 and ST consumers, but experimental evidence shows that *any* non-MM
6409 type instruction will incurr the flush. */
6410 if (reorder_type
== 0 && clock_var
> 0 && ia64_final_schedule
)
6412 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6414 rtx insn
= *insnp
, link
;
6415 enum attr_itanium_class t
= ia64_safe_itanium_class (insn
);
6417 if (t
== ITANIUM_CLASS_MMMUL
6418 || t
== ITANIUM_CLASS_MMSHF
6419 || t
== ITANIUM_CLASS_MMSHFI
)
6422 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6423 if (REG_NOTE_KIND (link
) == 0)
6425 rtx other
= XEXP (link
, 0);
6426 enum attr_itanium_class t0
= ia64_safe_itanium_class (other
);
6427 if (t0
== ITANIUM_CLASS_MMSHF
|| t0
== ITANIUM_CLASS_MMMUL
)
6429 nop_cycles_until (clock_var
, sched_verbose
? dump
: NULL
);
6437 prev_first
= sched_data
.first_slot
;
6438 prev_cycle
= clock_var
;
6440 if (reorder_type
== 0)
6441 maybe_rotate (sched_verbose
? dump
: NULL
);
6443 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6445 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6446 if (insnp
< e_ready
)
6449 enum attr_type t
= ia64_safe_type (insn
);
6450 if (t
== TYPE_UNKNOWN
)
6452 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6453 || asm_noperands (PATTERN (insn
)) >= 0)
6455 rtx lowest
= ready
[n_asms
];
6456 ready
[n_asms
] = insn
;
6462 rtx highest
= ready
[n_ready
- 1];
6463 ready
[n_ready
- 1] = insn
;
6465 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
6467 schedule_stop (sched_verbose
? dump
: NULL
);
6468 sched_data
.last_was_stop
= 1;
6469 maybe_rotate (sched_verbose
? dump
: NULL
);
6476 if (n_asms
< n_ready
)
6478 /* Some normal insns to process. Skip the asms. */
6482 else if (n_ready
> 0)
6484 /* Only asm insns left. */
6485 if (ia64_final_schedule
&& group_barrier_needed_p (ready
[n_ready
- 1]))
6487 schedule_stop (sched_verbose
? dump
: NULL
);
6488 sched_data
.last_was_stop
= 1;
6489 maybe_rotate (sched_verbose
? dump
: NULL
);
6491 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6495 if (ia64_final_schedule
)
6497 int nr_need_stop
= 0;
6499 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6500 if (safe_group_barrier_needed_p (*insnp
))
6503 /* Schedule a stop bit if
6504 - all insns require a stop bit, or
6505 - we are starting a new cycle and _any_ insns require a stop bit.
6506 The reason for the latter is that if our schedule is accurate, then
6507 the additional stop won't decrease performance at this point (since
6508 there's a split issue at this point anyway), but it gives us more
6509 freedom when scheduling the currently ready insns. */
6510 if ((reorder_type
== 0 && nr_need_stop
)
6511 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
6513 schedule_stop (sched_verbose
? dump
: NULL
);
6514 sched_data
.last_was_stop
= 1;
6515 maybe_rotate (sched_verbose
? dump
: NULL
);
6516 if (reorder_type
== 1)
6523 /* Move down everything that needs a stop bit, preserving relative
6525 while (insnp
-- > ready
+ deleted
)
6526 while (insnp
>= ready
+ deleted
)
6529 if (! safe_group_barrier_needed_p (insn
))
6531 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6537 if (deleted
!= nr_need_stop
)
6542 return itanium_reorder (sched_verbose
? dump
: NULL
,
6543 ready
, e_ready
, reorder_type
== 1);
6547 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6554 return ia64_internal_sched_reorder (dump
, sched_verbose
, ready
,
6555 pn_ready
, 0, clock_var
);
6558 /* Like ia64_sched_reorder, but called after issuing each insn.
6559 Override the default sort algorithm to better slot instructions. */
6562 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6563 FILE *dump ATTRIBUTE_UNUSED
;
6564 int sched_verbose ATTRIBUTE_UNUSED
;
6569 if (sched_data
.last_was_stop
)
6572 /* Detect one special case and try to optimize it.
6573 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6574 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6575 if (sched_data
.first_slot
== 1
6576 && sched_data
.stopbit
[0]
6577 && ((sched_data
.cur
== 4
6578 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6579 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
6580 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
6581 || (sched_data
.cur
== 3
6582 && (sched_data
.types
[1] == TYPE_M
6583 || sched_data
.types
[1] == TYPE_A
)
6584 && (sched_data
.types
[2] != TYPE_M
6585 && sched_data
.types
[2] != TYPE_I
6586 && sched_data
.types
[2] != TYPE_A
))))
6590 rtx stop
= sched_data
.insns
[1];
6592 /* Search backward for the stop bit that must be there. */
6597 stop
= PREV_INSN (stop
);
6598 if (GET_CODE (stop
) != INSN
)
6600 insn_code
= recog_memoized (stop
);
6602 /* Ignore .pred.rel.mutex.
6604 ??? Update this to ignore cycle display notes too
6605 ??? once those are implemented */
6606 if (insn_code
== CODE_FOR_pred_rel_mutex
6607 || insn_code
== CODE_FOR_prologue_use
)
6610 if (insn_code
== CODE_FOR_insn_group_barrier
)
6615 /* Adjust the stop bit's slot selector. */
6616 if (INTVAL (XVECEXP (PATTERN (stop
), 0, 0)) != 1)
6618 XVECEXP (PATTERN (stop
), 0, 0) = GEN_INT (3);
6620 sched_data
.stopbit
[0] = 0;
6621 sched_data
.stopbit
[2] = 1;
6623 sched_data
.types
[5] = sched_data
.types
[3];
6624 sched_data
.types
[4] = sched_data
.types
[2];
6625 sched_data
.types
[3] = sched_data
.types
[1];
6626 sched_data
.insns
[5] = sched_data
.insns
[3];
6627 sched_data
.insns
[4] = sched_data
.insns
[2];
6628 sched_data
.insns
[3] = sched_data
.insns
[1];
6629 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
6630 sched_data
.cur
+= 2;
6631 sched_data
.first_slot
= 3;
6632 for (i
= 0; i
< NR_PACKETS
; i
++)
6634 const struct ia64_packet
*p
= packets
+ i
;
6635 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
6637 sched_data
.packet
= p
;
6641 rotate_one_bundle (sched_verbose
? dump
: NULL
);
6644 for (i
= 0; i
< NR_PACKETS
; i
++)
6646 const struct ia64_packet
*p
= packets
+ i
;
6647 int split
= get_split (p
, sched_data
.first_slot
);
6650 /* Disallow multiway branches here. */
6651 if (p
->t
[1] == TYPE_B
)
6654 if (packet_matches_p (p
, split
, &next
) && next
< best
)
6657 sched_data
.packet
= p
;
6658 sched_data
.split
= split
;
6667 int more
= ia64_internal_sched_reorder (dump
, sched_verbose
,
6672 /* Did we schedule a stop? If so, finish this cycle. */
6673 if (sched_data
.cur
== sched_data
.first_slot
)
6678 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
6680 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6682 dump_current_packet (dump
);
6686 /* We are about to issue INSN. Return the number of insns left on the
6687 ready queue that can be issued this cycle. */
6690 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6694 int can_issue_more ATTRIBUTE_UNUSED
;
6696 enum attr_type t
= ia64_safe_type (insn
);
6698 if (sched_data
.last_was_stop
)
6700 int t
= sched_data
.first_slot
;
6703 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
6704 init_insn_group_barriers ();
6705 sched_data
.last_was_stop
= 0;
6708 if (t
== TYPE_UNKNOWN
)
6711 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
6712 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6713 || asm_noperands (PATTERN (insn
)) >= 0)
6715 /* This must be some kind of asm. Clear the scheduling state. */
6716 rotate_two_bundles (sched_verbose
? dump
: NULL
);
6717 if (ia64_final_schedule
)
6718 group_barrier_needed_p (insn
);
6723 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6724 important state info. Don't delete this test. */
6725 if (ia64_final_schedule
6726 && group_barrier_needed_p (insn
))
6729 sched_data
.stopbit
[sched_data
.cur
] = 0;
6730 sched_data
.insns
[sched_data
.cur
] = insn
;
6731 sched_data
.types
[sched_data
.cur
] = t
;
6735 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6736 INSN_UID (insn
), type_names
[t
]);
6738 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6740 schedule_stop (sched_verbose
? dump
: NULL
);
6741 sched_data
.last_was_stop
= 1;
6747 /* Free data allocated by ia64_sched_init. */
6750 ia64_sched_finish (dump
, sched_verbose
)
6755 fprintf (dump
, "// Finishing schedule.\n");
6756 rotate_two_bundles (NULL
);
6761 /* Emit pseudo-ops for the assembler to describe predicate relations.
6762 At present this assumes that we only consider predicate pairs to
6763 be mutex, and that the assembler can deduce proper values from
6764 straight-line code. */
6767 emit_predicate_relation_info ()
6771 FOR_EACH_BB_REVERSE (bb
)
6774 rtx head
= bb
->head
;
6776 /* We only need such notes at code labels. */
6777 if (GET_CODE (head
) != CODE_LABEL
)
6779 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6780 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6781 head
= NEXT_INSN (head
);
6783 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6784 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6786 rtx p
= gen_rtx_REG (BImode
, r
);
6787 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6788 if (head
== bb
->end
)
6794 /* Look for conditional calls that do not return, and protect predicate
6795 relations around them. Otherwise the assembler will assume the call
6796 returns, and complain about uses of call-clobbered predicates after
6798 FOR_EACH_BB_REVERSE (bb
)
6800 rtx insn
= bb
->head
;
6804 if (GET_CODE (insn
) == CALL_INSN
6805 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6806 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6808 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6809 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6810 if (bb
->head
== insn
)
6812 if (bb
->end
== insn
)
6816 if (insn
== bb
->end
)
6818 insn
= NEXT_INSN (insn
);
6823 /* Generate a NOP instruction of type T. We will never generate L type
6833 return gen_nop_m ();
6835 return gen_nop_i ();
6837 return gen_nop_b ();
6839 return gen_nop_f ();
6841 return gen_nop_x ();
6847 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6848 here than while scheduling. */
6854 const struct bundle
*b
= 0;
6857 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6861 pat
= INSN_P (insn
) ? PATTERN (insn
) : const0_rtx
;
6862 if (GET_CODE (pat
) == USE
|| GET_CODE (pat
) == CLOBBER
)
6864 if ((GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == UNSPEC_BUNDLE_SELECTOR
)
6865 || GET_CODE (insn
) == CODE_LABEL
)
6868 while (bundle_pos
< 3)
6870 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6873 if (GET_CODE (insn
) != CODE_LABEL
)
6874 b
= bundle
+ INTVAL (XVECEXP (pat
, 0, 0));
6880 else if (GET_CODE (pat
) == UNSPEC_VOLATILE
6881 && XINT (pat
, 1) == UNSPECV_INSN_GROUP_BARRIER
)
6883 int t
= INTVAL (XVECEXP (pat
, 0, 0));
6885 while (bundle_pos
< t
)
6887 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6893 if (bundle_pos
== 3)
6896 if (b
&& INSN_P (insn
))
6898 t
= ia64_safe_type (insn
);
6899 if (asm_noperands (PATTERN (insn
)) >= 0
6900 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)
6902 while (bundle_pos
< 3)
6904 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6910 if (t
== TYPE_UNKNOWN
)
6912 while (bundle_pos
< 3)
6914 if (t
== b
->t
[bundle_pos
]
6915 || (t
== TYPE_A
&& (b
->t
[bundle_pos
] == TYPE_M
6916 || b
->t
[bundle_pos
] == TYPE_I
)))
6919 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6928 /* Perform machine dependent operations on the rtl chain INSNS. */
6934 /* We are freeing block_for_insn in the toplev to keep compatibility
6935 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6936 compute_bb_for_insn (get_max_uid ());
6938 /* If optimizing, we'll have split before scheduling. */
6940 split_all_insns (0);
6942 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6943 non-optimizing bootstrap. */
6944 update_life_info (NULL
, UPDATE_LIFE_GLOBAL_RM_NOTES
, PROP_DEATH_NOTES
);
6946 if (ia64_flag_schedule_insns2
)
6948 timevar_push (TV_SCHED2
);
6949 ia64_final_schedule
= 1;
6950 schedule_ebbs (rtl_dump_file
);
6951 ia64_final_schedule
= 0;
6952 timevar_pop (TV_SCHED2
);
6954 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6955 place as they were during scheduling. */
6956 emit_insn_group_barriers (rtl_dump_file
, insns
);
6960 emit_all_insn_group_barriers (rtl_dump_file
, insns
);
6962 /* A call must not be the last instruction in a function, so that the
6963 return address is still within the function, so that unwinding works
6964 properly. Note that IA-64 differs from dwarf2 on this point. */
6965 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6970 insn
= get_last_insn ();
6971 if (! INSN_P (insn
))
6972 insn
= prev_active_insn (insn
);
6973 if (GET_CODE (insn
) == INSN
6974 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6975 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
6978 insn
= prev_active_insn (insn
);
6980 if (GET_CODE (insn
) == CALL_INSN
)
6983 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6984 emit_insn (gen_break_f ());
6985 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6990 emit_predicate_relation_info ();
6993 /* Return true if REGNO is used by the epilogue. */
6996 ia64_epilogue_uses (regno
)
7002 /* When a function makes a call through a function descriptor, we
7003 will write a (potentially) new value to "gp". After returning
7004 from such a call, we need to make sure the function restores the
7005 original gp-value, even if the function itself does not use the
7007 return (TARGET_CONST_GP
&& !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
));
7009 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7010 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7011 /* For functions defined with the syscall_linkage attribute, all
7012 input registers are marked as live at all function exits. This
7013 prevents the register allocator from using the input registers,
7014 which in turn makes it possible to restart a system call after
7015 an interrupt without having to save/restore the input registers.
7016 This also prevents kernel data from leaking to application code. */
7017 return lookup_attribute ("syscall_linkage",
7018 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
7021 /* Conditional return patterns can't represent the use of `b0' as
7022 the return address, so we force the value live this way. */
7026 /* Likewise for ar.pfs, which is used by br.ret. */
7034 /* Return true if REGNO is used by the frame unwinder. */
7037 ia64_eh_uses (regno
)
7040 if (! reload_completed
)
7043 if (current_frame_info
.reg_save_b0
7044 && regno
== current_frame_info
.reg_save_b0
)
7046 if (current_frame_info
.reg_save_pr
7047 && regno
== current_frame_info
.reg_save_pr
)
7049 if (current_frame_info
.reg_save_ar_pfs
7050 && regno
== current_frame_info
.reg_save_ar_pfs
)
7052 if (current_frame_info
.reg_save_ar_unat
7053 && regno
== current_frame_info
.reg_save_ar_unat
)
7055 if (current_frame_info
.reg_save_ar_lc
7056 && regno
== current_frame_info
.reg_save_ar_lc
)
7062 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7064 We add @ to the name if this goes in small data/bss. We can only put
7065 a variable in small data/bss if it is defined in this module or a module
7066 that we are statically linked with. We can't check the second condition,
7067 but TREE_STATIC gives us the first one. */
7069 /* ??? If we had IPA, we could check the second condition. We could support
7070 programmer added section attributes if the variable is not defined in this
7073 /* ??? See the v850 port for a cleaner way to do this. */
7075 /* ??? We could also support own long data here. Generating movl/add/ld8
7076 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7077 code faster because there is one less load. This also includes incomplete
7078 types which can't go in sdata/sbss. */
7081 ia64_in_small_data_p (exp
)
7084 if (TARGET_NO_SDATA
)
7087 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
7089 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
7090 if (strcmp (section
, ".sdata") == 0
7091 || strcmp (section
, ".sbss") == 0)
7096 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
7098 /* If this is an incomplete type with size 0, then we can't put it
7099 in sdata because it might be too big when completed. */
7100 if (size
> 0 && size
<= ia64_section_threshold
)
7108 ia64_encode_section_info (decl
, first
)
7110 int first ATTRIBUTE_UNUSED
;
7112 const char *symbol_str
;
7117 if (TREE_CODE (decl
) == FUNCTION_DECL
)
7119 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
7123 /* Careful not to prod global register variables. */
7124 if (TREE_CODE (decl
) != VAR_DECL
7125 || GET_CODE (DECL_RTL (decl
)) != MEM
7126 || GET_CODE (XEXP (DECL_RTL (decl
), 0)) != SYMBOL_REF
)
7129 symbol
= XEXP (DECL_RTL (decl
), 0);
7130 symbol_str
= XSTR (symbol
, 0);
7132 is_local
= (*targetm
.binds_local_p
) (decl
);
7134 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
7136 enum tls_model kind
;
7140 kind
= TLS_MODEL_LOCAL_EXEC
;
7142 kind
= TLS_MODEL_INITIAL_EXEC
;
7145 kind
= TLS_MODEL_LOCAL_DYNAMIC
;
7147 kind
= TLS_MODEL_GLOBAL_DYNAMIC
;
7148 if (kind
< flag_tls_default
)
7149 kind
= flag_tls_default
;
7151 encoding
= " GLil"[kind
];
7153 /* Determine if DECL will wind up in .sdata/.sbss. */
7154 else if (is_local
&& ia64_in_small_data_p (decl
))
7157 /* Finally, encode this into the symbol string. */
7163 if (symbol_str
[0] == ENCODE_SECTION_INFO_CHAR
)
7165 if (encoding
== symbol_str
[1])
7167 /* ??? Sdata became thread or thread becaome not thread. Lose. */
7171 len
= strlen (symbol_str
);
7172 newstr
= alloca (len
+ 3);
7173 newstr
[0] = ENCODE_SECTION_INFO_CHAR
;
7174 newstr
[1] = encoding
;
7175 memcpy (newstr
+ 2, symbol_str
, len
+ 1);
7177 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2);
7180 /* This decl is marked as being in small data/bss but it shouldn't be;
7181 one likely explanation for this is that the decl has been moved into
7182 a different section from the one it was in when encode_section_info
7183 was first called. Remove the encoding. */
7184 else if (symbol_str
[0] == ENCODE_SECTION_INFO_CHAR
)
7185 XSTR (symbol
, 0) = ggc_strdup (symbol_str
+ 2);
7189 ia64_strip_name_encoding (str
)
7192 if (str
[0] == ENCODE_SECTION_INFO_CHAR
)
7199 /* Output assembly directives for prologue regions. */
7201 /* The current basic block number. */
7203 static bool last_block
;
7205 /* True if we need a copy_state command at the start of the next block. */
7207 static bool need_copy_state
;
7209 /* The function emits unwind directives for the start of an epilogue. */
7214 /* If this isn't the last block of the function, then we need to label the
7215 current state, and copy it back in at the start of the next block. */
7219 fprintf (asm_out_file
, "\t.label_state 1\n");
7220 need_copy_state
= true;
7223 fprintf (asm_out_file
, "\t.restore sp\n");
7226 /* This function processes a SET pattern looking for specific patterns
7227 which result in emitting an assembly directive required for unwinding. */
7230 process_set (asm_out_file
, pat
)
7234 rtx src
= SET_SRC (pat
);
7235 rtx dest
= SET_DEST (pat
);
7236 int src_regno
, dest_regno
;
7238 /* Look for the ALLOC insn. */
7239 if (GET_CODE (src
) == UNSPEC_VOLATILE
7240 && XINT (src
, 1) == UNSPECV_ALLOC
7241 && GET_CODE (dest
) == REG
)
7243 dest_regno
= REGNO (dest
);
7245 /* If this isn't the final destination for ar.pfs, the alloc
7246 shouldn't have been marked frame related. */
7247 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
7250 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7251 ia64_dbx_register_number (dest_regno
));
7255 /* Look for SP = .... */
7256 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7258 if (GET_CODE (src
) == PLUS
)
7260 rtx op0
= XEXP (src
, 0);
7261 rtx op1
= XEXP (src
, 1);
7262 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
7264 if (INTVAL (op1
) < 0)
7266 fputs ("\t.fframe ", asm_out_file
);
7267 fprintf (asm_out_file
, HOST_WIDE_INT_PRINT_DEC
,
7269 fputc ('\n', asm_out_file
);
7272 process_epilogue ();
7277 else if (GET_CODE (src
) == REG
7278 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
7279 process_epilogue ();
7286 /* Register move we need to look at. */
7287 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7289 src_regno
= REGNO (src
);
7290 dest_regno
= REGNO (dest
);
7295 /* Saving return address pointer. */
7296 if (dest_regno
!= current_frame_info
.reg_save_b0
)
7298 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7299 ia64_dbx_register_number (dest_regno
));
7303 if (dest_regno
!= current_frame_info
.reg_save_pr
)
7305 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7306 ia64_dbx_register_number (dest_regno
));
7309 case AR_UNAT_REGNUM
:
7310 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
7312 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7313 ia64_dbx_register_number (dest_regno
));
7317 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
7319 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7320 ia64_dbx_register_number (dest_regno
));
7323 case STACK_POINTER_REGNUM
:
7324 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
7325 || ! frame_pointer_needed
)
7327 fprintf (asm_out_file
, "\t.vframe r%d\n",
7328 ia64_dbx_register_number (dest_regno
));
7332 /* Everything else should indicate being stored to memory. */
7337 /* Memory store we need to look at. */
7338 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7344 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7346 base
= XEXP (dest
, 0);
7349 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
7350 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
7352 base
= XEXP (XEXP (dest
, 0), 0);
7353 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7358 if (base
== hard_frame_pointer_rtx
)
7360 saveop
= ".savepsp";
7363 else if (base
== stack_pointer_rtx
)
7368 src_regno
= REGNO (src
);
7372 if (current_frame_info
.reg_save_b0
!= 0)
7374 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7378 if (current_frame_info
.reg_save_pr
!= 0)
7380 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7384 if (current_frame_info
.reg_save_ar_lc
!= 0)
7386 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7390 if (current_frame_info
.reg_save_ar_pfs
!= 0)
7392 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7395 case AR_UNAT_REGNUM
:
7396 if (current_frame_info
.reg_save_ar_unat
!= 0)
7398 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7405 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7406 1 << (src_regno
- GR_REG (4)));
7414 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7415 1 << (src_regno
- BR_REG (1)));
7422 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7423 1 << (src_regno
- FR_REG (2)));
7426 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7427 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7428 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7429 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7430 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7431 1 << (src_regno
- FR_REG (12)));
7443 /* This function looks at a single insn and emits any directives
7444 required to unwind this insn. */
7446 process_for_unwind_directive (asm_out_file
, insn
)
7450 if (flag_unwind_tables
7451 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7455 if (GET_CODE (insn
) == NOTE
7456 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7458 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
7460 /* Restore unwind state from immediately before the epilogue. */
7461 if (need_copy_state
)
7463 fprintf (asm_out_file
, "\t.body\n");
7464 fprintf (asm_out_file
, "\t.copy_state 1\n");
7465 need_copy_state
= false;
7469 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
7472 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7474 pat
= XEXP (pat
, 0);
7476 pat
= PATTERN (insn
);
7478 switch (GET_CODE (pat
))
7481 process_set (asm_out_file
, pat
);
7487 int limit
= XVECLEN (pat
, 0);
7488 for (par_index
= 0; par_index
< limit
; par_index
++)
7490 rtx x
= XVECEXP (pat
, 0, par_index
);
7491 if (GET_CODE (x
) == SET
)
7492 process_set (asm_out_file
, x
);
/* Register the ia64 machine-specific builtins: the __sync_* atomic
   intrinsics (compare-and-swap, test-and-set, lock release, and the
   fetch-and-op / op-and-fetch families in SImode and DImode flavors),
   plus __builtin_ia64_bsp and __builtin_ia64_flushrs.  Each builtin is
   tied to an IA64_BUILTIN_* code that ia64_expand_builtin dispatches on.
   NOTE(review): this region was mechanically re-extracted; the gaps in
   the embedded line numbers (e.g. 7517-7519, 7544-7545, 7547, 7552)
   show that original lines -- the function header, braces, the
   void_ftype_psi/void_ftype_pdi declarations, and trailing argument
   lists -- are missing here.  Restore from pristine ia64.c before
   compiling.  */
7505 ia64_init_builtins ()
/* Pointer-to-int and pointer-to-long types used as the first
   parameter of the atomic builtins.  */
7507 tree psi_type_node
= build_pointer_type (integer_type_node
);
7508 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
/* Terminator for the tree_cons parameter lists (marks "no varargs").  */
7509 tree endlink
= void_list_node
;
7511 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7512 tree si_ftype_psi_si_si
7513 = build_function_type (integer_type_node
,
7514 tree_cons (NULL_TREE
, psi_type_node
,
7515 tree_cons (NULL_TREE
, integer_type_node
,
7516 tree_cons (NULL_TREE
,
7520 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7521 tree di_ftype_pdi_di_di
7522 = build_function_type (long_integer_type_node
,
7523 tree_cons (NULL_TREE
, pdi_type_node
,
7524 tree_cons (NULL_TREE
,
7525 long_integer_type_node
,
7526 tree_cons (NULL_TREE
,
7527 long_integer_type_node
,
7529 /* __sync_synchronize */
7530 tree void_ftype_void
7531 = build_function_type (void_type_node
, endlink
);
7533 /* __sync_lock_test_and_set_si */
7534 tree si_ftype_psi_si
7535 = build_function_type (integer_type_node
,
7536 tree_cons (NULL_TREE
, psi_type_node
,
7537 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
7539 /* __sync_lock_test_and_set_di */
7540 tree di_ftype_pdi_di
7541 = build_function_type (long_integer_type_node
,
7542 tree_cons (NULL_TREE
, pdi_type_node
,
7543 tree_cons (NULL_TREE
, long_integer_type_node
,
7546 /* __sync_lock_release_si */
7548 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
,
7551 /* __sync_lock_release_di */
7553 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
,
/* Shorthand: register NAME with function type TYPE as a
   machine-dependent (BUILT_IN_MD) builtin carrying CODE.  */
7556 #define def_builtin(name, type, code) \
7557 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
/* Compare-and-swap builtins, value- and bool-returning variants.  */
7559 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7560 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7561 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7562 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7563 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7564 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7565 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
,
7566 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7568 def_builtin ("__sync_synchronize", void_ftype_void
,
7569 IA64_BUILTIN_SYNCHRONIZE
);
7571 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
7572 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
7573 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
7574 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
7575 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
7576 IA64_BUILTIN_LOCK_RELEASE_SI
);
7577 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
7578 IA64_BUILTIN_LOCK_RELEASE_DI
);
/* Register-stack builtins: read ar.bsp, and flush the RSE.  */
7580 def_builtin ("__builtin_ia64_bsp",
7581 build_function_type (ptr_type_node
, endlink
),
7584 def_builtin ("__builtin_ia64_flushrs",
7585 build_function_type (void_type_node
, endlink
),
7586 IA64_BUILTIN_FLUSHRS
);
/* fetch_and_OP family (returns the OLD value), SImode.  */
7588 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
7589 IA64_BUILTIN_FETCH_AND_ADD_SI
);
7590 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
7591 IA64_BUILTIN_FETCH_AND_SUB_SI
);
7592 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
7593 IA64_BUILTIN_FETCH_AND_OR_SI
);
7594 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
7595 IA64_BUILTIN_FETCH_AND_AND_SI
);
7596 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
7597 IA64_BUILTIN_FETCH_AND_XOR_SI
);
7598 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
7599 IA64_BUILTIN_FETCH_AND_NAND_SI
);
/* OP_and_fetch family (returns the NEW value), SImode.  */
7601 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
7602 IA64_BUILTIN_ADD_AND_FETCH_SI
);
7603 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7604 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7605 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7606 IA64_BUILTIN_OR_AND_FETCH_SI
);
7607 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7608 IA64_BUILTIN_AND_AND_FETCH_SI
);
7609 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7610 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7611 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7612 IA64_BUILTIN_NAND_AND_FETCH_SI
);
/* Same two families in DImode.  */
7614 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7615 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7616 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7617 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7618 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7619 IA64_BUILTIN_FETCH_AND_OR_DI
);
7620 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7621 IA64_BUILTIN_FETCH_AND_AND_DI
);
7622 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7623 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7624 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7625 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7627 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7628 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7629 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7630 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7631 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7632 IA64_BUILTIN_OR_AND_FETCH_DI
);
7633 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7634 IA64_BUILTIN_AND_AND_FETCH_DI
);
7635 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7636 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7637 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7638 IA64_BUILTIN_NAND_AND_FETCH_DI
);
/* NOTE(review): mechanically re-extracted text; the gaps in the embedded
   line numbers show missing original lines (function header/return type,
   braces, the `else` arms pairing lines 7684/7686 and 7710/7712 -- which
   presumably select between SImode and DImode generators, TODO confirm --
   the emit_insn/return tail, and the label emission).  Restore from
   pristine ia64.c before compiling.  */
7643 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7651 cmpxchgsz.acq tmp = [ptr], tmp
7652 } while (tmp != ret)
/* Emit RTL for a __sync_fetch_and_OP builtin: atomically apply BINOPTAB
   to *ptr and VALUE, returning the value *ptr held BEFORE the operation.
   ARGLIST holds (ptr, value); TARGET is a suggested result register.  */
7656 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
7658 enum machine_mode mode
;
7662 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
/* Pull the pointer and operand out of the builtin's argument list
   and expand them to RTL.  */
7665 arg0
= TREE_VALUE (arglist
);
7666 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7667 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7668 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
/* Build a volatile MEM so the access cannot be deleted or reordered.  */
7670 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7671 MEM_VOLATILE_P (mem
) = 1;
7673 if (target
&& register_operand (target
, mode
))
7676 ret
= gen_reg_rtx (mode
);
/* Memory fence before the atomic sequence.  */
7678 emit_insn (gen_mf ());
7680 /* Special case for fetchadd instructions. */
7681 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7684 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7686 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
/* General case: cmpxchg retry loop keyed on ar.ccv.  */
7691 tmp
= gen_reg_rtx (mode
);
7692 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7693 emit_move_insn (tmp
, mem
);
7695 label
= gen_label_rtx ();
7697 emit_move_insn (ret
, tmp
);
7698 emit_move_insn (ccv
, tmp
);
7700 /* Perform the specific operation. Special case NAND by noticing
7701 one_cmpl_optab instead. */
7702 if (binoptab
== one_cmpl_optab
)
7704 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7705 binoptab
= and_optab
;
7707 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7710 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7712 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
/* Loop back while another CPU raced us (cmpxchg saw a different
   old value than the one we computed from).  */
7715 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, label
);
/* NOTE(review): mechanically re-extracted text; missing original lines
   (function header, braces, `else` arms pairing 7775/7777 -- presumably
   the SImode/DImode split, TODO confirm -- emit_insn/label/return tail).
   Restore from pristine ia64.c before compiling.  */
7720 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7728 cmpxchgsz.acq tmp = [ptr], ret
7729 } while (tmp != old)
/* Emit RTL for a __sync_OP_and_fetch builtin: atomically apply BINOPTAB
   to *ptr and VALUE, returning the NEW value (contrast with
   ia64_expand_fetch_and_op above, which returns the old one).  */
7733 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
7735 enum machine_mode mode
;
7739 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
/* Expand the (ptr, value) arguments.  */
7742 arg0
= TREE_VALUE (arglist
);
7743 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7744 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7745 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
/* Volatile MEM: keep the access from being deleted or reordered.  */
7747 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7748 MEM_VOLATILE_P (mem
) = 1;
/* Note the inverted test versus fetch_and_op: here an unusable TARGET
   is cleared (the missing body presumably sets target = 0 -- confirm
   against pristine source).  */
7750 if (target
&& ! register_operand (target
, mode
))
7753 emit_insn (gen_mf ());
7754 tmp
= gen_reg_rtx (mode
);
7755 old
= gen_reg_rtx (mode
);
7756 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7758 emit_move_insn (tmp
, mem
);
7760 label
= gen_label_rtx ();
7762 emit_move_insn (old
, tmp
);
7763 emit_move_insn (ccv
, tmp
);
7765 /* Perform the specific operation. Special case NAND by noticing
7766 one_cmpl_optab instead. */
7767 if (binoptab
== one_cmpl_optab
)
7769 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7770 binoptab
= and_optab
;
/* RET is the new value; it is also what cmpxchg stores on success.  */
7772 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
7775 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
7777 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
/* Retry while another CPU changed *ptr under us.  */
7780 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, label
);
/* NOTE(review): mechanically re-extracted text; missing original lines
   (function header, braces, `else` arms pairing 7829/7831 -- presumably
   SImode vs DImode -- the emit_insn call, and the val_ return path).
   Restore from pristine ia64.c before compiling.  */
7785 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7789 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7792 For bool_ it's the same except return ret == oldval.
/* Emit RTL for __sync_{val,bool}_compare_and_swap.  BOOLP selects the
   bool_ variant (result is the comparison ret == oldval rather than the
   fetched value).  ARGLIST holds (ptr, oldval, newval).  */
7796 ia64_expand_compare_and_swap (mode
, boolp
, arglist
, target
)
7797 enum machine_mode mode
;
7802 tree arg0
, arg1
, arg2
;
7803 rtx mem
, old
, new, ccv
, tmp
, insn
;
/* Expand the three arguments: pointer, expected old value, new value.  */
7805 arg0
= TREE_VALUE (arglist
);
7806 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7807 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7808 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7809 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7810 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7812 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7813 MEM_VOLATILE_P (mem
) = 1;
/* cmpxchg operands must be registers.  */
7815 if (! register_operand (old
, mode
))
7816 old
= copy_to_mode_reg (mode
, old
);
7817 if (! register_operand (new, mode
))
7818 new = copy_to_mode_reg (mode
, new);
7820 if (! boolp
&& target
&& register_operand (target
, mode
))
7823 tmp
= gen_reg_rtx (mode
);
/* The expected old value is communicated to cmpxchg via ar.ccv.  */
7825 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7826 emit_move_insn (ccv
, old
);
7827 emit_insn (gen_mf ());
7829 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7831 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
/* bool_ variant: materialize (tmp == old) as the result.  */
7837 target
= gen_reg_rtx (mode
);
7838 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
/* NOTE(review): mechanically re-extracted text; missing original lines
   (function header, braces, the SImode/DImode `if`/`else` around
   7871/7873, emit_insn and the return).  Restore from pristine ia64.c
   before compiling.  */
7844 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
/* Emit RTL for __sync_lock_test_and_set: atomically exchange *ptr with
   NEW via the xchg pattern, returning the previous contents.  */
7847 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
7848 enum machine_mode mode
;
7853 rtx mem
, new, ret
, insn
;
7855 arg0
= TREE_VALUE (arglist
);
7856 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7857 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7858 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7860 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7861 MEM_VOLATILE_P (mem
) = 1;
/* xchg requires its source in a register.  */
7862 if (! register_operand (new, mode
))
7863 new = copy_to_mode_reg (mode
, new);
7865 if (target
&& register_operand (target
, mode
))
7868 ret
= gen_reg_rtx (mode
);
7871 insn
= gen_xchgsi (ret
, mem
, new);
7873 insn
= gen_xchgdi (ret
, mem
, new);
/* NOTE(review): mechanically re-extracted text; the function header,
   braces, declarations and return are missing between the numbered
   fragments.  Restore from pristine ia64.c before compiling.  */
7879 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
/* Emit RTL for __sync_lock_release: store zero to *ptr through a
   volatile MEM (expanded by the backend to a release store).  */
7882 ia64_expand_lock_release (mode
, arglist
, target
)
7883 enum machine_mode mode
;
7885 rtx target ATTRIBUTE_UNUSED
;
7890 arg0
= TREE_VALUE (arglist
);
7891 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7893 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7894 MEM_VOLATILE_P (mem
) = 1;
/* The release semantics come from the store to the volatile MEM;
   the stored value is simply zero.  */
7896 emit_move_insn (mem
, const0_rtx
);
/* Expand a call to one of the ia64 machine-specific builtins registered
   in ia64_init_builtins.  Dispatches on DECL_FUNCTION_CODE: the first
   switch (visible below) apparently normalizes MODE for the _SI/_DI
   groups, the second routes to the specific expander.
   NOTE(review): mechanically re-extracted text; the `switch` headers,
   braces, `mode = SImode/DImode; break;` statements, default cases and
   final return are among the missing lines.  Restore from pristine
   ia64.c before compiling.  */
7902 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
7905 rtx subtarget ATTRIBUTE_UNUSED
;
7906 enum machine_mode mode ATTRIBUTE_UNUSED
;
7907 int ignore ATTRIBUTE_UNUSED
;
/* Recover the builtin's decl, its IA64_BUILTIN_* code, and the
   argument list from the CALL_EXPR.  */
7909 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7910 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7911 tree arglist
= TREE_OPERAND (exp
, 1);
/* First grouping: all SImode builtins...  */
7915 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7916 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7917 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7918 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7919 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7920 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7921 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7922 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7923 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7924 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7925 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7926 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7927 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7928 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7929 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7930 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
/* ...and all DImode builtins (the mode assignments between the two
   groups are among the missing lines).  */
7934 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7935 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7936 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7937 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7938 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7939 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7940 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7941 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7942 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7943 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7944 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7945 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7946 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7947 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7948 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7949 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
/* Second switch: dispatch to the specific expander.  */
7959 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7960 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7961 return ia64_expand_compare_and_swap (mode
, 1, arglist
, target
);
7963 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7964 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7965 return ia64_expand_compare_and_swap (mode
, 0, arglist
, target
);
7967 case IA64_BUILTIN_SYNCHRONIZE
:
7968 emit_insn (gen_mf ());
7971 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7972 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7973 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
7975 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7976 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7977 return ia64_expand_lock_release (mode
, arglist
, target
);
/* __builtin_ia64_bsp: read the backing-store pointer into a DImode
   register.  */
7979 case IA64_BUILTIN_BSP
:
7980 if (! target
|| ! register_operand (target
, DImode
))
7981 target
= gen_reg_rtx (DImode
);
7982 emit_insn (gen_bsp_value (target
));
7985 case IA64_BUILTIN_FLUSHRS
:
7986 emit_insn (gen_flushrs ());
/* fetch_and_OP family: forward to the retry-loop expander with the
   matching optab; NAND is flagged via one_cmpl_optab (see the NAND
   special case inside ia64_expand_fetch_and_op).  */
7989 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7990 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7991 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
7993 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7994 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7995 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
7997 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7998 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7999 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
8001 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8002 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8003 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
8005 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8006 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8007 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
8009 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8010 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8011 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
/* OP_and_fetch family: same optab mapping, new-value-returning
   expander.  */
8013 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8014 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8015 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
8017 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8018 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8019 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
8021 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8022 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8023 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
8025 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8026 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8027 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
8029 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8030 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8031 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
8033 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8034 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8035 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);
/* NOTE(review): mechanically re-extracted text; the function header,
   braces and the `return upward;` for the aggregate special case are
   among the missing lines.  Restore from pristine ia64.c before
   compiling.  */
8044 /* For the HP-UX IA64 aggregate parameters are passed stored in the
8045 most significant bits of the stack slot. */
/* HP-UX variant of FUNCTION_ARG_PADDING: small aggregates pad upward
   (most-significant end); everything else follows the standard rule
   with BYTES_BIG_ENDIAN forced false.  */
8048 ia64_hpux_function_arg_padding (mode
, type
)
8049 enum machine_mode mode
;
8052 /* Exception to normal case for structures/unions/etc. */
8054 if (type
&& AGGREGATE_TYPE_P (type
)
8055 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
8058 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8059 hardwired to be true. */
8061 return((mode
== BLKmode
8062 ? (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
8063 && int_size_in_bytes (type
) < (PARM_BOUNDARY
/ BITS_PER_UNIT
))
8064 : GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
8065 ? downward
: upward
);
/* NOTE(review): mechanically re-extracted text; the function header,
   braces, and presumably an sdata_section () call in the small-data
   branch (line 8079 is missing) were lost.  Restore from pristine
   ia64.c before compiling.  */
8068 /* Switch to the section to which we should output X. The only thing
8069 special we do here is to honor small data. */
/* TARGET_ASM_SELECT_RTX_SECTION hook: constants no larger than the
   -G threshold go to small data; otherwise defer to the generic ELF
   selection.  */
8072 ia64_select_rtx_section (mode
, x
, align
)
8073 enum machine_mode mode
;
8075 unsigned HOST_WIDE_INT align
;
8077 if (GET_MODE_SIZE (mode
) > 0
8078 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
)
8081 default_elf_select_rtx_section (mode
, x
, align
);
/* NOTE(review): mechanically re-extracted text; function header, braces
   and the `flag_pic = 1;` line (presumably at the missing line 8094)
   were lost.  Restore from pristine ia64.c before compiling.  */
8084 /* It is illegal to have relocations in shared segments on AIX.
8085 Pretend flag_pic is always set. */
/* AIX section-selection hook: temporarily force flag_pic around the
   generic ELF selection, then restore the caller's value.  */
8088 ia64_aix_select_section (exp
, reloc
, align
)
8091 unsigned HOST_WIDE_INT align
;
8093 int save_pic
= flag_pic
;
8095 default_elf_select_section (exp
, reloc
, align
);
8096 flag_pic
= save_pic
;
/* AIX unique-section hook: same flag_pic save/force/restore wrapper
   (see the comment above ia64_aix_select_section) around
   default_unique_section.  NOTE(review): header, braces and the
   forcing assignment are among the lines lost in re-extraction.  */
8100 ia64_aix_unique_section (decl
, reloc
)
8104 int save_pic
= flag_pic
;
8106 default_unique_section (decl
, reloc
);
8107 flag_pic
= save_pic
;
/* AIX constant-pool section hook: flag_pic save/force/restore wrapper
   around ia64_select_rtx_section, for the same no-relocations-in-
   shared-segments reason noted above ia64_aix_select_section.
   NOTE(review): header, braces and the forcing assignment are among
   the lines lost in re-extraction.  */
8111 ia64_aix_select_rtx_section (mode
, x
, align
)
8112 enum machine_mode mode
;
8114 unsigned HOST_WIDE_INT align
;
8116 int save_pic
= flag_pic
;
8118 ia64_select_rtx_section (mode
, x
, align
);
8119 flag_pic
= save_pic
;
8122 #include "gt-ia64.h"