/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;
/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
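/* The sizes correspond to the immediate fields of the underlying insns:
   adds takes a 14-bit immediate, addl a 22-bit immediate, and movl a
   full 64-bit one, so the default of 22 selects addl-sized offsets.  */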
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
					 int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
				     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,	       0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
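/* With the operations above, e.g. assemble_integer for an aligned 8-byte
   object emits "data8" rather than the generic ".quad" pseudo-op,
   matching the IA-64 assembler's data allocation syntax.  */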
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  } ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl, decl);
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("%Jaddress area of '%s' conflicts with previous "
		 "declaration", decl, decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
	     decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
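/* An illustrative, self-contained sketch of the check above, kept out of
   the build; the mask and shift values are hypothetical.  */
#if 0
static int
ia64_depz_field_mask_example (void)
{
  /* A mask of 0x7f8 with shift 3 leaves 0xff at bit 0, a solid block of
     eight 1's, so the field length is exact_log2 (0xff + 1) == 8.  A
     mask such as 0x5 fails, since 0x5 + 1 == 6 is not a power of two.  */
  unsigned HOST_WIDE_INT op = 0x7f8;
  unsigned HOST_WIDE_INT shift = 3;
  op >>= shift;
  return exact_log2 (op + 1);	/* returns 8 */
}
#endif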
/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}
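/* An illustrative, self-contained sketch of the 14-bit split above, kept
   out of the build; the addend value is hypothetical.  */
#if 0
static void
ia64_addend_split_example (void)
{
  HOST_WIDE_INT ofs = 0x12345;
  HOST_WIDE_INT lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
  HOST_WIDE_INT hi = ofs - lo;

  /* ofs & 0x3fff == 0x2345 has bit 13 set, so LO sign-extends to
     -0x1cbb and HI becomes 0x14000, a multiple of 0x4000.  The pieces
     satisfy hi + lo == ofs, with LO always in [-8192, 8191] so that it
     fits an "adds" immediate.  */
}
#endif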
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_const_mem (Pmode, tga_op1);

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_const_mem (Pmode, tga_op2);

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_const_mem (Pmode, tga_op1);

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_const_mem (Pmode, tmp);
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
	}
      else
	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if (GET_CODE (op1) == SYMBOL_REF
	  && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
	return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
	{
	  ia64_expand_load_address (op0, op1);
	  return NULL_RTX;
	}
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    if (reversed || dead) abort ();
	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    if (reversed || dead) abort ();
	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    if (reversed || dead) abort ();
	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
	      abort ();
	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
	      {
		/* Again the postmodify cannot be made to match, but
		   in this case it's more efficient to get rid of the
		   postmodify entirely and fix up with an add insn.  */
		out[1] = adjust_automodify_address (in, DImode, base, 8);
		fixup = gen_adddi3 (base, base,
				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
	      }
	    else
	      {
		/* Combined offset still fits in the displacement field.
		   (We cannot overflow it at the high end.)  */
		out[1] = adjust_automodify_address
		  (in, DImode,
		   gen_rtx_POST_MODIFY (Pmode, base,
					gen_rtx_PLUS (Pmode, base,
						      GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		   8);
	      }
	    break;

	  default:
	    abort ();
	  }
	break;
      }

    default:
      abort ();
    }

  return fixup;
}
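/* For the simple (mem (reg)) case above, the two DImode accesses might
   assemble to something like (register choices hypothetical):

	ld8 r14 = [r2], 8	// low word, pointer post-incremented
	ld8 r15 = [r2], -8	// high word, pointer restored

   with the second postmodify omitted when DEAD says the pointer's value
   is no longer needed.  */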
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\
					  REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;

      if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
	abort ();
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: abort ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
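/* With the enum values shown above, a LE comparison, for example, passes
   QCMP_LT|QCMP_EQ|QCMP_INV == 8 + 4 + 1 == 13 as the magic argument to
   _U_Qfcmp, and the libcall's nonzero result is then tested NE against
   zero.  */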
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}
void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state = 0;

  rs = 1;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
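/* On a typical configuration the loop above emits a single directive
   such as

	.pred.safe_across_calls p1-p5,p16-p63

   naming the predicate ranges preserved across calls; the exact ranges
   follow call_used_regs for the target ABI.  */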
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
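/* The (last_scratch_gr_reg + i + 1) & 31 scan above starts just past the
   previously allocated scratch and wraps around within r0-r31, so
   successive temporaries land in distinct registers whenever enough
   call-clobbered registers are available.  */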
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}

      if (regs_ever_live[AR_PFS_REGNUM])
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
	  if (current_frame_info.reg_save_ar_pfs == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
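/* A small worked example of the sizing above, with hypothetical numbers:
   a non-leaf function with 40 bytes of locals, one spilled FR (16 bytes)
   and a stack-saved b0 (8 bytes), no pretend or outgoing args, gets
   total_size = IA64_STACK_ALIGN (16 + 8 + 40) == 64, n_spilled == 2,
   and spill_cfa_off == -16.  */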
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    default:
      abort ();
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
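/* A sketch of the interleaving this enables (register numbers are
   hypothetical): with two iterators, four consecutive 8-byte saves can
   split into two independent postmodify chains,

	st8 [r16] = r4, 16		st8 [r17] = r5, 16
	st8 [r16] = r6			st8 [r17] = r7

   so two memory references can issue per insn group.  */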
static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					     REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
          rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base,
                                                                      off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}

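/* For illustration only (not part of the build): a typical save in the
   prologue drives the machinery above roughly like so, assuming a DImode
   register REG and a save slot at CFA offset CFA_OFF --

     mem = spill_restore_mem (reg, cfa_off);      -- pick/advance an iterator
     insn = emit_insn (gen_movdi (mem, reg));     -- the actual store
     spill_fill_data.prev_insn[iter] = insn;      -- allow post_modify fixup

   do_spill wraps exactly this pattern and, when FRAME_REG is given, also
   attaches the REG_FRAME_RELATED_EXPR note that the unwinder needs.  */
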
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue (), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P (insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue (void)
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
        regno = current_frame_info.reg_save_ar_pfs;
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          if (GET_CODE (offset) != CONST_INT)
            {
              REG_NOTES (insn)
                = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode,
                                     stack_pointer_rtx,
                                     gen_rtx_PLUS (DImode,
                                                   stack_pointer_rtx,
                                                   frame_size_rtx)),
                        REG_NOTES (insn));
            }
        }

      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
        emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode, alt_reg, reg),
                                 REG_NOTES (insn));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.reg_save_ar_pfs == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  if (current_frame_info.reg_save_gp)
    {
      insn = emit_move_insn (gen_rtx_REG (DImode,
                                          current_frame_info.reg_save_gp),
                             pic_offset_table_rtx);
      /* We don't know for sure yet if this is actually needed, since
         we've not split the PIC call patterns.  If all of the calls
         are indirect, and not followed by any uses of the gp, then
         this save is dead.  Allow it to go away.  */
      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        if (cfa_off & 15)
          abort ();
        reg = gen_rtx_REG (XFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
        cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue (), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (int sibcall_p)
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
                          hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
                          current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
         allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
          cfa_off -= 8;
        }
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
    abort ();

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_restore (gen_gr_restore, reg, cfa_off);
        cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        do_restore (gen_movdi_x, alt_reg, cfa_off);
        cfa_off -= 8;
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        if (cfa_off & 15)
          abort ();
        reg = gen_rtx_REG (XFmode, regno);
        do_restore (gen_fr_restore_x, reg, cfa_off);
        cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
         pointer.  This would allow the most scheduling freedom.  For now,
         just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
        {
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                    gen_rtx_SET (VOIDmode,
                                 stack_pointer_rtx,
                                 gen_rtx_PLUS (DImode,
                                               stack_pointer_rtx,
                                               frame_size_rtx)),
                    REG_NOTES (insn));
        }
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2 is
         the first available call clobbered register.  If there was a
         frame_pointer register, we may have swapped the names of r2 and
         HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
         string "r2" when emitting the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
        fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
         registers.  Otherwise, if the callee tries to pass its parameters
         through to another call without an intervening alloc, then these
         values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
         preserve those input registers used as arguments to the sibling call.
         It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
        emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
                              const0_rtx, const0_rtx,
                              GEN_INT (current_frame_info.n_input_regs),
                              const0_rtx));
    }
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return (void)
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
              && current_frame_info.n_spilled == 0
              && current_frame_info.reg_save_b0 == 0
              && current_frame_info.reg_save_pr == 0
              && current_frame_info.reg_save_ar_pfs == 0
              && current_frame_info.reg_save_ar_unat == 0
              && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}

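/* For illustration: a small leaf function such as

     long add (long a, long b) { return a + b; }

   allocates no frame, spills nothing, and reserves none of b0/pr/ar.*,
   so ia64_direct_return returns nonzero and a lone br.ret suffices to
   return to the caller.  */
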
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL;
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
}

/* Split this value after reload, now that we know where the return
   address is saved.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
        src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
        {
          HOST_WIDE_INT off;
          unsigned int regno;

          /* Compute offset from CFA for BR0.  */
          /* ??? Must be kept in sync with ia64_expand_prologue.  */
          off = (current_frame_info.spill_cfa_off
                 + current_frame_info.spill_size);
          for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
            if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
              off -= 8;

          /* Convert CFA offset to a register based offset.  */
          if (frame_pointer_needed)
            src = hard_frame_pointer_rtx;
          else
            {
              src = stack_pointer_rtx;
              off += current_frame_info.total_size;
            }

          /* Load address into scratch register.  */
          if (CONST_OK_FOR_I (off))
            emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
          else
            {
              emit_move_insn (dest, GEN_INT (off));
              emit_insn (gen_adddi3 (dest, src, dest));
            }

          src = gen_rtx_MEM (Pmode, dest);
        }
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}

int
ia64_hard_regno_rename_ok (int from, int to)
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}

/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && aligned_p
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      if (POINTER_SIZE == 32)
        fputs ("\tdata4\t@fptr(", asm_out_file);
      else
        fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}

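/* For illustration: given

     extern void foo (void);
     void (*p) (void) = foo;

   the initializer for p is emitted as "data8 @fptr(foo)" (data4 in
   ILP32 mode), so the linker materializes an official function
   descriptor for foo rather than storing a raw code address.  */
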
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
             current_frame_info.n_input_regs,
             current_frame_info.n_local_regs,
             current_frame_info.n_output_regs,
             current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
          || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
        grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_pr;
    }

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
             ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
             (long) (current_frame_info.spill_cfa_off
                     + current_frame_info.spill_size));
}

/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (FILE *file)
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int i;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}

int
ia64_dbx_register_number (int regno)
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
        regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
        regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}

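/* For illustration: with 2 input and 3 local registers, in0/in1 map to
   debug register numbers 32/33, loc0..loc2 to 34..36, and out0 to 37;
   register numbers outside the stacked frame (e.g. r4 or f8) pass
   through unchanged.  */
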
void
ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
{
  rtx addr_reg, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
  if (!TARGET_GNU_AS)
    {
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
        {
          declared_ia64_trampoline = true;
          (*targetm.asm_out.globalize_label) (asm_out_file,
                                              "__ia64_trampoline");
        }
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode.  */
  addr = convert_memory_address (Pmode, addr);
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}

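/* For illustration, the 32-byte trampoline built above is laid out
   roughly as:

     [addr+ 0]  __ia64_trampoline   -- entry point of the fake descriptor
     [addr+ 8]  addr+16             -- "gp" slot pointing at the payload
     [addr+16]  fnaddr              -- descriptor of the real target
     [addr+24]  static_chain

   A call through the fake descriptor lands in the __ia64_trampoline
   helper with gp = addr+16, from which the helper can pick up the real
   target descriptor and the static chain.  */
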
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

static void
ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int * pretend_size,
                             int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum = *cum;

  /* Skip the current argument.  */
  ia64_function_arg_advance (&next_cum, mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

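/* For illustration:

     struct hfa3  { float x, y, z; };       -- HFA, element mode SFmode
     struct mixed { float x; double y; };   -- not an HFA (mixed modes)
     struct ints  { int x, y; };            -- not an HFA (no FP leaves)

   hfa_element_mode returns SFmode for the first and VOIDmode for the
   other two.  */
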
static enum machine_mode
hfa_element_mode (tree type, int nested)
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:     case INTEGER_TYPE:     case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case CHAR_TYPE:        case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE:   case METHOD_TYPE:
    case FILE_TYPE:     case SET_TYPE:         case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
          && TYPE_MODE (type) != TCmode)
        return GET_MODE_INNER (TYPE_MODE (type));
      else
        return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
        return TYPE_MODE (type);
      else
        return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
        {
          if (TREE_CODE (t) != FIELD_DECL)
            continue;

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
            {
              if (mode != element_mode)
                return VOIDmode;
            }
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
            return VOIDmode;
          else
            {
              know_element_mode = 1;
              element_mode = mode;
            }
        }
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}

/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

static int
ia64_function_arg_words (tree type, enum machine_mode mode)
{
  int words;

  if (mode == BLKmode)
    words = int_size_in_bytes (type);
  else
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
}

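/* For illustration: with UNITS_PER_WORD == 8, a 12-byte BLKmode struct
   yields (12 + 7) / 8 = 2 argument words, while an SFmode scalar yields
   (4 + 7) / 8 = 1.  */
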
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

static int
ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
{
  if ((cum->words & 1) == 0)
    return 0;

  if (type
      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
  else
    return words > 1;
}

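/* For illustration: if cum->words is odd (say the next free slot is
   slot 1) and the incoming argument is a 16-byte-aligned aggregate or a
   scalar wider than one word, this returns 1, so slot 1 is skipped and
   the argument starts at the even slot 2.  */
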
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
                   int named, int incoming)
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
                                                              + fp_regs)),
                                      GEN_INT (offset));
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
        offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
        {
          enum machine_mode gr_mode = DImode;
          unsigned int gr_size;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          if (offset & 0x4)
            gr_mode = SImode;
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)
            gr_mode = SImode;

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg
                                                             + int_regs)),
                                      GEN_INT (offset));

          gr_size = GET_MODE_SIZE (gr_mode);
          offset += gr_size;
          if (gr_size == UNITS_PER_WORD
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
            int_regs++;
          else if (gr_size > UNITS_PER_WORD)
            int_regs += gr_size / UNITS_PER_WORD;
        }
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
          && byte_size < UNITS_PER_WORD
          && byte_size > 0)
        {
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
                                                       (basereg + cum->words
                                                        + offset)),
                                          const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
        }
      else
        return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (named)
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
         the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
        return gen_rtx_PARALLEL (mode,
                 gen_rtvec (1,
                   gen_rtx_EXPR_LIST (VOIDmode,
                     gen_rtx_REG (DImode, basereg + cum->words + offset),
                                      const0_rtx)));
      else
        return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      enum machine_mode inner_mode =
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
                                                          + cum->fp_regs)),
                                      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (inner_mode,
                                                   (basereg + cum->words
                                                    + offset)),
                                      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

/* Return the number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                                 tree type, int named ATTRIBUTE_UNUSED)
{
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                           tree type, int named)
{
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
        {
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        cum->int_regs = cum->words;
      else
        /* ??? Complex types should not reach here.  */
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }

  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}

/* Variable sized types are passed by reference.  */
/* ??? At present this is a GCC extension to the IA-64 ABI.  */

static bool
ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

static bool
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* We can't perform a sibcall if the current function has the syscall_linkage
     attribute.  */
  if (lookup_attribute ("syscall_linkage",
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return false;

  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module.  */
  return decl && (*targetm.binds_local_p) (decl);
}

/* Implement va_arg.  */

static tree
ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);
    }

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
                      build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      gimplify_and_add (t, pre_p);
    }

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

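/* For illustration: with UNITS_PER_WORD == 8, the alignment code above
   gimplifies to valist = (valist + 15) & -16, i.e. it rounds the
   argument pointer up to the next even (16-byte) slot boundary before
   the ordinary std_gimplify_va_arg_expr fetch.  */
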
/* Return 1 if the function return value is returned in memory.  Return 0 if
   it is in a register.  */

static bool
ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    {
      byte_size = int_size_in_bytes (valtype);
      if (byte_size < 0)
        return true;
    }

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
        return true;
      else
        return false;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return true;
  else
    return false;
}

/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
                                      GEN_INT (offset));
          offset += hfa_size;
        }
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
        {
          rtx loc[8];
          int offset;
          int bytesize;
          int i;

          offset = 0;
          bytesize = int_size_in_bytes (valtype);
          for (i = 0; offset < bytesize; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
                                                       GR_RET_FIRST + i),
                                          GEN_INT (offset));
              offset += UNITS_PER_WORD;
            }
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
        }
      else
        return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  if (size != 8)
    abort ();
  fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
}

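/* For illustration: for a thread-local variable "x" this emits
   "data8.ua @dtprel(x)", a DTP-relative relocation that the linker
   resolves to x's offset within its module's TLS block.  */
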
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
                            rtx address ATTRIBUTE_UNUSED)
{
}

/* Print an operand to an assembler instruction.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   e    Print 64 - constant, for DImode rotates.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.  */
void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      switch (GET_CODE (x))
        {
        case NE:
          str = "neq";
          break;
        case UNORDERED:
          str = "unord";
          break;
        case ORDERED:
          str = "ord";
          break;
        default:
          str = GET_RTX_NAME (GET_CODE (x));
          break;
        }
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
        str = reg_names [REGNO (x)];
      else
        abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
          regno += 1;
        if (code == 'j')
          regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs(".acq", file);
      return;

    case 'P':
      {
        HOST_WIDE_INT value;

        switch (GET_CODE (XEXP (x, 0)))
          {
          default:
            return;

          case POST_MODIFY:
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
              value = INTVAL (x);
            else if (GET_CODE (x) == REG)
              {
                fprintf (file, ", %s", reg_names[REGNO (x)]);
                return;
              }
            else
              abort ();
            break;

          case POST_INC:
            value = GET_MODE_SIZE (GET_MODE (x));
            break;

          case POST_DEC:
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
            break;
          }

        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
         Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
        fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
        output_addr_const (file, x);
      else
        output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
        const char *which;

        /* For conditional branches, returns or calls, substitute
           sptk, dptk, dpnt, or spnt for %s.  */
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
        if (x)
          {
            int pred_val = INTVAL (XEXP (x, 0));

            /* Guess top and bottom 10% statically predicted.  */
            if (pred_val < REG_BR_PROB_BASE / 50)
              which = ".spnt";
            else if (pred_val < REG_BR_PROB_BASE / 2)
              which = ".dpnt";
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
              which = ".dptk";
            else
              which = ".sptk";
          }
        else if (GET_CODE (current_output_insn) == CALL_INSN)
          which = ".sptk";
        else
          which = ".dptk";

        fputs (which, file);
        return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
        {
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
            regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
        }
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
        break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      switch (outer_code)
        {
        case SET:
          *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
          return true;
        case PLUS:
          if (CONST_OK_FOR_I (INTVAL (x)))
            *total = 0;
          else if (CONST_OK_FOR_J (INTVAL (x)))
            *total = 1;
          else
            *total = COSTS_N_INSNS (1);
          return true;
        default:
          if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
         transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
        *total = COSTS_N_INSNS (10);
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      return false;
    }
}

/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

int
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
                         enum reg_class to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      enum reg_class tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      else
        return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
        return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case GR_REGS:
    case FR_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      abort ();
    }

  return 2;
}

/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (enum reg_class class,
                             enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
         interaction.  We end up with two pseudos with overlapping lifetimes
         both of which are equiv to the same constant, and both which need
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
         changes depending on the path length, which means the qty_first_reg
         check in make_regs_eqv can give different answers at different times.
         At some point I'll probably need a reload_indi pattern to handle
         this.

         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
         non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
        return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
         stack slot.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;
      break;

    case FR_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
        return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
         muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
         in recog.c.  */
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))
        return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
        return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
        return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         current_function_has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
         and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
        return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}

/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  int save_referenced;

  /* GNU as does not need anything here, but the HP linker does need
     something for external functions.  */

  if (TARGET_GNU_AS
      && (!TARGET_HPUX_LD
          || TREE_CODE (decl) != FUNCTION_DECL
          || strstr (name, "__builtin_") == name))
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  if (TARGET_HPUX_LD)
    ia64_hpux_add_extern_decl (decl);
  else
    {
      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
         restore it.  */
      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
      if (TREE_CODE (decl) == FUNCTION_DECL)
        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      (*targetm.asm_out.globalize_label) (file, name);
      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
    }
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning ("value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning ("unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning ("unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning ("%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (comma)
        {
          *comma = ',';
          str = comma + 1;
        }
      else
        break;
    }
}
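/* Illustrative sketch (not part of GCC): fix_range above walks a
   comma-separated list of REG1-REG2 ranges.  A minimal standalone
   demonstration of the same parsing discipline, using a hypothetical
   demo_decode_reg_name in place of decode_reg_name, is kept under
   #if 0 below.  */
#if 0
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for decode_reg_name: accepts only "f<N>".  */
static int demo_decode_reg_name (const char *s)
{
  int n;
  if (sscanf (s, "f%d", &n) == 1 && n >= 0 && n <= 127)
    return n;
  return -1;
}

int main (void)
{
  char str[] = "f32-f35,f100-f101";
  char *p = str;
  while (1)
    {
      char *dash = strchr (p, '-');
      char *comma;
      int first, last, i;
      if (!dash)
        return 1;               /* must have form REG1-REG2 */
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';
      first = demo_decode_reg_name (p);
      last = demo_decode_reg_name (dash + 1);
      if (first < 0 || last < 0 || first > last)
        return 1;
      for (i = first; i <= last; ++i)
        printf ("fix f%d\n", i);        /* mark register as fixed */
      if (!comma)
        break;
      p = comma + 1;
    }
  return 0;
}
#endif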
static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}
/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options (void)
{
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"itanium", PROCESSOR_ITANIUM},
      {"itanium1", PROCESSOR_ITANIUM},
      {"merced", PROCESSOR_ITANIUM},
      {"itanium2", PROCESSOR_ITANIUM2},
      {"mckinley", PROCESSOR_ITANIUM2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  int i;

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
          && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
        {
          warning ("cannot optimize floating point division for both latency and throughput");
          target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
        }
      else
        {
          if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
            target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
          else
            target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
        }
    }

  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
          && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
        {
          warning ("cannot optimize integer division for both latency and throughput");
          target_flags &= ~MASK_INLINE_INT_DIV_THR;
        }
      else
        {
          if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
            target_flags &= ~MASK_INLINE_INT_DIV_LAT;
          else
            target_flags &= ~MASK_INLINE_INT_DIV_THR;
        }
    }

  if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
          && (target_flags_explicit & MASK_INLINE_SQRT_THR))
        {
          warning ("cannot optimize square root for both latency and throughput");
          target_flags &= ~MASK_INLINE_SQRT_THR;
        }
      else
        {
          if (target_flags_explicit & MASK_INLINE_SQRT_THR)
            target_flags &= ~MASK_INLINE_SQRT_LAT;
          else
            target_flags &= ~MASK_INLINE_SQRT_THR;
        }
    }

  if (TARGET_INLINE_SQRT_LAT)
    {
      warning ("not yet implemented: latency-optimized inline square root");
      target_flags &= ~MASK_INLINE_SQRT_LAT;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  if (ia64_tls_size_string)
    {
      char *end;
      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
        error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
      else
        ia64_tls_size = tmp;
    }

  if (!ia64_tune_string)
    ia64_tune_string = "itanium2";

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
      {
        ia64_tune = processor_alias_table[i].processor;
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -tune= switch", ia64_tune_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  /* Variable tracking should be run after all optimizations which change
     the order of insns.  It also needs a valid CFG.  */
  ia64_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
}
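/* Illustrative sketch (not part of GCC): the three conflict blocks in
   ia64_override_options share one resolution rule -- if both the
   latency and the throughput variant are requested and both were
   explicit, warn and keep latency; if only one was explicit, keep the
   explicit one; otherwise default to latency.  A hypothetical
   standalone reduction of that rule:  */
#if 0
#include <stdio.h>

#define DEMO_MASK_LAT 1
#define DEMO_MASK_THR 2

/* Return the surviving flags given requested FLAGS and the subset
   EXPLICIT_FLAGS the user spelled out on the command line.  */
static int demo_resolve_lat_thr (int flags, int explicit_flags)
{
  if ((flags & DEMO_MASK_LAT) && (flags & DEMO_MASK_THR))
    {
      if ((explicit_flags & DEMO_MASK_LAT) && (explicit_flags & DEMO_MASK_THR))
        {
          printf ("warning: cannot optimize for both latency and throughput\n");
          flags &= ~DEMO_MASK_THR;      /* prefer latency */
        }
      else if (explicit_flags & DEMO_MASK_THR)
        flags &= ~DEMO_MASK_LAT;        /* user explicitly chose throughput */
      else
        flags &= ~DEMO_MASK_THR;        /* default to latency */
    }
  return flags;
}

int main (void)
{
  /* Both explicit: latency wins (prints 1); only throughput explicit:
     throughput wins (prints 2).  */
  printf ("%d\n", demo_resolve_lat_thr (DEMO_MASK_LAT | DEMO_MASK_THR,
                                        DEMO_MASK_LAT | DEMO_MASK_THR));
  printf ("%d\n", demo_resolve_lat_thr (DEMO_MASK_LAT | DEMO_MASK_THR,
                                        DEMO_MASK_THR));
  return 0;
}
#endif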
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
static enum attr_type ia64_safe_type (rtx);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
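/* Illustrative sketch (not part of GCC): under the WRITE_COUNT and
   FIRST_PRED scheme documented above, two predicated writes to the
   same register avoid a stop bit only when their qualifying
   predicates form the complementary pair (P, P^1).  A hypothetical
   standalone model of just that check:  */
#if 0
#include <stdio.h>

struct demo_state { int write_count; int first_pred; };

/* Return 1 if a write under predicate PRED needs a barrier first.
   PRED == 0 means an unconditional write.  */
static int demo_write (struct demo_state *s, int pred)
{
  if (s->write_count == 0)
    {
      s->first_pred = pred;
      s->write_count = pred ? 1 : 2;    /* unconditional => 2 at once */
      return 0;
    }
  if (s->write_count == 1 && (s->first_pred ^ 1) == pred)
    {
      s->write_count = 2;               /* complement completes the pair */
      return 0;
    }
  return 1;                             /* WAW hazard: need a stop bit */
}

int main (void)
{
  struct demo_state r = { 0, 0 };
  printf ("%d\n", demo_write (&r, 6));  /* (p6) mov r4=...  -> 0 */
  printf ("%d\n", demo_write (&r, 7));  /* (p7) mov r4=...  -> 0 */
  printf ("%d\n", demo_write (&r, 6));  /* (p6) again       -> 1 */
  return 0;
}
#endif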
/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;    /* Is register being written?  */
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
};

static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed_p (rtx);
static int safe_group_barrier_needed_p (rtx);
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
        abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else
            need_barrier = 1;
          rws_sum[regno].written_by_and = flags.is_and;
          rws_sum[regno].written_by_or = flags.is_or;
          break;

        default:
          abort ();
        }
    }
  else
    {
      if (flags.is_branch)
        {
          /* Branches have several RAW exceptions that allow us to avoid
             barriers.  */

          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is
               easy.  */
            return 0;

          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */
            return 0;
        }

      if (flags.is_and && rws_sum[regno].written_by_and)
        return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
        return 0;

      switch (rws_sum[regno].write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          need_barrier = 1;
          break;

        default:
          abort ();
        }
    }

  return need_barrier;
}
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
{
  rtx src = SET_SRC (x);

  *pcond = 0;

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      if (SET_DEST (x) == pc_rtx)
        /* X is a conditional branch.  */
        return;
      else
        {
          int is_complemented = 0;

          /* X is a conditional move.  */
          rtx cond = XEXP (src, 0);
          if (GET_CODE (cond) == EQ)
            is_complemented = 1;
          cond = XEXP (cond, 0);
          if (GET_CODE (cond) != REG
              && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
            abort ();
          *pcond = cond;
          if (XEXP (src, 1) == SET_DEST (x)
              || XEXP (src, 2) == SET_DEST (x))
            {
              /* X is a conditional move that conditionally writes the
                 destination.  */

              /* We need another complement in this case.  */
              if (XEXP (src, 1) == SET_DEST (x))
                is_complemented = ! is_complemented;

              *ppred = REGNO (cond);
              if (is_complemented)
                ++*ppred;
            }

          /* ??? If this is a conditional write to the dest, then this
             instruction does not actually read one source.  This probably
             doesn't matter, because that source is also the dest.  */
          /* ??? Multiple writes to predicate registers are allowed
             if they are all AND type compares, or if they are all OR
             type compares.  We do not generate such instructions
             currently.  */
        }
      /* ... fall through ...  */

    default:
      if (COMPARISON_P (src)
          && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
        /* Set pflags->is_fp to 1 so that we know we're dealing
           with a floating point comparison when processing the
           destination of the SET.  */
        pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
         and.orcm and or.andcm at present, since we must retain a
         strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
        pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
        pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by a subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
         all JUMP_INSNs.  */
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
  if (cond)
    need_barrier |= rws_access_reg (cond, flags, 0);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
        {
          new_flags.is_write = 1;
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
        }
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
         multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
        {
          new_flags.is_write = 1;
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
        }
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
        abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
        is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
          && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
        abort ();
      pred = REGNO (cond);
      if (is_complemented)
        ++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
        {
          /* Avoid writing the register multiple times if we have multiple
             asm outputs.  This avoids an abort in rws_access_reg.  */
          if (! rws_insn[REG_VOLATILE].write_count)
            {
              new_flags.is_write = 1;
              rws_access_regno (REG_VOLATILE, new_flags, pred);
            }
        }

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
         We cannot just fall through here since then we would be confused
         by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
         traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
          need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
        {
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
            {
              update_set_flags (pat, &new_flags, &pred, &cond);
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
            }
          else if (GET_CODE (pat) == USE
                   || GET_CODE (pat) == CALL
                   || GET_CODE (pat) == ASM_OPERANDS)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
          else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
            abort ();
        }
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
        {
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
            {
              if (GET_CODE (SET_SRC (pat)) != CALL)
                {
                  new_flags.is_write = 1;
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
                                                     pred);
                }
            }
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
        }
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* FALLTHRU */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
        {
          for (i = 0; i < 64; ++i)
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
        }
      else
        need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:   case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:    case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
        {
        case UNSPEC_LTOFF_DTPMOD:
        case UNSPEC_LTOFF_DTPREL:
        case UNSPEC_DTPREL:
        case UNSPEC_LTOFF_TPREL:
        case UNSPEC_TPREL:
        case UNSPEC_PRED_REL_MUTEX:
        case UNSPEC_PIC_CALL:
        case UNSPEC_MF:
        case UNSPEC_FETCHADD_ACQ:
        case UNSPEC_BSP_VALUE:
        case UNSPEC_FLUSHRS:
        case UNSPEC_BUNDLE_SELECTOR:
          break;

        case UNSPEC_GR_SPILL:
        case UNSPEC_GR_RESTORE:
          {
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == 1);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
                                              new_flags, pred);
            break;
          }

        case UNSPEC_FR_SPILL:
        case UNSPEC_FR_RESTORE:
        case UNSPEC_GETF_EXP:
        case UNSPEC_SETF_EXP:
        case UNSPEC_ADDP4:
        case UNSPEC_FR_SQRT_RECIP_APPROX:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          break;

        case UNSPEC_FR_RECIP_APPROX:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          break;

        case UNSPEC_CMPXCHG_ACQ:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
          break;

        default:
          abort ();
        }
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
        {
        case UNSPECV_ALLOC:
          /* Alloc must always be the first instruction of a group.
             We force this by always returning true.  */
          /* ??? We might get better scheduling if we explicitly check for
             input/local/output register dependencies, and modify the
             scheduler so that alloc is always reordered to the start of
             the current group.  We could then eliminate all of the
             first_instruction code.  */
          rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          rws_access_regno (REG_AR_CFM, new_flags, pred);
          return 1;

        case UNSPECV_SET_BSP:
          need_barrier = 1;
          break;

        case UNSPECV_BLOCKAGE:
        case UNSPECV_INSN_GROUP_BARRIER:
        case UNSPECV_BREAK:
        case UNSPECV_PSAC_ALL:
        case UNSPECV_PSAC_NORMAL:
          return 0;

        default:
          abort ();
        }
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          {
          case '0':     /* unused field */
          case 'i':     /* integer */
          case 'n':     /* note */
          case 'w':     /* wide integer */
          case 's':     /* pointer to string */
          case 'S':     /* optional pointer to string */
            break;

          case 'e':
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
              need_barrier = 1;
            break;

          case 'E':
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
                need_barrier = 1;
            break;

          default:
            abort ();
          }
      break;
    }

  return need_barrier;
}
/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

static int
group_barrier_needed_p (rtx insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        /* Don't care about USE and CLOBBER "insns"---those are used to
           indicate to the optimizer that it shouldn't get rid of
           certain operations.  */
        break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
        {
          /* We play dependency tricks with the epilogue in order
             to get proper schedules.  Undo this for dv analysis.  */
        case CODE_FOR_epilogue_deallocate_stack:
        case CODE_FOR_prologue_allocate_stack:
          pat = XVECEXP (pat, 0, 0);
          break;

          /* The pattern we use for br.cloop confuses the code above.
             The second element of the vector is representative.  */
        case CODE_FOR_doloop_end_internal:
          pat = XVECEXP (pat, 0, 1);
          break;

          /* Doesn't generate code.  */
        case CODE_FOR_pred_rel_mutex:
        case CODE_FOR_prologue_use:
          return 0;

        default:
          break;
        }

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
         asm.  */
      if (! need_barrier)
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (rtx insn)
{
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == NOTE
               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == INSN
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          init_insn_group_barriers ();
          last_label = 0;
        }
      else if (INSN_P (insn))
        {
          insns_since_last_label = 1;

          if (group_barrier_needed_p (insn))
            {
              if (last_label)
                {
                  if (dump)
                    fprintf (dump, "Emitting stop before label %d\n",
                             INSN_UID (last_label));
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
                  insn = last_label;

                  init_insn_group_barriers ();

                  last_label = 0;
                }
            }
        }
    }
}
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        {
          rtx last = prev_active_insn (insn);

          if (! last)
            continue;
          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
        }
      else if (INSN_P (insn))
        {
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            init_insn_group_barriers ();
          else if (group_barrier_needed_p (insn))
            {
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
              init_insn_group_barriers ();
              group_barrier_needed_p (insn);
            }
        }
    }
}
static int errata_find_address_regs (rtx *, void *);
static void errata_emit_nops (rtx);
static void fixup_errata (void);

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   workaround hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
                             REGNO (x)))
        return 1;
      return -1;
    }
  return 0;
}

/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */
static void
errata_emit_nops (rtx insn)
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
        if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
            && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
          {
            set = 0;
            break;
          }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
          || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
          || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (!COMPARISON_P (cond)
          || !REG_P (XEXP (cond, 0)))
        abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
        SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
    }
  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);

      memset (last_group, 0, sizeof last_group);
    }
}
/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata (void)
{
  rtx insn;

  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
        continue;

      if (ia64_safe_type (insn) == TYPE_S)
        {
          group_idx ^= 1;
          memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
        }
      else
        errata_emit_nops (insn);
    }
}
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx last_scheduled_insn;

/* The following variable value is the size of the DFA state.  */

static size_t dfa_state_size;

/* The following variable value is a pointer to a DFA state used as
   a temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is the DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires stop bits to be added before it.  */

static char *stops_p;

/* The following variable is used to set up the array mentioned above.  */

static int stop_before_p = 0;

/* The following variable value is the length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following array element values are cycles on which the
   corresponding insn will be issued.  The array is used only for
   Itanium1.  */

static int *clocks;

/* The following array element values are numbers of cycles that should
   be added to improve insn scheduling for MM_insns for Itanium1.  */

static int *add_cycles;
static rtx ia64_single_set (rtx);
static void ia64_emit_insn_before (rtx, rtx);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}


/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single sets, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
    return cost;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}

/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decreases latency for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn, link, next, next_tail;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
        for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
          {
            next = XEXP (link, 0);
            if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
                 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
                && ia64_st_address_bypass_p (insn, next))
              break;
            else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
                      || ia64_safe_itanium_class (next)
                      == ITANIUM_CLASS_FLD)
                     && ia64_ld_address_bypass_p (insn, next))
              break;
          }
        insn->call = link != 0;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                 int sched_verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx insn;

  if (reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
         insn != current_sched_info->next_tail;
         insn = NEXT_INSN (insn))
      if (SCHED_GROUP_P (insn))
        abort ();
#endif
  last_scheduled_insn = NULL_RTX;
  init_insn_group_barriers ();
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
                        int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
                        int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
        if (insnp < e_ready)
          {
            rtx insn = *insnp;
            enum attr_type t = ia64_safe_type (insn);
            if (t == TYPE_UNKNOWN)
              {
                if (GET_CODE (PATTERN (insn)) == ASM_INPUT
                    || asm_noperands (PATTERN (insn)) >= 0)
                  {
                    rtx lowest = ready[n_asms];
                    ready[n_asms] = insn;
                    *insnp = lowest;
                    n_asms++;
                  }
                else
                  {
                    rtx highest = ready[n_ready - 1];
                    ready[n_ready - 1] = insn;
                    *insnp = highest;
                    return 1;
                  }
              }
          }

      if (n_asms < n_ready)
        {
          /* Some normal insns to process.  Skip the asms.  */
          ready += n_asms;
          n_ready -= n_asms;
        }
      else if (n_ready > 0)
        return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
        if (safe_group_barrier_needed_p (*insnp))
          nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
        return 0;
      if (reorder_type == 0)
        return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
         relative order.  */
      while (insnp-- > ready + deleted)
        while (insnp >= ready + deleted)
          {
            rtx insn = *insnp;
            if (! safe_group_barrier_needed_p (insn))
              break;
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
            *ready = insn;
            deleted++;
          }
      n_ready -= deleted;
      ready += deleted;
    }

  return 1;
}
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
                    int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
                                 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
                     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
                     int *pn_ready, int clock_var)
{
  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
                                 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
                     int sched_verbose ATTRIBUTE_UNUSED,
                     rtx insn ATTRIBUTE_UNUSED,
                     int can_issue_more ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      if (group_barrier_needed_p (insn))
        abort ();
      if (GET_CODE (insn) == CALL_INSN)
        init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;
    }
  return 1;
}
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
{
  if (insn == NULL_RTX || !INSN_P (insn))
    abort ();
  return (!reload_completed
          || !safe_group_barrier_needed_p (insn));
}

/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx dfa_pre_cycle_insn;
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK, and return zero if we should not
   sort the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
                    int clock, int *sort_p)
{
  int setup_clocks_p = FALSE;

  if (insn == NULL_RTX || !INSN_P (insn))
    abort ();
  if ((reload_completed && safe_group_barrier_needed_p (insn))
      || (last_scheduled_insn
          && (GET_CODE (last_scheduled_insn) == CALL_INSN
              || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
    {
      init_insn_group_barriers ();
      if (verbose && dump)
        fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
                 last_clock == clock ? " + cycle advance" : "");
      stop_before_p = 1;
      if (last_clock == clock)
        {
          state_transition (curr_state, dfa_stop_insn);
          if (TARGET_EARLY_STOP_BITS)
            *sort_p = (last_scheduled_insn == NULL_RTX
                       || GET_CODE (last_scheduled_insn) != CALL_INSN);
          else
            *sort_p = 0;
          return 1;
        }
      else if (reload_completed)
        setup_clocks_p = TRUE;
      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
          || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
        state_reset (curr_state);
      else
        {
          memcpy (curr_state, prev_cycle_state, dfa_state_size);
          state_transition (curr_state, dfa_stop_insn);
          state_transition (curr_state, dfa_pre_cycle_insn);
          state_transition (curr_state, NULL);
        }
    }
  else if (reload_completed)
    setup_clocks_p = TRUE;
  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
      && asm_noperands (PATTERN (insn)) < 0)
    {
      enum attr_itanium_class c = ia64_safe_itanium_class (insn);

      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
        {
          rtx link;
          int d = -1;

          for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
            if (REG_NOTE_KIND (link) == 0)
              {
                enum attr_itanium_class dep_class;
                rtx dep_insn = XEXP (link, 0);

                dep_class = ia64_safe_itanium_class (dep_insn);
                if ((dep_class == ITANIUM_CLASS_MMMUL
                     || dep_class == ITANIUM_CLASS_MMSHF)
                    && last_clock - clocks [INSN_UID (dep_insn)] < 4
                    && (d < 0
                        || last_clock - clocks [INSN_UID (dep_insn)] < d))
                  d = last_clock - clocks [INSN_UID (dep_insn)];
              }
          if (d >= 0)
            add_cycles [INSN_UID (insn)] = 3 - d;
        }
    }
  return 0;
}
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
                   insn, etc.)  */
  int cost;     /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
                                nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA state after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following array maps an insn number to the corresponding bundle
   state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of the next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;


/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = xmalloc (sizeof (struct bundle_state));
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}
/* The following function frees a given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}
/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}

/* Hash table of the bundle states.  The key is the dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;
/* The function returns the hash of BUNDLE_STATE.  */

static unsigned
bundle_state_hash (const void *bundle_state)
{
  const struct bundle_state *state = (struct bundle_state *) bundle_state;
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}
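/* Illustrative sketch (not part of GCC): the hash above mixes each
   DFA-state byte at a shift that cycles with the byte index, then adds
   insn_num so equal automaton states at different positions in the EBB
   do not collide trivially.  A hypothetical standalone version over a
   plain byte buffer:  */
#if 0
#include <stdio.h>
#include <limits.h>

static unsigned demo_hash (const unsigned char *buf, unsigned size,
                           int insn_num)
{
  unsigned result, i;

  /* Same mixing rule as bundle_state_hash, applied to BUF.  */
  for (result = i = 0; i < size; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + insn_num;
}

int main (void)
{
  unsigned char state[4] = { 1, 2, 3, 4 };
  printf ("%u\n", demo_hash (state, 4, 5));
  return 0;
}
#endif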
/* The function returns nonzero if the bundle state keys are equal.  */

static int
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
{
  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
          && memcmp (state1->dfa_state, state2->dfa_state,
                     dfa_state_size) == 0);
}
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with the given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
                   > bundle_state->accumulated_insns_num
                   || (((struct bundle_state *)
                        *entry_ptr)->accumulated_insns_num
                       == bundle_state->accumulated_insns_num
                       && ((struct bundle_state *)
                           *entry_ptr)->branch_deviation
                       > bundle_state->branch_deviation))))
    {
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}

/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
                                    (htab_del) 0);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  htab_delete (bundle_state_table);
}
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
        free_bundle_state (curr_state);
        return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
                     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  if (insn == NULL_RTX)
    abort ();
  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      if (GET_MODE (insn) == TImode)
        abort ();
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
          && curr_state->accumulated_insns_num % 3 != 0)
        {
          free_bundle_state (curr_state);
          return;
        }
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        abort ();
      if (ia64_safe_type (insn) == TYPE_L)
        curr_state->accumulated_insns_num++;
    }
  else
    {
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        {
          /* Finish bundle containing asm insn.  */
          curr_state->after_nops_num
            = 3 - curr_state->accumulated_insns_num % 3;
          curr_state->accumulated_insns_num
            += 3 - curr_state->accumulated_insns_num % 3;
        }
      else if (ia64_safe_type (insn) == TYPE_L)
        curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
        {
          state_t dfa_state;
          struct bundle_state *curr_state1;
          struct bundle_state *allocated_states_chain;

          curr_state1 = get_free_bundle_state ();
          dfa_state = curr_state1->dfa_state;
          allocated_states_chain = curr_state1->allocated_states_chain;
          *curr_state1 = *curr_state;
          curr_state1->dfa_state = dfa_state;
          curr_state1->allocated_states_chain = allocated_states_chain;
          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
                  dfa_state_size);
          curr_state = curr_state1;
        }
      if (!try_issue_nops (curr_state,
                           3 - curr_state->accumulated_insns_num % 3))
        return;
      curr_state->after_nops_num
        = 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
        += 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
}
/* The following function returns the position in the two-window bundle
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}

/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with two
   values of position: 3 or 6.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mii_))
        return 0;
      else if (cpu_unit_reservation_p (state, _0mmi_))
        return 1;
      else if (cpu_unit_reservation_p (state, _0mfi_))
        return 2;
      else if (cpu_unit_reservation_p (state, _0mmf_))
        return 3;
      else if (cpu_unit_reservation_p (state, _0bbb_))
        return 4;
      else if (cpu_unit_reservation_p (state, _0mbb_))
        return 5;
      else if (cpu_unit_reservation_p (state, _0mib_))
        return 6;
      else if (cpu_unit_reservation_p (state, _0mmb_))
        return 7;
      else if (cpu_unit_reservation_p (state, _0mfb_))
        return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
        return 9;
      else
        abort ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mii_))
        return 0;
      else if (cpu_unit_reservation_p (state, _1mmi_))
        return 1;
      else if (cpu_unit_reservation_p (state, _1mfi_))
        return 2;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
        return 3;
      else if (cpu_unit_reservation_p (state, _1bbb_))
        return 4;
      else if (cpu_unit_reservation_p (state, _1mbb_))
        return 5;
      else if (cpu_unit_reservation_p (state, _1mib_))
        return 6;
      else if (cpu_unit_reservation_p (state, _1mmb_))
        return 7;
      else if (cpu_unit_reservation_p (state, _1mfb_))
        return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
        return 9;
      else
        abort ();
    default:
      abort ();
    }
}
/* The following function returns an insn important for insn bundling
   followed by INSN and before TAIL.  */

static rtx
get_next_important_insn (rtx insn, rtx tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
        && GET_CODE (PATTERN (insn)) != USE
        && GET_CODE (PATTERN (insn)) != CLOBBER)
      return insn;
  return NULL_RTX;
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes
   for insn scheduling) before/after each insn being bundled.  We know
   the start of a simulated processor cycle from insn scheduling (an
   insn starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and implicitly about previous decisions) is
   described by structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of EBB (extended basic block), we choose the
   best sequence and then, moving back in EBB, insert templates for
   the best alternative.  The templates are taken from querying
   automaton state for each insn in chosen bundle states.

   So the algorithm makes two (forward and backward) passes through
   EBB.  There is an additional forward pass through EBB for Itanium1
   processor.  This pass inserts more nops to make dependency between
   a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
static void
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx insn, next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx b;
  rtx nop;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = xmalloc ((insn_num + 2)
                                    * sizeof (struct bundle_state *));
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift cycle mark if it is put on insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
            || GET_CODE (PATTERN (insn)) == USE
            || GET_CODE (PATTERN (insn)) == CLOBBER)
        && GET_MODE (insn) == TImode)
      {
        PUT_MODE (insn, VOIDmode);
        for (next_insn = NEXT_INSN (insn);
             next_insn != tail;
             next_insn = NEXT_INSN (next_insn))
          if (INSN_P (next_insn)
              && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
              && GET_CODE (PATTERN (next_insn)) != USE
              && GET_CODE (PATTERN (next_insn)) != CLOBBER)
            {
              PUT_MODE (next_insn, TImode);
              break;
            }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      if (!INSN_P (insn)
          || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
          || GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        abort ();
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
           curr_state != NULL;
           curr_state = next_state)
        {
          pos = curr_state->accumulated_insns_num % 3;
          next_state = curr_state->next;
          /* We must fill up the current bundle in order to start a
             subsequent asm insn in a new bundle.  Asm insn is always
             placed in a separate bundle.  */
          only_bundle_end_p
            = (next_insn != NULL_RTX
               && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
               && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
          /* We may fill up the current bundle if it is the cycle end
             without a group barrier.  */
          bundle_end_p
            = (only_bundle_end_p || next_insn == NULL_RTX
               || (GET_MODE (next_insn) == TImode
                   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
          if (type == TYPE_F || type == TYPE_B || type == TYPE_L
              /* We need to insert 2 nops for cases like M_MII.  To
                 guarantee issuing all insns on the same cycle for
                 Itanium 1, we need to issue 2 nops after the first M
                 insn (MnnMII where n is a nop insn).  */
              || ((type == TYPE_M || type == TYPE_A)
                  && ia64_tune == PROCESSOR_ITANIUM
                  && !bundle_end_p && pos == 1))
            issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
                                 only_bundle_end_p);
          issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
                               only_bundle_end_p);
          issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
                               only_bundle_end_p);
        }
      if (index_to_bundle_states [insn_num] == NULL)
        abort ();
      for (curr_state = index_to_bundle_states [insn_num];
           curr_state != NULL;
           curr_state = curr_state->next)
        if (verbose >= 2 && dump)
          {
            /* This structure is taken from generated code of the
               pipeline hazard recognizer (see file insn-attrtab.c).
               Please don't forget to change the structure if a new
               automaton is added to .md file.  */
            struct DFA_chip
            {
              unsigned short one_automaton_state;
              unsigned short oneb_automaton_state;
              unsigned short two_automaton_state;
              unsigned short twob_automaton_state;
            };

            fprintf
              (dump,
               "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
               curr_state->unique_num,
               (curr_state->originator == NULL
                ? -1 : curr_state->originator->unique_num),
               curr_state->cost,
               curr_state->before_nops_num, curr_state->after_nops_num,
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
               (ia64_tune == PROCESSOR_ITANIUM
                ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
                : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
               INSN_UID (insn));
          }
    }
  if (index_to_bundle_states [insn_num] == NULL)
    /* We should find a solution because the 2nd insn scheduling has
       found one.  */
    abort ();
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns
       placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
        && (best_state == NULL || best_state->cost > curr_state->cost
            || (best_state->cost == curr_state->cost
                && (curr_state->accumulated_insns_num
                    < best_state->accumulated_insns_num
                    || (curr_state->accumulated_insns_num
                        == best_state->accumulated_insns_num
                        && curr_state->branch_deviation
                        < best_state->branch_deviation)))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
               || asm_noperands (PATTERN (insn)) >= 0);
      insn_num++;
      if (verbose >= 2 && dump)
        {
          struct DFA_chip
          {
            unsigned short one_automaton_state;
            unsigned short oneb_automaton_state;
            unsigned short two_automaton_state;
            unsigned short twob_automaton_state;
          };

          fprintf
            (dump,
             "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
             curr_state->unique_num,
             (curr_state->originator == NULL
              ? -1 : curr_state->originator->unique_num),
             curr_state->cost,
             curr_state->before_nops_num, curr_state->after_nops_num,
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
             (ia64_tune == PROCESSOR_ITANIUM
              ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
              : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
             INSN_UID (insn));
        }
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  Two bundle window means that
         the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
          /* The following (negative template number) means that the
             processor did one bundle rotation.  */
          || (max_pos == 3 && template0 < 0))
        {
          /* We are at the end of the window -- find template(s) for
             its bundle(s).  */
          pos = max_pos;
          if (max_pos == 3)
            template0 = get_template (curr_state->dfa_state, 3);
          else
            {
              template1 = get_template (curr_state->dfa_state, 3);
              template0 = get_template (curr_state->dfa_state, 6);
            }
        }
      if (max_pos > 3 && template1 < 0)
        /* It may happen when we have the stop inside a bundle.  */
        {
          if (pos > 3)
            abort ();
          template1 = get_template (curr_state->dfa_state, 3);
          pos += 3;
        }
      if (!asm_p)
        /* Emit nops after the current insn.  */
        for (i = 0; i < curr_state->after_nops_num; i++)
          {
            nop = gen_nop ();
            emit_insn_after (nop, insn);
            pos--;
            if (pos < 0)
              abort ();
            if (pos % 3 == 0)
              {
                /* We are at the start of a bundle: emit the template
                   (it should be defined).  */
                if (template0 < 0)
                  abort ();
                b = gen_bundle_selector (GEN_INT (template0));
                ia64_emit_insn_before (b, nop);
                /* If we have two bundle window, we make one bundle
                   rotation.  Otherwise template0 will be undefined
                   (negative value).  */
                template0 = template1;
                template1 = -1;
              }
          }
      /* Move the position backward in the window.  Group barrier has
         no slot.  Asm insn takes all bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        pos--;
      /* Long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
        pos--;
      if (pos < 0)
        abort ();
      if (pos % 3 == 0
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        {
          /* The current insn is at the bundle start: emit the
             template.  */
          if (template0 < 0)
            abort ();
          b = gen_bundle_selector (GEN_INT (template0));
          ia64_emit_insn_before (b, insn);
          b = PREV_INSN (insn);
          insn = b;
          /* See comment above in analogous place for emitting nops
             after the insn.  */
          template0 = template1;
          template1 = -1;
        }
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
        {
          nop = gen_nop ();
          ia64_emit_insn_before (nop, insn);
          nop = PREV_INSN (insn);
          insn = nop;
          pos--;
          if (pos < 0)
            abort ();
          if (pos % 3 == 0)
            {
              /* See comment above in analogous place for emitting nops
                 after the insn.  */
              if (template0 < 0)
                abort ();
              b = gen_bundle_selector (GEN_INT (template0));
              ia64_emit_insn_before (b, insn);
              b = PREV_INSN (insn);
              insn = b;
              template0 = template1;
              template1 = -1;
            }
        }
    }
  if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium1 has a strange design: if the distance between an insn
       and a dependent MM-insn is less than 4, then we have an
       additional 6 cycle stall.  So we make the distance equal to 4
       cycles if it is less.  */
    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
         insn != NULL_RTX;
         insn = next_insn)
      {
        if (!INSN_P (insn)
            || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
            || GET_CODE (PATTERN (insn)) == USE
            || GET_CODE (PATTERN (insn)) == CLOBBER)
          abort ();
        next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
        if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
          /* We found a MM-insn which needs additional cycles.  */
          {
            rtx last;
            int i, j, n;
            int pred_stop_p;

            /* Now we are searching for a template of the bundle in
               which the MM-insn is placed and the position of the
               insn in the bundle (0, 1, 2).  Also we check whether
               there is a stop before the insn.  */
            last = prev_active_insn (insn);
            pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
            if (pred_stop_p)
              last = prev_active_insn (last);
            n = 0;
            for (;; last = prev_active_insn (last))
              if (recog_memoized (last) == CODE_FOR_bundle_selector)
                {
                  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
                  if (template0 == 9)
                    /* The insn is in MLX bundle.  Change the template
                       onto MFI because we will add nops before the
                       insn.  It simplifies subsequent code a lot.  */
                    PATTERN (last)
                      = gen_bundle_selector (const2_rtx); /* -> MFI */
                  break;
                }
              else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
                       && (ia64_safe_itanium_class (last)
                           != ITANIUM_CLASS_IGNORE))
                n++;
            /* Some check of correctness: the stop is not at the
               bundle start, there are no more than 3 insns in the
               bundle, and the MM-insn is not at the start of a bundle
               with template MLX.  */
            if ((pred_stop_p && n == 0) || n > 2
                || (template0 == 9 && n != 0))
              abort ();
            /* Put nops after the insn in the bundle.  */
            for (j = 3 - n; j > 0; j --)
              ia64_emit_insn_before (gen_nop (), insn);
            /* This takes into account that we will add N more nops
               before the insn later -- please see the code below.  */
            add_cycles [INSN_UID (insn)]--;
            if (!pred_stop_p || add_cycles [INSN_UID (insn)])
              ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                     insn);
            if (pred_stop_p)
              add_cycles [INSN_UID (insn)]--;
            for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
              {
                /* Insert "MII;" template.  */
                ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
                                       insn);
                ia64_emit_insn_before (gen_nop (), insn);
                ia64_emit_insn_before (gen_nop (), insn);
                if (i > 1)
                  {
                    /* To decrease code size, we use "MI;I;"
                       template.  */
                    ia64_emit_insn_before
                      (gen_insn_group_barrier (GEN_INT (3)), insn);
                    i--;
                  }
                ia64_emit_insn_before (gen_nop (), insn);
                ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                       insn);
              }
            /* Put the MM-insn in the same slot of a bundle with the
               same template as the original one.  */
            ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
                                   insn);
            /* To put the insn in the same slot, add necessary number
               of nops.  */
            for (j = n; j > 0; j --)
              ia64_emit_insn_before (gen_nop (), insn);
            /* Put the stop if the original bundle had it.  */
            if (pred_stop_p)
              ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                     insn);
          }
      }
  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
}
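/* An illustrative sketch (not part of GCC) of the dynamic programming
   used by bundling () above: partial solutions are keyed by the opaque
   automaton state, and equivalent partial solutions are merged so only
   the cheaper one survives.  All names here (toy_*, TOY_*) are
   hypothetical and exist only for this sketch; the real code uses
   struct bundle_state and a GCC hash table.  */
#if 0
#include <string.h>

#define TOY_DFA_SIZE   16   /* pretend the DFA state is 16 bytes */
#define TOY_TABLE_SIZE 1024

struct toy_state
{
  unsigned char dfa[TOY_DFA_SIZE]; /* opaque automaton state: the key */
  int cost;                        /* cost of reaching this state */
  struct toy_state *next;          /* hash-bucket chain */
};

static struct toy_state *toy_table[TOY_TABLE_SIZE];

static unsigned
toy_hash (const unsigned char *dfa)
{
  unsigned h = 5381;
  int i;

  for (i = 0; i < TOY_DFA_SIZE; i++)
    h = h * 33 + dfa[i];
  return h % TOY_TABLE_SIZE;
}

/* Return nonzero if NEW_STATE was inserted, zero if an equivalent
   state already existed (in which case the cheaper cost is kept,
   mirroring how duplicate bundle states are rejected above).  */
static int
toy_insert (struct toy_state *new_state)
{
  unsigned slot = toy_hash (new_state->dfa);
  struct toy_state *p;

  for (p = toy_table[slot]; p != NULL; p = p->next)
    if (memcmp (p->dfa, new_state->dfa, TOY_DFA_SIZE) == 0)
      {
        if (new_state->cost < p->cost)
          p->cost = new_state->cost;
        return 0;
      }
  new_state->next = toy_table[slot];
  toy_table[slot] = new_state;
  return 1;
}
#endif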
/* The following function is called at the end of scheduling BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
                current_sched_info->next_tail);
      if (sched_verbose && dump)
        fprintf (dump, "// finishing %d-%d\n",
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;
  int need_barrier_p = 0;
  rtx prev_insn = NULL_RTX;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        {
          rtx last = prev_active_insn (insn);

          if (! last)
            continue;
          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
          need_barrier_p = 0;
          prev_insn = NULL_RTX;
        }
      else if (INSN_P (insn))
        {
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            {
              init_insn_group_barriers ();
              need_barrier_p = 0;
              prev_insn = NULL_RTX;
            }
          else if (need_barrier_p || group_barrier_needed_p (insn))
            {
              if (TARGET_EARLY_STOP_BITS)
                {
                  rtx last;

                  for (last = insn;
                       last != current_sched_info->prev_head;
                       last = PREV_INSN (last))
                    if (INSN_P (last) && GET_MODE (last) == TImode
                        && stops_p [INSN_UID (last)])
                      break;
                  if (last == current_sched_info->prev_head)
                    last = insn;
                  last = prev_active_insn (last);
                  if (last
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
                                     last);
                  init_insn_group_barriers ();
                  for (last = NEXT_INSN (last);
                       last != insn;
                       last = NEXT_INSN (last))
                    if (INSN_P (last))
                      group_barrier_needed_p (last);
                }
              else
                {
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                    insn);
                  init_insn_group_barriers ();
                }
              group_barrier_needed_p (insn);
              prev_insn = NULL_RTX;
            }
          else if (recog_memoized (insn) >= 0)
            prev_insn = insn;
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
                            || asm_noperands (PATTERN (insn)) >= 0);
        }
    }
}
/* The following function returns the lookahead depth used by the DFA
   insn scheduler for multipass scheduling of the first cycle.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
    abort ();
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
    abort ();
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  dest = ia64_single_set (consumer);
  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
      || GET_CODE (mem) != MEM)
    abort ();
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, src, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
    abort ();
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
    abort ();
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  src = ia64_single_set (consumer);
  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
    abort ();
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Note that LO_SUM is used for GOT loads.  */
  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
    abort ();

  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if INSN produces address for a
   load/store insn.  We will place such insns into M slot because it
   decreases its latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
        continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
          && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
        head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (bb->global_live_at_start, r))
          {
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == BB_END (bb))
              BB_END (bb) = n;
            head = n;
          }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = BB_HEAD (bb);

      while (1)
        {
          if (GET_CODE (insn) == CALL_INSN
              && GET_CODE (PATTERN (insn)) == COND_EXEC
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
            {
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
              if (BB_HEAD (bb) == insn)
                BB_HEAD (bb) = b;
              if (BB_END (bb) == insn)
                BB_END (bb) = a;
            }

          if (insn == BB_END (bb))
            break;
          insn = NEXT_INSN (insn);
        }
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  /* ??? update_life_info_in_dirty_blocks fails to terminate during
     non-optimizing bootstrap.  */
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = xcalloc (1, clocks_length);
      if (ia64_tune == PROCESSOR_ITANIUM)
        {
          clocks = xcalloc (clocks_length, sizeof (int));
          add_cycles = xcalloc (clocks_length, sizeof (int));
        }
      if (ia64_tune == PROCESSOR_ITANIUM2)
        {
          pos_1 = get_cpu_unit_code ("2_1");
          pos_2 = get_cpu_unit_code ("2_2");
          pos_3 = get_cpu_unit_code ("2_3");
          pos_4 = get_cpu_unit_code ("2_4");
          pos_5 = get_cpu_unit_code ("2_5");
          pos_6 = get_cpu_unit_code ("2_6");
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
        }
      else
        {
          pos_1 = get_cpu_unit_code ("1_1");
          pos_2 = get_cpu_unit_code ("1_2");
          pos_3 = get_cpu_unit_code ("1_3");
          pos_4 = get_cpu_unit_code ("1_4");
          pos_5 = get_cpu_unit_code ("1_5");
          pos_6 = get_cpu_unit_code ("1_6");
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
        }
      schedule_ebbs (dump_file);
      finish_bundle_states ();
      if (ia64_tune == PROCESSOR_ITANIUM)
        {
          free (add_cycles);
          free (clocks);
        }
      free (stops_p);
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      /* Skip over insns that expand to nothing.  */
      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
        {
          if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
              && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
            saw_stop = 1;
          insn = prev_active_insn (insn);
        }
      if (GET_CODE (insn) == CALL_INSN)
        {
          if (! saw_stop)
            emit_insn (gen_insn_group_barrier (GEN_INT (3)));
          emit_insn (gen_break_f ());
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));
        }
    }

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
         value to "gp".  After returning from such a call, we need to make
         sure the function restores the original gp-value, even if the
         function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case B_REG (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last block of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (void)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (FILE *asm_out_file, rtx pat)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
         shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
        abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
               ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);
          if (op0 == dest && GET_CODE (op1) == CONST_INT)
            {
              if (INTVAL (op1) < 0)
                fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
                         -INTVAL (op1));
              else
                process_epilogue ();
            }
          else
            abort ();
        }
      else if (GET_CODE (src) == REG
               && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
        process_epilogue ();
      else
        abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
        {
        case BR_REG (0):
          /* Saving return address pointer.  */
          if (dest_regno != current_frame_info.reg_save_b0)
            abort ();
          fprintf (asm_out_file, "\t.save rp, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case PR_REG (0):
          if (dest_regno != current_frame_info.reg_save_pr)
            abort ();
          fprintf (asm_out_file, "\t.save pr, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_UNAT_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_unat)
            abort ();
          fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_LC_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_lc)
            abort ();
          fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case STACK_POINTER_REGNUM:
          if (dest_regno != HARD_FRAME_POINTER_REGNUM
              || ! frame_pointer_needed)
            abort ();
          fprintf (asm_out_file, "\t.vframe r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        default:
          /* Everything else should indicate being stored to memory.  */
          abort ();
        }
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      rtx base;
      long off;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
        {
          base = XEXP (dest, 0);
          off = 0;
        }
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
        {
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
        }
      else
        abort ();

      if (base == hard_frame_pointer_rtx)
        {
          saveop = ".savepsp";
          off = - off;
        }
      else if (base == stack_pointer_rtx)
        saveop = ".savesp";
      else
        abort ();

      src_regno = REGNO (src);
      switch (src_regno)
        {
        case BR_REG (0):
          if (current_frame_info.reg_save_b0 != 0)
            abort ();
          fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
          return 1;

        case PR_REG (0):
          if (current_frame_info.reg_save_pr != 0)
            abort ();
          fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
          return 1;

        case AR_LC_REGNUM:
          if (current_frame_info.reg_save_ar_lc != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
          return 1;

        case AR_PFS_REGNUM:
          if (current_frame_info.reg_save_ar_pfs != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
          return 1;

        case AR_UNAT_REGNUM:
          if (current_frame_info.reg_save_ar_unat != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
          return 1;

        case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
          fprintf (asm_out_file, "\t.save.g 0x%x\n",
                   1 << (src_regno - GR_REG (4)));
          return 1;

        case BR_REG (1): case BR_REG (2): case BR_REG (3):
        case BR_REG (4): case BR_REG (5):
          fprintf (asm_out_file, "\t.save.b 0x%x\n",
                   1 << (src_regno - BR_REG (1)));
          return 1;

        case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
          fprintf (asm_out_file, "\t.save.f 0x%x\n",
                   1 << (src_regno - FR_REG (2)));
          return 1;

        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                   1 << (src_regno - FR_REG (12)));
          return 1;

        default:
          return 0;
        }
    }

  return 0;
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
            {
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state 1\n");
              need_copy_state = false;
            }
        }

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
        return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
        pat = XEXP (pat, 0);
      else
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
        {
        case SET:
          process_set (asm_out_file, pat);
          break;

        case PARALLEL:
          {
            int par_index;
            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
              {
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x);
              }
            break;
          }

        default:
          abort ();
        }
    }
}
static void
ia64_init_builtins (void)
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                long_integer_type_node, NULL_TREE);
  /* __sync_bool_compare_and_swap_di */
  tree si_ftype_pdi_di_di
    = build_function_type_list (integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                long_integer_type_node, NULL_TREE);
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node, NULL_TREE);

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                NULL_TREE);

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);

  tree fpreg_type;
  tree float80_type;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  /* ??? The back end should know to load/save __fpreg variables using
     the ldf.fill and stf.spill instructions.  */
  TYPE_PRECISION (fpreg_type) = 80;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float128");

#define def_builtin(name, type, code) \
  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
                               NULL, NULL_TREE)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE(mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
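/* Illustrative sketch (not part of GCC): the retry loop emitted above
   has the semantics below.  `toy_cas' stands in for the cmpxchg.acq
   instruction and is hypothetical, as is `toy_fetch_and_add'; both
   exist only to show the structure of the loop.  */
#if 0
/* Atomically compare *ptr with oldval; if equal, store newval.
   Returns the value *ptr held before the operation.  */
static int toy_cas (volatile int *ptr, int oldval, int newval);

static int
toy_fetch_and_add (volatile int *ptr, int value)
{
  int tmp = *ptr;                       /* tmp = [ptr] */
  for (;;)
    {
      int ret = tmp;                    /* remember the old value */
      int observed = toy_cas (ptr, tmp, tmp + value);
      if (observed == ret)              /* cmpxchg succeeded */
        return ret;                     /* fetch_and_op returns the OLD value */
      tmp = observed;                   /* lost a race: retry with fresh value */
    }
}
#endif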
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE(mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }

  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
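/* Illustrative sketch (not part of GCC): same retry loop as the
   fetch_and_op sketch above, except op_and_fetch returns the NEW
   value.  `toy_cas' is the same hypothetical compare-and-swap
   primitive.  */
#if 0
static int toy_cas (volatile int *ptr, int oldval, int newval);

static int
toy_add_and_fetch (volatile int *ptr, int value)
{
  int tmp = *ptr;
  for (;;)
    {
      int ret = tmp + value;            /* compute the new value */
      int observed = toy_cas (ptr, tmp, ret);
      if (observed == tmp)
        return ret;                     /* op_and_fetch returns the NEW value */
      tmp = observed;
    }
}
#endif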
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
                              int boolp, tree arglist, rtx target)
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (GET_MODE (old) != mode)
    old = convert_to_mode (mode, old, /*unsignedp=*/1);
  if (GET_MODE (new) != mode)
    new = convert_to_mode (mode, new, /*unsignedp=*/1);

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  convert_move (ccv, old, /*unsignedp=*/1);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (rmode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
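/* Illustrative sketch (not part of GCC): the difference between the
   two variants expanded above, written in plain C.  `toy_cas' is the
   same hypothetical compare-and-swap primitive used in the earlier
   sketches.  */
#if 0
static int toy_cas (volatile int *ptr, int oldval, int newval);

static int
toy_val_compare_and_swap (volatile int *ptr, int oldval, int newval)
{
  return toy_cas (ptr, oldval, newval);            /* value seen in memory */
}

static int
toy_bool_compare_and_swap (volatile int *ptr, int oldval, int newval)
{
  return toy_cas (ptr, oldval, newval) == oldval;  /* 1 iff the swap hit */
}
#endif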
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
                               rtx target)
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (enum machine_mode mode, tree arglist,
                          rtx target ATTRIBUTE_UNUSED)
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
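/* Illustrative sketch (not part of GCC): how user code typically pairs
   the two builtins expanded above as a spinlock.  This is a usage
   example of the __sync interface defined in ia64_init_builtins above,
   not compiler code; `toy_lock', `toy_acquire' and `toy_release' are
   hypothetical names.  */
#if 0
static volatile int toy_lock;   /* lock word, 0 = free */

static void
toy_acquire (void)
{
  /* lock_test_and_set returns the previous value; spin while the lock
     was already taken.  The xchg has acquire semantics on IA-64.  */
  while (__sync_lock_test_and_set_si (&toy_lock, 1) != 0)
    continue;
}

static void
toy_release (void)
{
  /* lock_release is a plain store of zero with release semantics.  */
  __sync_lock_release_si (&toy_lock);
}
#endif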
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
                                           target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
                                           target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
/* On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
   /* Exception to normal case for structures/unions/etc.  */

   if (type && AGGREGATE_TYPE_P (type)
       && int_size_in_bytes (type) < UNITS_PER_WORD)
     return upward;

   /* Fall back to the default.  */
   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;
  tree decl;
};

static GTY(()) struct extern_func_list *extern_func_head;

void
ia64_hpux_add_extern_decl (tree decl)
{
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->decl = decl;
  p->next = extern_func_head;
  extern_func_head = p;
}
/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
        {
          const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

          TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, name);
          fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
        }
    }

  extern_func_head = 0;
}
/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it
   for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
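/* Note (an assumption based on the comment above): ia64_expand_compare
   is expected to emit all TFmode comparisons as calls of the form
   _U_Qfcmp (a, b, magic), where `magic' encodes the predicate, which
   is why the individual comparison optabs are cleared here.  */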
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
/* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that value should be passed in
   out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
      return true;

    case TFmode:
      return TARGET_HPUX;

    default:
      return false;
    }
}

#include "gt-ia64.h"