/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This file should be included last.  */
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
					      const unsigned char *sel);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
#define MAX_VECT_LEN	8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,	       0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
struct gcc_target targetm = TARGET_INITIALIZER;
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);
      else
	abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
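
/* Worked example: rop == 0x0ff0 with rshift == 4 gives op == 0xff
   after the shift; 0xff + 1 == 0x100 is a power of two, so the result
   is exact_log2 (0x100) == 8, an 8-bit field.  A non-contiguous mask
   leaves op + 1 a non-power-of-two and exact_log2 returns -1, the
   failure value.  */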
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}
static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}
static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
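
/* (A TLS reference must be expanded into a model-specific insn
   sequence with its own relocations; that information would be lost
   if the address were spilled to the constant pool, hence the check
   above.)  */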
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
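      /* Note: the "& 0x3fff ... ^ 0x2000 ... - 0x2000" idiom below
	 sign-extends the low 14 bits of the offset, so LO covers
	 [-0x2000, 0x1fff] and HI - LO is always a multiple of 0x4000;
	 the low part is what a 14-bit signed add immediate can
	 absorb.  */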
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
;
1192 gen_thread_pointer (void)
1194 if (!thread_pointer_rtx
)
1195 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1196 return thread_pointer_rtx
;
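
/* Per the IA-64 software conventions r13 is the fixed thread pointer
   ("tp"), so no load is required; the TLS sequences below simply add
   thread-relative offsets to this register.  */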
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (mode, sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
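
/* This works because every IA-64 insn can be predicated: each insn
   emitted for the move is individually wrapped in a COND_EXEC on a
   copy of the same condition.  */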
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
			     base, 0))
	dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
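
/* The REG_INC note records that the base register is modified as a
   side effect of the memory reference, which later passes cannot see
   from the SET pattern alone.  */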
  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given a NaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     a NaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* Unordered relational operators do not raise FP_INVALID
	     when given a NaN operand.  */
	case UNLT:      magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
	case UNLE:      magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	case UNGT:      magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
	case UNGE:      magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	  /* Not supported.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
						   ret, const0_rtx)));

      rtx_insn *insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
1931 /* Subtract (-(INT MAX) - 1) from both operands to make
1933 mask
= gen_int_mode (0x80000000, SImode
);
1934 mask
= gen_rtx_CONST_VECTOR (V2SImode
, gen_rtvec (2, mask
, mask
));
1935 mask
= force_reg (mode
, mask
);
1936 t1
= gen_reg_rtx (mode
);
1937 emit_insn (gen_subv2si3 (t1
, op0
, mask
));
1938 t2
= gen_reg_rtx (mode
);
1939 emit_insn (gen_subv2si3 (t2
, op1
, mask
));
1948 /* Perform a parallel unsigned saturating subtraction. */
1949 x
= gen_reg_rtx (mode
);
1950 emit_insn (gen_rtx_SET (x
, gen_rtx_US_MINUS (mode
, op0
, op1
)));
1954 op1
= CONST0_RTX (mode
);
1963 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1964 emit_insn (gen_rtx_SET (dest
, x
));
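/* Worked example (a sketch, not from the original sources): for an
   unsigned V2SImode `op0 GTU op1', biasing both operands by 0x80000000
   maps [0, 0xffffffff] onto [INT_MIN, INT_MAX] while preserving order.
   E.g. 0 GTU 0xffffffff is false, and after the subtraction the signed
   compare of 0x80000000 against 0x7fffffff (INT_MIN GT INT_MAX) is
   false as well, so the signed parallel compare gives the unsigned
   answer.  */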
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
	{
	  emit_move_insn (operands[0], ot);
	  return;
	}

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
			   rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
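/* A sketch of the V4HImode UMAX identity used above: umax (a, b) is
   computed as us_minus (a, b) + b, since a >= b gives (a - b) + b = a,
   while a < b saturates the subtraction to 0 and yields b.  */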
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.one_operand_p = false;
  d.testing_p = false;

  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
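/* Example (a sketch): for V8QImode with HIGHP set, nelt is 8, HIGH is 4,
   and the loop above builds the permutation { 4, 12, 5, 13, 6, 14, 7, 15 },
   interleaving the high halves of the two operands element by element.  */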
/* Return a vector of the sign-extension of VEC.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

  if (unsignedp)
    return zero;
  else
    {
      rtx sign = gen_reg_rtx (mode);
      bool neg;

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
      gcc_assert (!neg);

      return sign;
    }
}
/* Emit an integral vector unpack operation.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}
/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  machine_mode wmode;
  rtx l, h, t, sign;

  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}
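/* Sketch: for a V8QImode input summed into a V4HImode accumulator, the
   two ia64_unpack_assemble calls widen the low and high four elements
   (with SIGN supplying the extension bits in the signed case), and both
   widened halves are then added into operands[2], accumulating the
   result in operands[0].  */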
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (DImode, GR_REG (25)));
}
static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

static int
get_reg (enum ia64_frame_regs r)
{
  reg_emitted (r);
  return current_frame_info.r[r];
}

static bool
is_emitted (int regno)
{
  unsigned int r;

  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return true;
  return false;
}
void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      offset = (current_frame_info.spill_cfa_off
	        + current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
	emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, offset_r);
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
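/* Sketch of the split, assuming the descriptor address lives in r14 and
   SCRATCH_R/SCRATCH_B are r2/b6 (all register choices here illustrative):

	ld8 r2 = [r14], 8	;; code address, post-increment
	;;
	mov b6 = r2
	ld8 gp = [r14], -8	;; new gp; POST_DEC only if r14 must survive
	br.call.sptk b0 = b6  */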
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)
	  goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
	val = GEN_INT (-INTVAL (val));

      if (!old_dst)
	old_dst = gen_reg_rtx (mode);

      switch (model)
	{
	case MEMMODEL_ACQ_REL:
	case MEMMODEL_SEQ_CST:
	case MEMMODEL_SYNC_SEQ_CST:
	  emit_insn (gen_memory_barrier ());
	  /* FALLTHRU */
	case MEMMODEL_RELAXED:
	case MEMMODEL_ACQUIRE:
	case MEMMODEL_SYNC_ACQUIRE:
	case MEMMODEL_CONSUME:
	  if (mode == SImode)
	    icode = CODE_FOR_fetchadd_acq_si;
	  else
	    icode = CODE_FOR_fetchadd_acq_di;
	  break;
	case MEMMODEL_RELEASE:
	case MEMMODEL_SYNC_RELEASE:
	  if (mode == SImode)
	    icode = CODE_FOR_fetchadd_rel_si;
	  else
	    icode = CODE_FOR_fetchadd_rel_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
	{
	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
					 true, OPTAB_WIDEN);
	  if (new_reg != new_dst)
	    emit_move_insn (new_dst, new_reg);
	}
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
	      || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
				     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
				   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_SYNC_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
	{
	case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
	case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
	case E_SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
	case E_DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_SYNC_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
    case MEMMODEL_SYNC_SEQ_CST:
      switch (mode)
	{
	case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
	case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
	case E_SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
	case E_DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
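/* Illustrative sketch, not from the original sources: a SEQ_CST
   `mem += 8' on SImode satisfies fetchadd_operand and expands to roughly

	mf
	fetchadd4.acq r = [mem], 8

   whereas a non-immediate addend falls through to the ld/cmpxchg loop
   documented above.  */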
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
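/* For example (a sketch): with the usual ia64 partitioning of call-used
   predicates this emits a directive such as

	.pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers survive calls.  */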
/* Globalize a declaration.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  int regno;

  if (emitted_frame_related_regs[r] != 0)
    {
      regno = emitted_frame_related_regs[r];
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
	current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
	       && regno >= GR_REG (1) && regno <= GR_REG (31))
	current_frame_info.gr_used_mask |= 1 << regno;

      return regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (crtl->is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! df_regs_ever_live_p (regno)
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
	    && ! is_emitted (regno))
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      while (regno < (80 - frame_pointer_needed))
	if (! is_emitted (LOC_REG (regno++)))
	  {
	    current_frame_info.n_local_regs = regno;
	    return LOC_REG (regno - 1);
	  }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  gcc_unreachable ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = REG_NREGS (reg);
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int min_regno;
  int max_regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (df_regs_ever_live_p (regno))
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (crtl->profile)
    i = MAX (i, 1);
#endif
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
	{
	  current_frame_info.r[reg_fp] = LOC_REG (79);
	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
	}
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.r[reg_save_ar_pfs]
	    = find_gr_spill (reg_save_ar_pfs, 1);
	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.

     If we have already emitted code for any of those registers,
     then it's already too late to change.  */
  min_regno = MIN (current_frame_info.r[reg_fp],
		   MIN (current_frame_info.r[reg_save_b0],
			current_frame_info.r[reg_save_ar_pfs]));
  max_regno = MAX (current_frame_info.r[reg_fp],
		   MAX (current_frame_info.r[reg_save_b0],
			current_frame_info.r[reg_save_ar_pfs]));
  if (min_regno > 0
      && min_regno + 2 == max_regno
      && (current_frame_info.r[reg_fp] == min_regno + 1
	  || current_frame_info.r[reg_save_b0] == min_regno + 1
	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_save_b0] == 0
	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_fp] == 0
	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
    {
      current_frame_info.r[reg_save_b0] = min_regno;
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
      current_frame_info.r[reg_fp] = min_regno + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
      if (current_frame_info.r[reg_save_pr] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	df_set_regs_ever_live (regno, true);
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
    {
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.r[reg_save_ar_unat]
	= find_gr_spill (reg_save_ar_unat, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_unat] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (df_regs_ever_live_p (AR_LC_REGNUM))
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.r[reg_save_ar_lc]
	= find_gr_spill (reg_save_ar_lc, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_lc] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
/* Worker function for TARGET_CAN_ELIMINATE.  */

static bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case HARD_FRAME_POINTER_REGNUM:
	  offset = -current_frame_info.total_size;
	  if (!crtl->is_leaf || cfun->calls_alloca)
	    offset += 16 + crtl->outgoing_args_size;
	  break;

	case STACK_POINTER_REGNUM:
	  offset = 0;
	  if (!crtl->is_leaf || cfun->calls_alloca)
	    offset += 16 + crtl->outgoing_args_size;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      switch (to)
	{
	case HARD_FRAME_POINTER_REGNUM:
	  offset = 16 - crtl->args.pretend_args_size;
	  break;

	case STACK_POINTER_REGNUM:
	  offset = (current_frame_info.total_size
		    + 16 - crtl->args.pretend_args_size);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
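/* Worked example (a sketch): for a non-leaf function with a 48-byte
   frame and no pretend args, ARG_POINTER -> STACK_POINTER eliminates to
   48 + 16 - 0 = 64, i.e. incoming stack arguments sit 64 bytes above
   the adjusted stack pointer.  */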
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx_insn *init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
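/* Sketch of the effect: with two iterators the emitted spills alternate
   between the two base registers, e.g.

	st8 [r16] = r4, 16	;; iterator 0, POST_MODIFY by 16
	st8 [r17] = r5, 16	;; iterator 1

   (registers and offsets illustrative only), so back-to-back saves can
   issue through different memory ports within one insn group.  */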
static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (satisfies_constraint_N (disp_rtx))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  add_reg_note (spill_fill_data.prev_insn[iter],
			REG_INC, spill_fill_data.iter_reg[iter]);
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (!satisfies_constraint_I (disp_rtx))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;
      rtx_insn *insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (!satisfies_constraint_I (disp_rtx))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx_insn *first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
	  rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem;
  rtx_insn *insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      add_reg_note (insn, REG_CFA_OFFSET,
		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
					      plus_constant (Pmode,
							     base, off)),
				 frame_reg));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx_insn *insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
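/* Worked example (a sketch): for N = 16 stacked registers this yields
   (16 + 16/63 + 1) * 8 = (16 + 0 + 1) * 8 = 136 bytes of backing store:
   one 8-byte slot per register, one NaT-collection slot per 63 registers,
   plus one slot of slack.  */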
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   is the size of the backing store.  ??? This clobbers r2 and r3.  */

static void
ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     int bs_size)
{
  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));

  /* On the IA-64 there is a second stack in memory, namely the Backing Store
     of the Register Stack Engine.  We also need to probe it after checking
     that the 2 stacks don't overlap.  */
  emit_insn (gen_bsp_value (r3));
  emit_move_insn (r2, GEN_INT (-(first + size)));

  /* Compare current value of BSP and SP registers.  */
  emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
					      r3, stack_pointer_rtx)));

  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OS (eg Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     on other address).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
  emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
  emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

  /* Compare them and raise SEGV if the former has topped the latter.  */
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
								 r3, r2))));
  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
						const0_rtx),
			  const0_rtx));
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
						 GEN_INT (11))));

  /* Probe the Backing Store if necessary.  */
  if (bs_size > 0)
    emit_stack_probe (r3);

  /* Probe the memory stack if necessary.  */
  if (size == 0)
    ;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  else if (size <= PROBE_INTERVAL)
    emit_stack_probe (r2);

  /* The run-time loop is made up of 9 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
      emit_insn (gen_rtx_SET (r2,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
      emit_stack_probe (r2);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (r2,
				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
	  emit_stack_probe (r2);
	}

      emit_insn (gen_rtx_SET (r2,
			      plus_constant (Pmode, r2,
					     (i - PROBE_INTERVAL) - size)));
      emit_stack_probe (r2);
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;

      emit_move_insn (r2, GEN_INT (-first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r2,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > (1 << 21))
	{
	  emit_move_insn (r3, GEN_INT (-rounded_size));
	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
	}
      else
	emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
						  GEN_INT (-rounded_size))));

      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (r2, r2, r3));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      /* TEMP = SIZE - ROUNDED_SIZE.  */
      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
						     rounded_size - size)));
	  emit_stack_probe (r2);
	}
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);

  /* Branch.  */
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
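/* The emitted loop looks like (a sketch, assuming the default 4KB probe
   interval and r2/r3 as TEST_ADDR/LAST_ADDR):

	.LPSRL0:
	addl r2 = -4096, r2
	;;
	probe.w.fault r2, 0
	cmp.eq p6, p7 = r2, r3
	(p7) br.cond.dpnt .LPSRL0  */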
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

   [ varargs spill area ]
   [ fr register spill area ]
   [ br register spill area ]
   [ ar register spill area ]
   [ pr register spill area ]
   [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue (void)
{
  rtx_insn *insn;
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
					+ current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
					 size - STACK_CHECK_PROTECT,
					 bs_size);
	  else if (size + bs_size > STACK_CHECK_PROTECT)
	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
	}
      else if (size + bs_size > 0)
	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
    }

  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
	       "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
	fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
      PRINTREG(reg_fp);
      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
#undef PRINTREG
    }

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.r[reg_save_ar_pfs])
	{
	  regno = current_frame_info.r[reg_save_ar_pfs];
	  reg_emitted (reg_save_ar_pfs);
	}
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (ar_pfs_save_reg,
				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
	}
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Force the unwind info to recognize this as defining a new CFA,
	 rather than some temp register setup.  */
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (satisfies_constraint_I (frame_size_rtx))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (stack_pointer_rtx,
				     gen_rtx_PLUS (DImode,
						   stack_pointer_rtx,
						   frame_size_rtx)));
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat])
	{
	  ar_unat_save_reg
	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
	  reg_emitted (reg_save_ar_unat);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      if (current_frame_info.r[reg_save_ar_unat])
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
	}

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.r[reg_save_pr] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
	  reg_emitted (reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.r[reg_save_ar_unat] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.r[reg_save_ar_pfs] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.r[reg_save_ar_lc] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
	  reg_emitted (reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Save the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.r[reg_save_b0] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  if (current_frame_info.r[reg_save_gp])
    {
      reg_emitted (reg_save_gp);
      insn = emit_move_insn (gen_rtx_REG (DImode,
					  current_frame_info.r[reg_save_gp]),
			     pic_offset_table_rtx);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();
}
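/* Sketch: for a function with 2 inputs, 3 locals and 1 output and ar.pfs
   saved in a local, the alloc emitted above comes out as something like

	alloc r34 = ar.pfs, 2, 3, 1, 0

   (the particular GR is whatever find_gr_spill chose).  */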
/* Output the textual info surrounding the prologue.  */

static void
ia64_start_function (FILE *file, const char *fnname,
		     tree decl ATTRIBUTE_UNUSED)
{
#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  fputs ("\t.proc ", file);
  assemble_name (file, fnname);
  fputc ('\n', file);
  ASM_OUTPUT_LABEL (file, fnname);
}
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.  */

void
ia64_expand_epilogue (int sibcall_p)
{
  rtx reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
                          hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
                          current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
         allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.r[reg_save_pr] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
          reg_emitted (reg_save_pr);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat] != 0)
        {
          ar_unat_save_reg
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
          reg_emitted (reg_save_ar_unat);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
          cfa_off -= 8;
        }
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.r[reg_save_ar_pfs] != 0)
    {
      reg_emitted (reg_save_ar_pfs);
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_lc] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
          reg_emitted (reg_save_ar_lc);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
                          + current_frame_info.spill_size));

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_restore (gen_gr_restore, reg, cfa_off);
        cfa_off -= 8;
      }

  /* Restore the branch registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        do_restore (gen_movdi_x, alt_reg, cfa_off);
        cfa_off -= 8;
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        gcc_assert (!(cfa_off & 15));
        reg = gen_rtx_REG (XFmode, regno);
        do_restore (gen_fr_restore_x, reg, cfa_off);
        cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();

  if (current_frame_info.total_size
      || cfun->machine->ia64_eh_epilogue_sp
      || frame_pointer_needed)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
         pointer.  This would allow the most scheduling freedom.  For now,
         just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      rtx_insn *insn
        = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;
      rtx_insn *insn;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (satisfies_constraint_I (frame_size_rtx))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
                    gen_rtx_SET (stack_pointer_rtx,
                                 gen_rtx_PLUS (DImode,
                                               stack_pointer_rtx,
                                               frame_size_rtx)));
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved,
         so r2 is the first available call clobbered register.  If
         there was a frame_pointer register, we may have swapped the
         names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
         sure we're using the string "r2" when emitting the register
         name for the assembler.  */
      if (current_frame_info.r[reg_fp]
          && current_frame_info.r[reg_fp] == GR_REG (2))
        fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
         registers.  Otherwise, if the callee tries to pass its parameters
         through to another call without an intervening alloc, then these
         values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
         preserve those input registers used as arguments to the sibling call.
         It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
        {
          rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
          rtx_insn *insn;

          insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
                                       const0_rtx, const0_rtx,
                                       n_inputs, const0_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          /* ??? We need to mark the alloc as frame-related so that it gets
             passed into ia64_asm_unwind_emit for ia64-specific unwinding.
             But there's nothing dwarf2 related to be done wrt the register
             windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
             the empty parallel means dwarf2out will not see anything.  */
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
        }
    }
}
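/* Note that the restores above mirror ia64_expand_prologue: cfa_off starts
   at the top of the spill area and steps down by 8 bytes for each GR/BR/AR
   slot and by 16 bytes for each FR slot, which is exactly what the two
   gcc_asserts on cfa_off check along the way.  */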
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return (void)
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
              && current_frame_info.n_spilled == 0
              && current_frame_info.r[reg_save_b0] == 0
              && current_frame_info.r[reg_save_pr] == 0
              && current_frame_info.r[reg_save_ar_pfs] == 0
              && current_frame_info.r[reg_save_ar_unat] == 0
              && current_frame_info.r[reg_save_ar_lc] == 0);
    }
  return 0;
}
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL;
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
}
/* Split this value after reload, now that we know where the return
   address is saved.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
        {
          src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
        }
      else
        {
          HOST_WIDE_INT off;
          unsigned int regno;
          rtx off_r;

          /* Compute offset from CFA for BR0.  */
          /* ??? Must be kept in sync with ia64_expand_prologue.  */
          off = (current_frame_info.spill_cfa_off
                 + current_frame_info.spill_size);
          for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
            if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
              off -= 8;

          /* Convert CFA offset to a register based offset.  */
          if (frame_pointer_needed)
            src = hard_frame_pointer_rtx;
          else
            {
              src = stack_pointer_rtx;
              off += current_frame_info.total_size;
            }

          /* Load address into scratch register.  */
          off_r = GEN_INT (off);
          if (satisfies_constraint_I (off_r))
            emit_insn (gen_adddi3 (dest, src, off_r));
          else
            {
              emit_move_insn (dest, off_r);
              emit_insn (gen_adddi3 (dest, src, dest));
            }

          src = gen_rtx_MEM (Pmode, dest);
        }
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
int
ia64_hard_regno_rename_ok (int from, int to)
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  unsigned int r;

  for (r = reg_fp; r <= reg_save_ar_lc; r++)
    if (to == current_frame_info.r[r]
        || from == current_frame_info.r[r]
        || to == emitted_frame_related_regs[r]
        || from == emitted_frame_related_regs[r])
      return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}
/* Implement TARGET_HARD_REGNO_NREGS.

   ??? We say that BImode PR values require two registers.  This allows us to
   easily store the normal and inverted values.  We use CCImode to indicate
   a single predicate register.  */

static unsigned int
ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno == PR_REG (0) && mode == DImode)
    return 64;
  if (PR_REGNO_P (regno) && mode == BImode)
    return 2;
  if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
    return 1;
  if (FR_REGNO_P (regno) && mode == XFmode)
    return 1;
  if (FR_REGNO_P (regno) && mode == RFmode)
    return 1;
  if (FR_REGNO_P (regno) && mode == XCmode)
    return 2;
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}
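/* E.g. a BImode value in a predicate register occupies two PRs (the normal
   and the inverted value), an XCmode value occupies two FRs, and a DImode
   value in a GR occupies CEIL (8, UNITS_PER_WORD) == 1 register.  */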
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (FR_REGNO_P (regno))
    return (GET_MODE_CLASS (mode) != MODE_CC
            && mode != BImode
            && mode != TFmode);

  if (PR_REGNO_P (regno))
    return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;

  if (GR_REGNO_P (regno))
    return mode != XFmode && mode != XCmode && mode != RFmode;

  if (AR_REGNO_P (regno))
    return mode == DImode;

  if (BR_REGNO_P (regno))
    return mode == DImode;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.

   Don't tie integer and FP modes, as that causes us to get integer registers
   allocated for FP instructions.  XFmode only supported in FP registers so
   we can't tie it with any other modes.  */

static bool
ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
          && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
              == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
          && (mode1 == BImode) == (mode2 == BImode));
}
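/* E.g. SFmode and DFmode are tieable (same class, neither in the XF/XC/RF
   group), while XFmode ties with nothing else: DFmode fails the XF/XC/RF
   test and XCmode belongs to a different mode class.  */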
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      static const char * const directive[2][2] = {
          /* 64-bit pointer */    /* 32-bit pointer */
        { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
        { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}      /* aligned */
      };
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
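/* E.g. an aligned pointer-sized reference to a function `foo' comes out as
   "data8  @fptr(foo)", directing the linker to build an official function
   descriptor rather than using the raw code address.  */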
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (FILE *file)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
             current_frame_info.n_input_regs,
             current_frame_info.n_local_regs,
             current_frame_info.n_output_regs,
             current_frame_info.n_rotate_regs);

  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.r[reg_save_b0] != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
    }
  if (current_frame_info.r[reg_save_ar_pfs] != 0
      && (grsave_prev == 0
          || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
        grsave = current_frame_info.r[reg_save_ar_pfs];
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
    }
  if (current_frame_info.r[reg_fp] != 0
      && (grsave_prev == 0
          || current_frame_info.r[reg_fp] == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
        grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.r[reg_fp];
    }
  if (current_frame_info.r[reg_save_pr] != 0
      && (grsave_prev == 0
          || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
        grsave = current_frame_info.r[reg_save_pr];
    }

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
             ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
             (long) (current_frame_info.spill_cfa_off
                     + current_frame_info.spill_size));
}
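/* E.g. if b0 is saved in r35 and ar.pfs in the adjacent r36, mask becomes
   8|4 == 12 and grsave stays 35, so GNU as sees ".prologue 12, 35" (modulo
   the renumbering applied by ia64_dbx_register_number).  */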
/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (FILE *file)
{
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  fputs ("\t.body\n", file);
}
/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (FILE *)
{
  int i;

  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
      reg_emitted (reg_fp);
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (int regno)
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.r[reg_fp])
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
        regno = current_frame_info.r[reg_fp];
      else if (regno == current_frame_info.r[reg_fp])
        regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
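/* E.g. with two input registers, loc0 gets debug number 32 + 2 == 34; the
   output registers are numbered after all the locals in the same way.  */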
/* Implement TARGET_TRAMPOLINE_INIT.

   The trampoline should set the static chain pointer to value placed
   into the trampoline and should branch to the specified routine.
   To make the normal indirect-subroutine calling convention work,
   the trampoline must look like a function descriptor; the first
   word being the target address and the second being the target's
   global pointer.

   We abuse the concept of a global pointer by arranging for it
   to point to the data we need to load.  The complete trampoline
   has the following form:

                +-------------------+ \
        TRAMP:  | __ia64_trampoline | |
                +-------------------+  > fake function descriptor
                | TRAMP+16          | |
                +-------------------+ /
                | target descriptor |
                +-------------------+
                | static link       |
                +-------------------+
*/

static void
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly */
  if (!TARGET_GNU_AS)
    {
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
        {
          declared_ia64_trampoline = true;
          (*targetm.asm_out.globalize_label) (asm_out_file,
                                              "__ia64_trampoline");
        }
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
    {
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
         in the Macro-32 compiler) and changed the semantics of the LTOFF22
         relocation against function symbols to make it identical to the
         LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
         strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
      tramp = reg;
    }
  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
}
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

static void
ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
                             tree type, int * pretend_size,
                             int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);

  /* Skip the current argument.  */
  ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
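/* E.g. if the named arguments occupy three of the MAX_ARGUMENT_SLOTS (8)
   slots, n == 5 and *pretend_size == 5 * UNITS_PER_WORD == 40 bytes of
   anonymous register arguments are spilled by the prologue.  */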
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */

static machine_mode
hfa_element_mode (const_tree type, bool nested)
{
  machine_mode element_mode = VOIDmode;
  machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
    return VOIDmode;

  switch (code)
    {
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
    case LANG_TYPE:     case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
          && TYPE_MODE (type) != TCmode)
        return GET_MODE_INNER (TYPE_MODE (type));
      else
        return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
        return TYPE_MODE (type);
      else
        return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
        {
          if (TREE_CODE (t) != FIELD_DECL)
            continue;

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
            {
              if (mode != element_mode)
                return VOIDmode;
            }
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
            return VOIDmode;
          else
            {
              know_element_mode = 1;
              element_mode = mode;
            }
        }
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
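/* E.g. struct { float x, y; } yields SFmode and struct { double d[4]; }
   yields DFmode, while struct { float f; double d; } mixes element types
   and yields VOIDmode, as does anything with a TFmode (quad) leaf.  */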
/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

static int
ia64_function_arg_words (const_tree type, machine_mode mode)
{
  int words;

  if (mode == BLKmode)
    words = int_size_in_bytes (type);
  else
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
}
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

static int
ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
                          const_tree type, int words)
{
  /* No registers are skipped on VMS.  */
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
    return 0;

  if (type
      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
  else
    return words > 1;
}
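/* E.g. a 16-byte-aligned aggregate whose first free slot is odd skips one
   slot, as does a 16-byte integer or FP scalar (words > 1); plain 8-byte
   values never skip a slot.  */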
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

static rtx
ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
                     const_tree type, bool named, bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* For OPEN VMS, emit the instruction setting up the argument register here,
     when we know this will be together with the other arguments setup related
     insns.  This is not the conceptually best place to do this, but this is
     the easiest as we have convenient access to cumulative args info.  */

  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
      && named == 1)
    {
      unsigned HOST_WIDE_INT regval = cum->words;
      int i;

      for (i = 0; i < 8; i++)
        regval |= ((int) cum->atypes[i]) << (i * 3 + 8);

      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
                      GEN_INT (regval));
    }

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      if (FLOAT_MODE_P (mode))
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
      else
        return gen_rtx_REG (mode, basereg + cum->words);
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
                                                              + fp_regs)),
                                      GEN_INT (offset));
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
        offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
        {
          machine_mode gr_mode = DImode;
          unsigned int gr_size;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          if (offset & 0x4)
            gr_mode = SImode;
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)
            gr_mode = SImode;

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg
                                                             + int_regs)),
                                      GEN_INT (offset));

          gr_size = GET_MODE_SIZE (gr_mode);
          offset += gr_size;
          if (gr_size == UNITS_PER_WORD
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
            int_regs++;
          else if (gr_size > UNITS_PER_WORD)
            int_regs += gr_size / UNITS_PER_WORD;
        }
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
          && byte_size < UNITS_PER_WORD
          && byte_size > 0)
        {
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
                                                       (basereg + cum->words
                                                        + offset)),
                                          const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
        }
      else
        return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (named)
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
         the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
        return gen_rtx_PARALLEL (mode,
                 gen_rtvec (1,
                   gen_rtx_EXPR_LIST (VOIDmode,
                     gen_rtx_REG (DImode, basereg + cum->words + offset),
                                      const0_rtx)));
      else
        return gen_rtx_REG (mode, basereg + cum->words + offset);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      machine_mode inner_mode =
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
                                                          + cum->fp_regs)),
                                      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (inner_mode,
                                                   (basereg + cum->words
                                                    + offset)),
                                      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
/* Implement TARGET_FUNCTION_ARG target hook.  */

static rtx
ia64_function_arg (cumulative_args_t cum, machine_mode mode,
                   const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, false);
}

/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */

static rtx
ia64_function_incoming_arg (cumulative_args_t cum,
                            machine_mode mode,
                            const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, true);
}
/* Return number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

static int
ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
                        tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
}
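/* E.g. a three-word argument arriving when seven slots are in use gets
   (8 - 7) * UNITS_PER_WORD == 8 bytes in registers and the remaining two
   words on the stack.  */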
/* Return ivms_arg_type based on machine_mode.  */

static enum ivms_arg_type
ia64_arg_type (machine_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
      return FS;
    case E_DFmode:
      return FT;
    default:
      return I64;
    }
}
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

static void
ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
                           const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    {
      cum->words += words + offset;
      return;
    }

  if (TARGET_ABI_OPEN_VMS)
    cum->atypes[cum->words] = ia64_arg_type (mode);
  cum->words += words + offset;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      cum->int_regs = cum->words;
      cum->fp_regs = cum->words;
      return;
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
        {
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        cum->int_regs = cum->words;
      else
        /* ??? Complex types should not reach here.  */
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
/* Arguments with alignment larger than 8 bytes start at the next even
   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */

static unsigned int
ia64_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
    return PARM_BOUNDARY * 2;

  if (type)
    {
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
        return PARM_BOUNDARY * 2;
      else
        return PARM_BOUNDARY;
    }

  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
    return PARM_BOUNDARY * 2;
  else
    return PARM_BOUNDARY;
}
/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

static bool
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* We can't perform a sibcall if the current function has the syscall_linkage
     attribute.  */
  if (lookup_attribute ("syscall_linkage",
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return false;

  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module unless
     TARGET_CONST_GP is set to true.  */
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
}
/* Implement va_arg.  */

static tree
ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);
    }

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
      gimplify_assign (unshare_expr (valist), t, pre_p);
    }

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
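/* With UNITS_PER_WORD == 8, the rounding above computes
   valist = (valist + 15) & -16, i.e. it bumps the argument pointer to the
   next 16-byte (pair-of-slots) boundary.  */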
/* Return true if a function return value is returned in memory; false if
   it is in a register.  */

static bool
ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    {
      byte_size = int_size_in_bytes (valtype);
      if (byte_size < 0)
        return true;
    }

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
        return true;
      else
        return false;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return true;
  else
    return false;
}
/* Return rtx for register that holds the function return value.  */

static rtx
ia64_function_value (const_tree valtype,
                     const_tree fn_decl_or_type,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  int unsignedp;
  const_tree func = fn_decl_or_type;

  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    func = NULL;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
                                      GEN_INT (offset));
          offset += hfa_size;
        }
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
         in the registers so that we get the bits properly aligned in
         the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
        need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
         HFA structure, and therefore doesn't go in fp registers.  But
         the middle-end will give it XFmode anyway, and XFmode values
         don't normally fit in integer registers.  So we need to smuggle
         the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
        need_parallel = true;

      if (need_parallel)
        {
          rtx loc[8];
          int offset;
          int bytesize;
          int i;

          offset = 0;
          bytesize = int_size_in_bytes (valtype);
          /* An empty PARALLEL is invalid here, but the return value
             doesn't matter for empty structs.  */
          if (bytesize == 0)
            return gen_rtx_REG (mode, GR_RET_FIRST);
          for (i = 0; offset < bytesize; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
                                                       GR_RET_FIRST + i),
                                          GEN_INT (offset));
              offset += UNITS_PER_WORD;
            }
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
        }

      mode = promote_function_mode (valtype, mode, &unsignedp,
                                    func ? TREE_TYPE (func) : NULL_TREE,
                                    true);

      return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
/* Worker function for TARGET_LIBCALL_VALUE.  */

static rtx
ia64_libcall_value (machine_mode mode,
                    const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode,
                      (((GET_MODE_CLASS (mode) == MODE_FLOAT
                         || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
                        && (mode) != TFmode)
                       ? FR_RET_FIRST : GR_RET_FIRST));
}
/* Worker function for FUNCTION_VALUE_REGNO_P.  */

static bool
ia64_function_value_regno_p (const unsigned int regno)
{
  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
          || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4 || size == 8);
  if (size == 4)
    fputs ("\tdata4.ua\t@dtprel(", file);
  else
    fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
}
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

static void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
                            machine_mode /*mode*/,
                            rtx address ATTRIBUTE_UNUSED)
{
}
/* Print an operand to an assembler instruction.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   e    Print 64 - constant, for DImode rotates.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   G    A floating point constant.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   R    Print .s .d or nothing for a single, double or no truncation.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   X    A pair of floating point registers.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.
   v    Print vector constant value as an 8-byte integer value.  */

static void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      switch (GET_CODE (x))
        {
        case NE:
          str = "neq";
          break;
        case UNORDERED:
          str = "unord";
          break;
        case ORDERED:
          str = "ord";
          break;
        case UNLT:
          str = "nge";
          break;
        case UNLE:
          str = "ngt";
          break;
        case UNGT:
          str = "nle";
          break;
        case UNGE:
          str = "nlt";
          break;
        case UNEQ:
        case LTGT:
          gcc_unreachable ();
        default:
          str = GET_RTX_NAME (GET_CODE (x));
          break;
        }
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (1)];
      else
        {
          gcc_assert (GET_CODE (x) == REG);
          str = reg_names [REGNO (x)];
        }
      fputs (str, file);
      return;

    case 'G':
      {
        long val[4];
        real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
        if (GET_MODE (x) == SFmode)
          fprintf (file, "0x%08lx", val[0] & 0xffffffff);
        else if (GET_MODE (x) == DFmode)
          fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
                                          & 0xffffffff,
                   (WORDS_BIG_ENDIAN ? val[1] : val[0])
                                          & 0xffffffff);
        else
          output_operand_lossage ("invalid %%G mode");
      }
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
          regno += 1;
        if (code == 'j')
          regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs(".acq", file);
      return;

    case 'P':
      {
        HOST_WIDE_INT value;

        switch (GET_CODE (XEXP (x, 0)))
          {
          default:
            return;

          case POST_MODIFY:
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
              value = INTVAL (x);
            else
              {
                gcc_assert (GET_CODE (x) == REG);
                fprintf (file, ", %s", reg_names[REGNO (x)]);
                return;
              }
            break;

          case POST_INC:
            value = GET_MODE_SIZE (GET_MODE (x));
            break;

          case POST_DEC:
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
            break;
          }

        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs(".rel", file);
      return;

    case 'R':
      if (x == CONST0_RTX (GET_MODE (x)))
        fputs(".s", file);
      else if (x == CONST1_RTX (GET_MODE (x)))
        fputs(".d", file);
      else if (x == CONST2_RTX (GET_MODE (x)))
        ;
      else
        output_operand_lossage ("invalid %%R value");
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'X':
      {
        unsigned int regno = REGNO (x);
        fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
         Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
        fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
        output_addr_const (file, x);
      else
        output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      {
        const char *which;

        /* For conditional branches, returns or calls, substitute
           sptk, dptk, dpnt, or spnt for %s.  */
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
        if (x)
          {
            int pred_val = profile_probability::from_reg_br_prob_note
                                 (XINT (x, 0)).to_reg_br_prob_base ();

            /* Guess top and bottom 10% statically predicted.  */
            if (pred_val < REG_BR_PROB_BASE / 50
                && br_prob_note_reliable_p (x))
              which = ".spnt";
            else if (pred_val < REG_BR_PROB_BASE / 2)
              which = ".dpnt";
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98
                     || !br_prob_note_reliable_p (x))
              which = ".dptk";
            else
              which = ".sptk";
          }
        else if (CALL_P (current_output_insn))
          which = ".sptk";
        else
          which = ".dptk";

        fputs (which, file);
        return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
        {
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
            regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
        }
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* fall through */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
        break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */

static bool
ia64_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '+' || code == ',');
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      switch (outer_code)
        {
        case SET:
          *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
          return true;
        case PLUS:
          if (satisfies_constraint_I (x))
            *total = 0;
          else if (satisfies_constraint_J (x))
            *total = 1;
          else
            *total = COSTS_N_INSNS (1);
          return true;
        default:
          if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case FMA:
      *total = COSTS_N_INSNS (4);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
         transfer integer regs to FP regs.  */
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (4);
      else if (GET_MODE_SIZE (mode) > 2)
        *total = COSTS_N_INSNS (10);
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (4);
          return true;
        }
      /* FALLTHRU */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      return false;
    }
}
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

static int
ia64_register_move_cost (machine_mode mode, reg_class_t from,
                         reg_class_t to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      reg_class_t tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as memory_move_cost
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
        return memory_move_cost (mode, to, false);
      else
        return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
        return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return memory_move_cost (mode, to, false);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
        return memory_move_cost (mode, to, false);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return memory_move_cost (mode, to, false);
      break;

    case GR_REGS:
    case FR_REGS:
    case FP_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      gcc_unreachable ();
    }

  return 2;
}
/* Calculate the cost of moving data of MODE from a register to or from
   memory.  */

static int
ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                       reg_class_t rclass,
                       bool in ATTRIBUTE_UNUSED)
{
  if (rclass == GENERAL_REGS
      || rclass == FR_REGS
      || rclass == FP_REGS
      || rclass == GR_AND_FR_REGS)
    return 4;
  else
    return 10;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
   on RCLASS to use when copying X into that class.  */

static reg_class_t
ia64_preferred_reload_class (rtx x, reg_class_t rclass)
{
  switch (rclass)
    {
    case FR_REGS:
    case FP_REGS:
      /* Don't allow volatile mem reloads into floating point registers.
         This is defined to force reload to choose the r/m case instead
         of the f/f case when reloading (set (reg fX) (mem/v)).  */
      if (MEM_P (x) && MEM_VOLATILE_P (x))
        return NO_REGS;

      /* Force all unrecognized constants into the constant pool.  */
      if (CONSTANT_P (x))
        return NO_REGS;
      break;

    default:
      break;
    }

  return rclass;
}
/* This function returns the register class required for a secondary
   register when copying between one of the registers in RCLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (enum reg_class rclass,
                             machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (rclass)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
         interaction.  We end up with two pseudos with overlapping lifetimes
         both of which are equiv to the same constant, and both which need
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
         changes depending on the path length, which means the qty_first_reg
         check in make_regs_eqv can give different answers at different times.
         At some point I'll probably need a reload_indi pattern to handle
         this.

         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
         non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
        return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
         stack slot.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;
      break;

    case FR_REGS:
    case FP_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
        return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
         muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
         in recog.c.  */
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))
        return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
        return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
        return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         crtl->has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
         and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
        return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
/* Implement targetm.unspec_may_trap_p hook.  */
static int
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_LDA:
    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
    case UNSPEC_CHKACLR:
    case UNSPEC_CHKS:
      /* These unspecs are just wrappers.  */
      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
    }

  return default_unspec_may_trap_p (x, flags);
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
ia64_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) ia64_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = (global_options_set.x_g_switch_value
                            ? g_switch_value
                            : IA64_DEFAULT_GVALUE);

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)
    align_loops = 32;
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  ia64_override_options_after_change();
}
/* Implement targetm.override_options_after_change.  */

static void
ia64_override_options_after_change (void)
{
  if (optimize >= 3
      && !global_options_set.x_flag_selective_scheduling
      && !global_options_set.x_flag_selective_scheduling2)
    {
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
    }
  if (mflag_sched_control_spec == 2)
    {
      /* Control speculation is on by default for the selective scheduler,
         but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
    }
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
    {
      /* FIXME: remove this when we'd implement breaking autoinsns as
         a transformation.  */
      flag_auto_inc_dec = 0;
    }
}
/* Initialize the record of emitted frame related registers.  */

void ia64_init_expanders (void)
{
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
}

static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
static enum attr_type ia64_safe_type (rtx_insn *);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else if (DEBUG_INSN_P (insn))
    return ITANIUM_CLASS_IGNORE;
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
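
/* Illustrative sketch (not part of the port): how WRITE_COUNT evolves
   for one register under the rules above.  An unconditional first write
   jumps straight to 2; a predicated first write leaves 1; any second
   write (complementary predicate or not) saturates the count at 2.  The
   standalone helper below is hypothetical.  */
#if 0
static int
example_write_count_after (int write_count, int predicated)
{
  if (write_count == 0)
    return predicated ? 1 : 2;	/* first write in the group */
  return 2;			/* a second write always saturates */
}
#endif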
#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};
/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];

#if CHECKING_P
/* Bitmap whether a register has been written in the current insn.  */
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
			   / HOST_BITS_PER_WIDEST_FAST_INT];

static inline void
rws_insn_set (int regno)
{
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
  SET_HARD_REG_BIT (rws_insn, regno);
}

static inline int
rws_insn_test (int regno)
{
  return TEST_HARD_REG_BIT (rws_insn, regno);
}
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline int
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif
/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like an unconditional write and do not try to check
	     for a complementary pred reg in an earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow avoiding
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
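
/* Illustrative sketch (not part of the port): the essence of the
   read-side decision above.  Aside from the branch-related and AND/OR
   exceptions handled in rws_access_regno, a read needs a barrier exactly
   when the register was already written in this group.  The helper is
   hypothetical.  */
#if 0
static int
example_read_needs_barrier (int write_count)
{
  /* 0: not written yet; 1: predicated write; 2: unconditional write.  */
  return write_count != 0;
}
#endif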
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = REG_NREGS (reg);

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	 nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	 doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	 an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	 this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      gcc_assert (!pred);
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids a failure in rws_access_reg.  */
	  if (! rws_insn_test (REG_VOLATILE))
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  switch (GET_CODE (pat))
	    {
	    case SET:
	      update_set_flags (pat, &new_flags);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
	      break;

	    case USE:
	    case CALL:
	    case ASM_OPERANDS:
	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
	      break;

	    case CLOBBER:
	      if (REG_P (XEXP (pat, 0))
		  && extract_asm_operands (x) != NULL_RTX
		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
						     new_flags, pred);
		  new_flags = flags;
		}
	      break;

	    case RETURN:
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
      break;
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
	 describe the elements selected.  On ia64, those integers are
	 always constants.  Avoid walking the PARALLEL so that we don't
	 get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_MF:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_FETCHADD_REL:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_ADDP4:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
	case UNSPEC_LDA:
	case UNSPEC_LDS:
	case UNSPEC_LDS_A:
	case UNSPEC_LDSA:
	case UNSPEC_CHKACLR:
	case UNSPEC_CHKS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	case UNSPEC_SHRP:
	case UNSPEC_COPYSIGN:
	case UNSPEC_FR_RECIP_APPROX_RES:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	case UNSPEC_CMPXCHG_REL:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	case UNSPECV_PROBE_STACK_RANGE:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_BREAK:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	case UNSPECV_PROBE_STACK_ADDRESS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    gcc_unreachable ();
	  }
      break;
    }
  return need_barrier;
}
/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx_insn *insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);

      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && important_for_bundling_p (insn))
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx_insn *insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}
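
/* Illustrative sketch (not part of the port): the save/query/restore
   pattern used above, shown over a single global.  The in_safe_group_barrier
   flag additionally suppresses rws_update, so the summary state is left
   untouched by the query.  The names here are hypothetical.  */
#if 0
static int example_state;

static int
example_safe_query (int (*query) (void))
{
  int saved = example_state;
  int t = query ();		/* may inspect state, side effects undone */
  example_state = saved;
  return t;
}
#endif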
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx_insn *insn;
  rtx_insn *last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (LABEL_P (insn))
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NOTE_P (insn)
	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NONJUMP_INSN_P (insn)
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      gcc_assert (last_label);
	      if (dump)
		fprintf (dump, "Emitting stop before label %d\n",
			 INSN_UID (last_label));
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
	      insn = last_label;

	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed (insn);
	    }
	}
    }
}
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires stop bits to be added before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the array mentioned above.  */

static int stop_before_p = 0;

/* The following variable value is the length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor
   cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx_insn *);
/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}
/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx_insn *insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_prologue_allocate_stack_pr:
    case CODE_FOR_epilogue_deallocate_stack:
    case CODE_FOR_epilogue_deallocate_stack_pr:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */
static int
ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
		  int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
      else
	return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
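
/* Illustrative sketch (not part of the port): the store->load cost policy
   above, restated over plain ints.  A weakness of MIN_DEP_WEAK means the
   two references are likely to alias, so the cost is bumped to the
   sched-mem-true-dep-cost parameter; weaker dependences keep COST, except
   that FP stores may be treated as conflict-free when the zero-cost flag
   is on.  All names below are stand-ins.  */
#if 0
static int
example_store_load_cost (int dw, int cost, int dep_is_fp_store,
			 int fp_zero_cost, int min_dep_weak,
			 int mem_true_dep_cost)
{
  if (dw == min_dep_weak)
    return mem_true_dep_cost;	/* likely alias: pay to avoid the stall */
  if (dw > min_dep_weak)
    return (fp_zero_cost && dep_is_fp_store) ? 0 : cost;
  return cost;
}
#endif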
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx_insn *before)
{
  emit_insn_before (insn, before);
}
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because this
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
  if (flag_checking && !sel_sched_p () && reload_completed)
    {
      for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
	   insn != current_sched_info->next_tail;
	   insn = NEXT_INSN (insn))
	gcc_assert (!SCHED_GROUP_P (insn));
    }
  last_scheduled_insn = NULL;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}
/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}
/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static bool
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
    && get_attr_check_load (insn) == CHECK_LOAD_NO);
}
/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   (taking account of the 3-cycle cache reference postponing for stores: Intel
   Itanium 2 Reference Manual for Software Development and Optimization,
   6.7.3.1).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;
    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;
    default:
      break;
    }
}
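
/* Illustrative sketch (not part of the port): the modulo-4 cycle ring
   used above.  Loads are charged to the issuing cycle; stores reach the
   cache three cycles later and are charged to (cycle + 3) % 4.  The
   helper is hypothetical and standalone.  */
#if 0
static void
example_account_mem_op (char ring[4], int cycle, int is_store)
{
  ring[(cycle + (is_store ? 3 : 0)) % 4]++;
}
#endif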
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx_insn **e_ready = ready + n_ready;
  rtx_insn **insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx_insn *insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx_insn *lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx_insn *highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx_insn *insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx_insn *insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}
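
/* Illustrative sketch (not part of the port): the "move down, preserving
   relative order" idiom used twice above, as a stable partition over a
   plain int array.  Elements for which KEEP_BACK returns nonzero end up
   at the front of the array (the back of the ready queue), keeping their
   original relative order.  Everything here is hypothetical.  */
#if 0
#include <string.h>

static void
example_stable_move_back (int *a, int n, int (*keep_back) (int))
{
  int moved = 0;
  for (int *p = a + n - 1; p >= a + moved; )
    {
      if (!keep_back (*p))
	{
	  p--;
	  continue;
	}
      int v = *p;
      /* Shift a[0..p-1] up by one and park V at the front; the element
	 now at *p is the previously unexamined one, so recheck it.  */
      memmove (a + 1, a, (p - a) * sizeof (int));
      a[0] = v;
      moved++;
    }
}
#endif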
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return zero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  gcc_assert (insn && INSN_P (insn));

  /* Size of ALAT is 32.  Since we perform conservative
     data speculation, we keep the ALAT half-empty.  */
  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
    return ready_index == 0 ? -1 : 1;

  if (ready_index == 0)
    return 0;

  if ((!reload_completed
       || !safe_group_barrier_needed (insn))
      && (!mflag_sched_mem_insns_hard_limit
	  || !is_load_p (insn)
	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
    return 0;

  return 1;
}
/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */
static int
scheduled_good_insn (rtx_insn *last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;
      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
      clocks_length = new_clocks_length;
    }
}
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;
/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
typedef rtx (* gen_func_t) (rtx, rtx);

/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */
static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_movbi,
    gen_movqi_internal,
    gen_movhi_internal,
    gen_movsi_internal,
    gen_movdi_internal,
    gen_movsf_internal,
    gen_movdf_internal,
    gen_movxf_internal,
    gen_movti_internal,
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else if (ts == 0)
    gen_ld = gen_ld_;
  else
    gcc_unreachable ();

  return gen_ld[mode_no];
}
/* Constants that help mapping 'machine_mode' to int.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };

/* Return index of the MODE.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case E_BImode: return 0; /* SPEC_MODE_FIRST  */
    case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case E_HImode: return 2;
    case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case E_DImode: return 4;
    case E_SFmode: return 5;
    case E_DFmode: return 6;
    case E_XFmode: return 7;
    case E_TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
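
/* Illustrative sketch (not part of the port): how a load's pattern index
   is formed from the mode index above.  A zero-extending load of an
   extendable mode (QI/HI/SI, indices 1..3) selects the ZERO_EXTEND
   variant by adding SPEC_GEN_EXTEND_OFFSET (8 here), i.e. indices 9..11
   in the twelve-entry gen_ld_* tables.  The helper is hypothetical.  */
#if 0
static int
example_spec_pattern_index (int mode_no, int extend_p)
{
  if (!extend_p)
    return mode_no;			/* 0..8: plain move patterns */
  if (mode_no < 1 || mode_no > 3)
    return -1;				/* only QI/HI/SI can be extended */
  return mode_no + 8;			/* 9..11: zero-extend patterns */
}
#endif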
/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
	  && (!sel_sched_p ()
	      || reload_completed))
	{
	  mask |= BEGIN_CONTROL;

	  if (!sel_sched_p () && mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & CONTROL_SPEC)
	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx_insn *insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }

	  mode_rtx = mem;
	}
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
	{
	  gcc_assert (REG_P (reg) && MEM_P (mem));
	  mode_rtx = mem;
	}
      else
	return -1;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code;

    code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
	return code;

      default:
	return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}

/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}
/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */
ds_t
ia64_get_insn_spec_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* If INSN is a speculative load return a ds with the speculation types that
   will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
ds_t
ia64_get_insn_checked_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* If GEN_P is true, calculate the index of needed speculation check and return
   speculative pattern for INSN with speculative mode TS, machine mode
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
   If GEN_P is false, just calculate the index of needed speculation check.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}
/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}
/* Return a function that will generate a check for speculation TS with mode
   MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_movbi_nc,
    gen_movqi_nc,
    gen_movhi_nc,
    gen_movsi_nc,
    gen_movdi_nc,
    gen_movsf_nc,
    gen_movdf_nc,
    gen_movxf_nc,
    gen_movti_nc,
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	/* We might want to use ld.sa -> ld.c instead of
	   ld.s -> chk.s.  */
	{
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}
/* Return nonzero, if INSN needs branchy recovery check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}
/* Generate (or regenerate) a recovery check for INSN.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}
/* Return nonzero, if X is branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero, if SRC belongs to recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}
8544 /* The following page contains abstract data `bundle states' which are
8545 used for bundling insns (inserting nops and template generation). */
8547 /* The following describes state of insn bundling. */
8551 /* Unique bundle state number to identify them in the debugging
8554 rtx_insn
*insn
; /* corresponding insn, NULL for the 1st and the last state */
8555 /* number nops before and after the insn */
8556 short before_nops_num
, after_nops_num
;
8557 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
8559 int cost
; /* cost of the state in cycles */
8560 int accumulated_insns_num
; /* number of all previous insns including
8561 nops. L is considered as 2 insns */
8562 int branch_deviation
; /* deviation of previous branches from 3rd slots */
8563 int middle_bundle_stops
; /* number of stop bits in the middle of bundles */
8564 struct bundle_state
*next
; /* next state with the same insn_num */
8565 struct bundle_state
*originator
; /* originator (previous insn state) */
8566 /* All bundle states are in the following chain. */
8567 struct bundle_state
*allocated_states_chain
;
8568 /* The DFA State after issuing the insn and the nops. */
8572 /* The following is map insn number to the corresponding bundle state. */
8574 static struct bundle_state
**index_to_bundle_states
;
8576 /* The unique number of next bundle state. */
8578 static int bundle_states_num
;
8580 /* All allocated bundle states are in the following chain. */
8582 static struct bundle_state
*allocated_bundle_states_chain
;
8584 /* All allocated but not used bundle states are in the following
8587 static struct bundle_state
*free_bundle_state_chain
;
8590 /* The following function returns a free bundle state. */
8592 static struct bundle_state
*
8593 get_free_bundle_state (void)
8595 struct bundle_state
*result
;
8597 if (free_bundle_state_chain
!= NULL
)
8599 result
= free_bundle_state_chain
;
8600 free_bundle_state_chain
= result
->next
;
8604 result
= XNEW (struct bundle_state
);
8605 result
->dfa_state
= xmalloc (dfa_state_size
);
8606 result
->allocated_states_chain
= allocated_bundle_states_chain
;
8607 allocated_bundle_states_chain
= result
;
8609 result
->unique_num
= bundle_states_num
++;
/* The following function frees the given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}
/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}
/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
/* Hashtable helpers.  */

struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
{
  static inline hashval_t hash (const bundle_state *);
  static inline bool equal (const bundle_state *, const bundle_state *);
};
/* The function returns the hash of BUNDLE_STATE.  */

inline hashval_t
bundle_state_hasher::hash (const bundle_state *state)
{
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}
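/* Worked illustration of the mixing above (not exercised anywhere): for
   dfa_state bytes {0x01, 0x02} with CHAR_BIT == 8, the loop accumulates
   (0x01 << 8) + (0x02 << 11) = 0x1100 before insn_num is added.
   Shifting byte I by (I % CHAR_BIT) * 3 + CHAR_BIT spreads neighboring
   bytes over different bit ranges, so automaton states that differ only
   in the order of their bytes are unlikely to collide.  */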
/* The function returns nonzero if the bundle state keys are equal.  */

inline bool
bundle_state_hasher::equal (const bundle_state *state1,
			    const bundle_state *state2)
{
  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static hash_table<bundle_state_hasher> *bundle_state_table;
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle state has been inserted into
   the table.  The table contains the best bundle state with the given
   key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
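/* The cascade above is a lexicographic comparison.  Written as a
   stand-alone predicate (an illustrative sketch; `bundle_state_better_p'
   is hypothetical and unused), the preference order is:

     static bool
     bundle_state_better_p (const struct bundle_state *a,
			    const struct bundle_state *b)
     {
       if (a->cost != b->cost)
	 return a->cost < b->cost;
       if (a->accumulated_insns_num != b->accumulated_insns_num)
	 return a->accumulated_insns_num < b->accumulated_insns_num;
       if (a->branch_deviation != b->branch_deviation)
	 return a->branch_deviation < b->branch_deviation;
       return a->middle_bundle_stops < b->middle_bundle_stops;
     }

   i.e. fewer cycles first, then fewer emitted insns (and thus nops),
   then branches closer to third slots, then fewer mid-bundle stops.  */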
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx_insn *insn, int try_bundle_end_p,
		     int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (!unknown_for_bundling_p (insn));

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (unknown_for_bundling_p (insn))
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}
/* The following function returns the position in the two window bundle
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with 2 values
   of position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued within
   a six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
/* True when INSN is important for bundling.  */

static bool
important_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* The following function returns an insn important for insn bundling
   following INSN and before TAIL.  */

static rtx_insn *
get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL;
}
/* True when INSN is unknown, but important, for bundling.  */

static bool
unknown_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of the
	 following bundle and therefore an EH region end must be on a
	 bundle boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and implicitly about previous decisions) is
   described by structure bundle_state (see above).  If we generate the
   same bundle state (the key is the automaton state after issuing the
   insns and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving back in the EBB, insert templates
   for the best alternative.  The templates are taken from querying the
   automaton state for each insn in the chosen bundle states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  */
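/* A minimal sketch of the dynamic programming described above, under
   toy assumptions (the `toy_*' names are hypothetical and used nowhere
   in this port).  The real pass keys states on the automaton state and
   insn number; the shape of the recursion is the same: extend each
   surviving state at position i-1 by one insn plus 0-2 nops, and keep
   only the best state per key at position i.  */
#if 0
#define TOY_KEYS 64		/* toy stand-in for the automaton state space */

struct toy_state
{
  int cost;			/* cycles consumed so far */
  int valid;			/* does this slot hold a state?  */
};

/* Candidate states at the current insn position, indexed by key.  */
static struct toy_state toy_table[TOY_KEYS];

/* Offer a successor state with key KEY and cost COST; keep it only if
   it beats the incumbent, exactly as insert_bundle_state does above.  */
static void
toy_offer (int key, int cost)
{
  struct toy_state *slot = &toy_table[key % TOY_KEYS];

  if (!slot->valid || cost < slot->cost)
    {
      slot->cost = cost;
      slot->valid = 1;
    }
}
#endif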
static void
bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx_insn *insn, *next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx_insn *b;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  bundling_p = 1;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& !important_for_bundling_p (insn)
	&& GET_MODE (insn) == TImode)
      {
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn != tail;
	     next_insn = NEXT_INSN (next_insn))
	  if (important_for_bundling_p (next_insn)
	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
	    {
	      PUT_MODE (next_insn, TImode);
	      break;
	    }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (important_for_bundling_p (insn));
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  Asm insn is always
	     placed in a separate bundle.  */
	  only_bundle_end_p
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && unknown_for_bundling_p (next_insn));
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	  bundle_end_p
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      || type == TYPE_S)
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
				 only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
			       only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
			       only_bundle_end_p);
	}
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	  {
	    /* This structure is taken from generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
	    struct DFA_chip
	    {
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;
	    };

	    fprintf
	      (dump,
	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->cost,
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       curr_state->middle_bundle_stops,
	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	       INSN_UID (insn));
	  }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost,
       then with minimal inserted nops, and finally with branch insns
       placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = unknown_for_bundling_p (insn);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  Two bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have a stop inside a bundle.  */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    rtx nop_pat = gen_nop ();
	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have two bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  Group barrier has
	 no slot.  Asm insn takes all bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	pos--;
      /* Long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  rtx nop_pat = gen_nop ();
	  ia64_emit_insn_before (nop_pat, insn);
	  rtx_insn *nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

  if (flag_checking)
    {
      /* Assert right calculation of middle_bundle_stops.  */
      int num = best_state->middle_bundle_stops;
      bool start_bundle = true, end_bundle = false;

      for (insn = NEXT_INSN (prev_head_insn);
	   insn && insn != tail;
	   insn = NEXT_INSN (insn))
	{
	  if (!INSN_P (insn))
	    continue;
	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
	    start_bundle = true;
	  else
	    {
	      rtx_insn *next_insn;

	      for (next_insn = NEXT_INSN (insn);
		   next_insn && next_insn != tail;
		   next_insn = NEXT_INSN (next_insn))
		if (INSN_P (next_insn)
		    && (ia64_safe_itanium_class (next_insn)
			!= ITANIUM_CLASS_IGNORE
			|| recog_memoized (next_insn)
			== CODE_FOR_bundle_selector)
		    && GET_CODE (PATTERN (next_insn)) != USE
		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
		  break;

	      end_bundle = next_insn == NULL_RTX
		|| next_insn == tail
		|| (INSN_P (next_insn)
		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
		  && !start_bundle && !end_bundle
		  && next_insn
		  && !unknown_for_bundling_p (next_insn))
		num--;

	      start_bundle = false;
	    }
	}

      gcc_assert (num == 0);
    }

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  bundling_p = 0;
  dfa_clean_insn_cache ();
}
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in the scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  rtx_insn *last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    seen_good_insn = 1;
	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
	}
    }
}
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initializes the variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces an address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces an address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
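/* Example of the shape this bypass matches (illustrative assembly, not
   emitted by this function): the producer computes a pointer that the
   consumer load dereferences,

       ld8  r14 = [r15] ;;	// producer: loads an address
       ld4  r2  = [r14]	// consumer: r14 feeds the address

   so the define_bypass entries in the .md file guarded by this
   predicate can assign that producer/consumer pair its own latency.  */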
/* The following function returns TRUE if INSN produces an address for a
   load/store insn.  We place such insns into the M slot because that
   decreases their latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      int r;
      rtx_insn *head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (! LABEL_P (head))
	continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      rtx_insn *insn = BB_HEAD (bb);

      while (1)
	{
	  if (CALL_P (insn)
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx_insn *b =
		emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && flag_schedule_insns_after_reload
      && dbg_cnt (ia64_sched2))
    {
      basic_block bb;
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo the alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
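/* Usage note for the syscall_linkage case above (illustrative
   declaration, not from this file): the attribute is attached to the
   function, e.g.

     extern long sys_stub (long, long) __attribute__ ((syscall_linkage));

   and compiling such a function makes in0-in7 live at every exit, as
   described above.  */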
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
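/* Example of the payoff (illustrative assembly): a variable placed in
   .sdata/.sbss is reachable gp-relative in a single add,

       addl r14 = @gprel(x), gp ;;
       ld4  r15 = [r14]

   while data outside the addl immediate range first needs its address
   loaded from the linkage table, costing one more load.  */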
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
		  bool unwind, bool frame ATTRIBUTE_UNUSED)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
}
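/* For a mid-function epilogue the directives emitted above interleave
   with later output roughly like this (illustrative):

       .label_state 1
       .restore sp
       ...
       .body
       .copy_state 1

   where the .body/.copy_state pair is produced by ia64_asm_unwind_emit
   below once the next basic block starts.  */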
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */

static void
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
			bool unwind, bool frame)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  if (dest == stack_pointer_rtx)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
			 -INTVAL (op1));
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (src == hard_frame_pointer_rtx);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}
    }
  else if (dest == hard_frame_pointer_rtx)
    {
      gcc_assert (src == stack_pointer_rtx);
      gcc_assert (frame_pointer_needed);

      if (unwind)
	fprintf (asm_out_file, "\t.vframe r%d\n",
		 ia64_dbx_register_number (REGNO (dest)));
    }
  else
    gcc_unreachable ();
}
/* This function processes a SET pattern for REG_CFA_REGISTER.  */

static void
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int dest_regno = REGNO (dest);
  int src_regno;

  if (src == pc_rtx)
    {
      /* Saving the return address pointer.  */
      if (unwind)
	fprintf (asm_out_file, "\t.save rp, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      return;
    }

  src_regno = REGNO (src);

  switch (src_regno)
    {
    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t.save pr, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
/* This function processes a SET pattern for REG_CFA_OFFSET.  */

static void
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int src_regno = REGNO (src);
  const char *saveop;
  HOST_WIDE_INT off;
  rtx base;

  gcc_assert (MEM_P (dest));
  if (GET_CODE (XEXP (dest, 0)) == REG)
    {
      base = XEXP (dest, 0);
      off = 0;
    }
  else
    {
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
      base = XEXP (XEXP (dest, 0), 0);
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
    }

  if (base == hard_frame_pointer_rtx)
    {
      saveop = ".savepsp";
      off = - off;
    }
  else
    {
      gcc_assert (base == stack_pointer_rtx);
      saveop = ".savesp";
    }

  src_regno = REGNO (src);
  switch (src_regno)
    {
    case BR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_b0]);
      if (unwind)
	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case PR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_LC_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_PFS_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case GR_REG (4):
    case GR_REG (5):
    case GR_REG (6):
    case GR_REG (7):
      if (unwind)
	fprintf (asm_out_file, "\t.save.g 0x%x\n",
		 1 << (src_regno - GR_REG (4)));
      break;

    case BR_REG (1):
    case BR_REG (2):
    case BR_REG (3):
    case BR_REG (4):
    case BR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.b 0x%x\n",
		 1 << (src_regno - BR_REG (1)));
      break;

    case FR_REG (2):
    case FR_REG (3):
    case FR_REG (4):
    case FR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.f 0x%x\n",
		 1 << (src_regno - FR_REG (2)));
      break;

    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
      if (unwind)
	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		 1 << (src_regno - FR_REG (12)));
      break;

    default:
      /* ??? For some reason we mark other general registers, even those
	 we can't represent in the unwind info.  Ignore them.  */
      break;
    }
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

static void
ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
{
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
		   == EXIT_BLOCK_PTR_FOR_FN (cfun);

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
	{
	  if (unwind)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state %d\n",
		       cfun->machine->state_num);
	    }
	  need_copy_state = false;
	}
    }

  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if haven't done process_epilogue() yet, do it now */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return;
    }

  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
	handled_one = true;
	break;

      case REG_CFA_OFFSET:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_offset (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_CFA_REGISTER:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_register (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
	/* Not used in the ia64 port.  */
	gcc_unreachable ();

      default:
	/* Not a frame-related note.  */
	break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
ia64_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
10392 ia64_asm_init_sections (void)
10394 exception_section
= get_unnamed_section (0, output_section_asm_op
,
10398 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10400 static enum unwind_info_type
10401 ia64_debug_unwind_info (void)
enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ,
  IA64_BUILTIN_NANQ,
  IA64_BUILTIN_NANSQ,
  IA64_BUILTIN_max
};

static GTY(()) tree ia64_builtins [(int) IA64_BUILTIN_max];
static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  if (float64x_type_node != NULL_TREE
      && TYPE_MODE (float64x_type_node) == XFmode)
    float80_type = float64x_type_node;
  else
    {
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
    }
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree const_string_type
	= build_pointer_type (build_qualified_type
			      (char_type_node, TYPE_QUAL_CONST));

      (*lang_hooks.types.register_builtin_type) (float128_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (float128_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					const_string_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_nanq", ftype,
				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
				   "nanq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANQ] = decl;

      decl = add_builtin_function ("__builtin_nansq", ftype,
				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
				   "nansq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}
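/* Usage sketch for the two unconditional builtins registered above
   (illustrative user code, not part of the compiler):

     void *bs_top = __builtin_ia64_bsp ();	// read ar.bsp
     __builtin_ia64_flushrs ();		// flush dirty stacked regs

   The first expands to a bsp_value insn returning a pointer; the
   second expands to a flushrs insn.  */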
static tree
ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ia64_builtins fn_code = (enum ia64_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case IA64_BUILTIN_NANQ:
	case IA64_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IA64_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_NANQ:
    case IA64_BUILTIN_NANSQ:
    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}
/* Implement TARGET_FUNCTION_ARG_PADDING.

   On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

static pad_direction
ia64_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */
  if (TARGET_HPUX
      && type
      && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return PAD_UPWARD;

  /* Fall back to the default.  */
  return default_function_arg_padding (mode, type);
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}

/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
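
/* Illustrative example (editor's sketch): because the SImode optab
   entries are cleared above, a 32-bit division such as "a / b" on ints
   is widened by the optab machinery and performed with the DImode
   millicode routine __milli_divI rather than a 32-bit helper.  */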

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Use soft-fp.  */

static void
ia64_soft_fp_init_libfuncs (void)
{
}

static bool
ia64_vms_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || mode == DImode);
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
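
/* Editor's note on the mask values: bit 0 covers relocations against
   local symbols and bit 1 relocations against global symbols, so 3
   forces both kinds into read-write sections, while 2 forces only the
   global ones there and lets non-PIC code keep locally-resolved
   relocations in read-only data.  */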

/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
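
/* Illustrative example (editor's sketch): a variable placed in a
   section named ".sdata.foo" (e.g. under -fdata-sections) is flagged
   SECTION_SMALL here, which keeps it addressable with short
   gp-relative sequences.  */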

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && lang_GNU_CXX ());
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
	  if (satisfies_constraint_I (vcall_offset_rtx))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
	{
	  if (!satisfies_constraint_J (vcall_offset_rtx))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
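
/* Illustrative example (editor's sketch): for a thunk with a small
   DELTA and no VCALL_OFFSET, the code above boils down to a single
   add that biases the incoming this pointer followed by a sibling
   branch to the target function, with no stack frame ever created.  */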

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
		       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
      return true;

    case E_SFmode:
    case E_DFmode:
    case E_XFmode:
    case E_RFmode:
      return true;

    case E_TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      return true;

    case E_V2SFmode:
      return true;

    default:
      return false;
    }
}

/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
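
/* Illustrative example (editor's sketch): with GNU as, profile
   counters enabled, and no static chain, the fputs calls above emit
   roughly

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	addl out3 = @ltoff(.LP0), r1
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;

   where .LP0 stands for the internal label assumed to be generated
   for LABELNO 0.  */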

static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
		     VOIDmode,
		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
		     ip, Pmode,
		     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
				  tree name ATTRIBUTE_UNUSED,
				  tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
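
/* Illustrative example (editor's sketch): these suffix mappings make
   the literals 1.0q and 1.0w constants of __float128 (TFmode) and
   __float80 (XFmode) respectively in GNU C.  */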

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}
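
/* Editor's note: these cached DFmode constants are referenced from
   the machine description (assumed to be the reciprocal/square-root
   refinement sequences in ia64.md); caching the rtx avoids rebuilding
   the REAL_VALUE_TYPE on every expansion.  */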

static machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode (regno);
}

/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   anymore.  */

static bool
ia64_member_type_forces_blk (const_tree, machine_mode mode)
{
  return TARGET_HPUX && mode == TFmode;
}

/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
			    enum node_frequency freq ATTRIBUTE_UNUSED,
			    bool startup ATTRIBUTE_UNUSED,
			    bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}

/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}

/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
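
/* Illustrative example (editor's sketch): for a two-operand V8QImode
   permutation {3,4,5,6,7,8,9,10}, SHIFT is 3, so the code emits a
   single shrp on the DImode views of the inputs with a shift count of
   24 bits, extracting bytes 3..10 of the concatenated pair.  */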

/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}

/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
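
/* Illustrative example (editor's sketch): for a V4HImode permutation
   {0,1,4,5}, CONTENTS fits the "punpck even halves" test, so DREMAP
   becomes the interleave {0,4,1,5} and DFINAL the single-operand
   shuffle {0,2,1,3}; the two-operand problem is thereby reduced to
   one mix insn followed by one mux2.  */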

/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
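
/* Illustrative example (editor's sketch): for the V4HImode permutation
   {0,5,2,7}, PERM2 is {0,1,2,3} for both inputs and MASK selects op0
   in lanes 0 and 2 and op1 in lanes 1 and 3, so the and/andnot/ior
   merge yields {op0[0], op1[1], op0[2], op1[3]}.  */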

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
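
/* Editor's note: extracting the even or odd elements of a vector pair
   is the canonical building block for widening and narrowing vector
   operations (the callers are assumed to be the vec_pack/vec_unpack
   style expanders in the vector machine description).  */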
11911 #include "gt-ia64.h"