/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2015 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-attr.h"
52 #include "statistics.h"
54 #include "fixed-value.h"
61 #include "insn-codes.h"
66 #include "dominance.h"
72 #include "cfgcleanup.h"
73 #include "basic-block.h"
75 #include "diagnostic-core.h"
76 #include "sched-int.h"
79 #include "target-def.h"
80 #include "common/common-target.h"
82 #include "hash-table.h"
83 #include "langhooks.h"
84 #include "tree-ssa-alias.h"
85 #include "internal-fn.h"
86 #include "gimple-fold.h"
88 #include "gimple-expr.h"
97 #include "tm-constrs.h"
98 #include "sel-sched.h"
101 #include "dumpfile.h"
102 #include "builtins.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (machine_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
					      const unsigned char *sel);
#define MAX_VECT_LEN 8
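/* IA-64 vector modes are at most 64 bits wide, so V8QImode's eight
   elements are the most a constant permutation has to describe.  */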
struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,	       0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
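/* data1/data2/data4/data8 are the IA-64 assembler's directives for
   emitting 1-, 2-, 4-, and 8-byte integers; the ".ua" forms permit
   unaligned placement.  */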
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ia64_libgcc_floating_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

struct gcc_target targetm = TARGET_INITIALIZER;
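/* TARGET_INITIALIZER expands to a target vector built from the hooks
   redefined above, with the documented defaults for everything else.  */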
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);
      else
	abort ();
      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;

    case POST_DEC:
      return 0;

    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;

    default:
      abort ();
    }
  return 1;
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
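  /* If OP is 2^N - 1 (a block of N low 1-bits), then OP + 1 is 2^N and
     exact_log2 returns the field length N; otherwise it returns -1.  */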
  return exact_log2 (op + 1);
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}
static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}
static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
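  /* Register 13 ("tp") is reserved as the thread pointer by the IA-64
     software conventions, so TLS addresses are offsets from it.  */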
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (mode, sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	addend = 0;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
1393 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1395 rtx_insn
*insn
, *first
= get_last_insn ();
1397 emit_move_insn (op0
, op1
);
1399 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1401 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
			     base, 0))
	dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
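/* A REG_INC note records that an insn auto-modifies its address
   register; later passes such as scheduling depend on seeing it.  */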
  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret;
      rtx_insn *insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given a NaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     a NaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* Unordered relational operators do not raise FP_INVALID
	     when given a NaN operand.  */
	case UNLT:      magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
	case UNLE:      magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	case UNGT:      magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
	case UNGE:      magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	  /* Not supported.  */
	case UNEQ:
	case LTGT:
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
						   ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = GEN_INT (0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1966 op1
= CONST0_RTX (mode
);
1975 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1976 emit_insn (gen_rtx_SET (dest
, x
));
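/* A worked example of the GTU lowering above: for V2SI, x >u y is
   rewritten as (x - 0x80000000) >s (y - 0x80000000), since subtracting
   the bias 0x80000000 maps unsigned order onto signed order.  For V8QI
   and V4HI, x >u y becomes a test of the saturating difference: x -us y
   is nonzero exactly when x >u y, so comparing it EQ against zero and
   flipping NEGATE yields the desired sense.  */
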
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}

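/* The general case above is the usual mask-and-merge identity

       dest = (cmp & on_true) | (~cmp & on_false)

   which is why the zero special cases can drop one AND each: when
   on_true is zero only the ~cmp & on_false term survives, and
   vice versa.  */
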
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}

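/* The V4HI UMAX shortcut relies on the identity

       umax (a, b) = (a -us b) + b

   where -us is unsigned saturating subtraction: if a <= b the first
   term is 0 and the sum is b, otherwise it is (a - b) + b = a.  */
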
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

static void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.one_operand_p = false;
  d.testing_p = false;

  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

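/* For example, on a little-endian target with V4HI (nelt == 4) and
   HIGHP false, the loop above builds the permutation { 0, 4, 1, 5 }:
   element i of LO is interleaved with element i of HI, which matches
   the unpack element ordering; HIGHP true yields { 2, 6, 3, 7 }.  */
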
/* Return a vector of the sign-extension of VEC.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

  if (unsignedp)
    return zero;
  else
    {
      rtx sign = gen_reg_rtx (mode);
      bool neg;

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
      gcc_assert (!neg);

      return sign;
    }
}

/* Emit an integral vector unpack operation.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}

/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  machine_mode wmode;
  rtx l, h, t, sign;

  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}

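/* So a widening sum of V8QI into V4HI proceeds roughly as

       l = widened low half of operands[1]
       h = widened high half of operands[1]
       operands[0] = h + (l + operands[2])

   with the sign vector from ia64_unpack_sign supplying the extension
   bits for both unpacks.  */
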
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
}

static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

static int
get_reg (enum ia64_frame_regs r)
{
  reg_emitted (r);
  return current_frame_info.r[r];
}

static bool
is_emitted (int regno)
{
  unsigned int r;

  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return true;
  return false;
}

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    {
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
    }
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

        cmp_reg = mem;
      label:
        old_reg = cmp_reg;
        new_reg = cmp_reg op val;
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
        if (cmp_reg != old_reg)
          goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
                       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
        val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      switch (model)
        {
        case MEMMODEL_ACQ_REL:
        case MEMMODEL_SEQ_CST:
          emit_insn (gen_memory_barrier ());
          /* FALLTHRU */
        case MEMMODEL_RELAXED:
        case MEMMODEL_ACQUIRE:
        case MEMMODEL_CONSUME:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_acq_si;
          else
            icode = CODE_FOR_fetchadd_acq_di;
          break;
        case MEMMODEL_RELEASE:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_rel_si;
          else
            icode = CODE_FOR_fetchadd_rel_di;
          break;

        default:
          gcc_unreachable ();
        }

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
        {
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
                                         true, OPTAB_WIDEN);
          if (new_reg != new_dst)
            emit_move_insn (new_dst, new_reg);
        }
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (model == MEMMODEL_RELAXED
              || model == MEMMODEL_RELEASE
              || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
                                     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
                                   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}

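/* For illustration: on the fetchadd fast path above, something like
   __atomic_fetch_add (&x, 1, __ATOMIC_ACQUIRE) on a 4-byte x becomes a
   single "fetchadd4.acq r = [x], 1".  The fetchadd instruction only
   encodes the increments -16, -8, -4, -1, 1, 4, 8 and 16 (which is
   what the fetchadd_operand predicate is screening for); every other
   operation falls through to the ld/cmpxchg loop described in the
   comment before ia64_expand_atomic_op.  */
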
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

static void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}

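/* The directive emitted above looks like, e.g.,

       .pred.safe_across_calls p1-p5,p16-p63

   naming the predicate registers the assembler may assume are not
   preserved across calls; the set shown is the typical one when no
   predicate register is call-saved by the function itself.  */
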
/* Globalize a declaration.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  int regno;

  if (emitted_frame_related_regs[r] != 0)
    {
      regno = emitted_frame_related_regs[r];
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
               && regno >= GR_REG (1) && regno <= GR_REG (31))
        current_frame_info.gr_used_mask |= 1 << regno;

      return regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (crtl->is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! df_regs_ever_live_p (regno)
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
            && ! is_emitted (regno))
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      while (regno < (80 - frame_pointer_needed))
        if (! is_emitted (LOC_REG (regno++)))
          {
            current_frame_info.n_local_regs = regno;
            return LOC_REG (regno - 1);
          }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  gcc_unreachable ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int min_regno;
  int max_regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (df_regs_ever_live_p (regno))
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (crtl->profile)
    i = MAX (i, 1);
#endif
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
        {
          current_frame_info.r[reg_fp] = LOC_REG (79);
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
        }
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
        {
          SET_HARD_REG_BIT (mask, GR_REG (1));
          spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          extra_spill_size += 8;
          n_spilled += 1;
        }

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
        {
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.r[reg_save_ar_pfs]
            = find_gr_spill (reg_save_ar_pfs, 1);
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
            {
              extra_spill_size += 8;
              n_spilled += 1;
            }
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.

     If we have already emitted code for any of those registers,
     then it's already too late to change.  */
  min_regno = MIN (current_frame_info.r[reg_fp],
                   MIN (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  max_regno = MAX (current_frame_info.r[reg_fp],
                   MAX (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  if (min_regno > 0
      && min_regno + 2 == max_regno
      && (current_frame_info.r[reg_fp] == min_regno + 1
          || current_frame_info.r[reg_save_b0] == min_regno + 1
          || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_save_b0] == 0
          || emitted_frame_related_regs[reg_save_b0] == min_regno)
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
          || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_fp] == 0
          || emitted_frame_related_regs[reg_fp] == min_regno + 2))
    {
      current_frame_info.r[reg_save_b0] = min_regno;
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
      current_frame_info.r[reg_fp] = min_regno + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
      if (current_frame_info.r[reg_save_pr] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        df_set_regs_ever_live (regno, true);
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
    {
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.r[reg_save_ar_unat]
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_unat] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (df_regs_ever_live_p (AR_LC_REGNUM))
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.r[reg_save_ar_lc]
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_lc] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Worker function for TARGET_CAN_ELIMINATE.  */

static bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = -current_frame_info.total_size;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = 0;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = 16 - crtl->args.pretend_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = (current_frame_info.total_size
                    + 16 - crtl->args.pretend_args_size);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}

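/* A worked case: eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM in a function with a 64-byte frame and no
   pretend args yields 64 + 16 - 0 = 80, i.e. incoming arguments sit
   just above the caller's 16-byte scratch area at the top of the
   frame.  */
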
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx_insn *init_after;         /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx_insn *prev_insn[2];       /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (satisfies_constraint_N (disp_rtx))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          add_reg_note (spill_fill_data.prev_insn[iter],
                        REG_INC, spill_fill_data.iter_reg[iter]);
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;
      rtx_insn *insn;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = get_insns ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx_insn *first = get_insns ();
          if (first)
            insn = emit_insn_before (seq, first);
          else
            insn = emit_insn (seq);
        }
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
          rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem;
  rtx_insn *insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      add_reg_note (insn, REG_CFA_OFFSET,
                    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
                                              plus_constant (Pmode,
                                                             base, off)),
                                 frame_reg));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx_insn *insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)

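/* E.g. for N == 96 stacked registers this is (96 + 96/63 + 1) * 8
   = (96 + 1 + 1) * 8 = 784 bytes: one extra 8-byte slot per group of
   63 registers for the RSE's NaT collection word, plus one more to
   cover a partially filled group.  */
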
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   is the size of the backing store.  ??? This clobbers r2 and r3.  */

static void
ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                             int bs_size)
{
  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));

  /* On the IA-64 there is a second stack in memory, namely the Backing Store
     of the Register Stack Engine.  We also need to probe it after checking
     that the 2 stacks don't overlap.  */
  emit_insn (gen_bsp_value (r3));
  emit_move_insn (r2, GEN_INT (-(first + size)));

  /* Compare current value of BSP and SP registers.  */
  emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
                                              r3, stack_pointer_rtx)));

  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OS (e.g. Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     on other address).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
  emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
  emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

  /* Compare them and raise SEGV if the former has topped the latter.  */
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
                                                                 r3, r2))));
  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
                                                const0_rtx),
                          const0_rtx));
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
                                                 GEN_INT (11))));

  /* Probe the Backing Store if necessary.  */
  if (bs_size > 0)
    emit_stack_probe (r3);

  /* Probe the memory stack if necessary.  */
  if (size == 0)
    ;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  else if (size <= PROBE_INTERVAL)
    emit_stack_probe (r2);

  /* The run-time loop is made up of 8 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
      emit_stack_probe (r2);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_insn (gen_rtx_SET (r2,
                                  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
          emit_stack_probe (r2);
        }

      emit_insn (gen_rtx_SET (r2,
                              plus_constant (Pmode, r2,
                                             (i - PROBE_INTERVAL) - size)));
      emit_stack_probe (r2);
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;

      emit_move_insn (r2, GEN_INT (-first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > (1 << 21))
        {
          emit_move_insn (r3, GEN_INT (-rounded_size));
          emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
        }
      else
        emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
                                                  GEN_INT (-rounded_size))));


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (r2, r2, r3));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      /* TEMP = SIZE - ROUNDED_SIZE.  */
      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
                                                     rounded_size - size)));
          emit_stack_probe (r2);
        }
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

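/* Concretely, assuming the usual 4096-byte PROBE_INTERVAL, a call with
   FIRST == 16384 and SIZE == 12288 takes the unrolled branch above and
   probes at sp-20480, sp-24576 and finally sp-28672 (= FIRST + SIZE);
   anything larger than 4 * PROBE_INTERVAL instead sets up r2/r3 and
   expands the probe_stack_range pattern, whose assembly loop is
   printed by output_probe_stack_range below.  */
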
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR and branch.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);
  fprintf (asm_out_file, "\tbr ");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}

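/* The loop printed above comes out roughly as

       .LPSRL0:
               cmp.eq p6, p7 = r2, r3
               (p6) br.cond.dpnt .LPSRE0
               addl r2 = -4096, r2
               ;;
               probe.w.fault r2, 0
               br .LPSRL0
       .LPSRE0:

   assuming r2/r3 are the TEST_ADDR/LAST_ADDR registers passed in from
   the expander, a 4096-byte probe interval, and %I2 printing the
   paired predicate p7.  */
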
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue (void)
{
  rtx_insn *insn;
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
                                          + current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
                                         size - STACK_CHECK_PROTECT,
                                         bs_size);
          else if (size + bs_size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
        }
      else if (size + bs_size > 0)
        ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
    }

  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
               "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
      PRINTREG(reg_fp);
      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
#undef PRINTREG
    }

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.r[reg_save_ar_pfs])
        {
          regno = current_frame_info.r[reg_save_ar_pfs];
          reg_emitted (reg_save_ar_pfs);
        }
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (ar_pfs_save_reg,
                                     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
        }
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Force the unwind info to recognize this as defining a new CFA,
         rather than some temp register setup.  */
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (satisfies_constraint_I (frame_size_rtx))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (stack_pointer_rtx,
                                     gen_rtx_PLUS (DImode,
                                                   stack_pointer_rtx,
                                                   frame_size_rtx)));
        }

      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat])
        {
          ar_unat_save_reg
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
          reg_emitted (reg_save_ar_unat);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      if (current_frame_info.r[reg_save_ar_unat])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
        }

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
        emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.r[reg_save_pr] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
          reg_emitted (reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.r[reg_save_ar_unat] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.r[reg_save_ar_pfs] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.r[reg_save_ar_lc] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
          reg_emitted (reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Save the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.r[reg_save_b0] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  if (current_frame_info.r[reg_save_gp])
    {
      reg_emitted (reg_save_gp);
      insn = emit_move_insn (gen_rtx_REG (DImode,
                                          current_frame_info.r[reg_save_gp]),
                             pic_offset_table_rtx);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
                          + current_frame_info.spill_size));

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        gcc_assert (!(cfa_off & 15));
        reg = gen_rtx_REG (XFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
        cfa_off -= 16;
      }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();
}

/* Output the textual info surrounding the prologue.  */

void
ia64_start_function (FILE *file, const char *fnname,
                     tree decl ATTRIBUTE_UNUSED)
{
#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  fputs ("\t.proc ", file);
  assemble_name (file, fnname);
  fputc ('\n', file);
  ASM_OUTPUT_LABEL (file, fnname);
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.  */

void
ia64_expand_epilogue (int sibcall_p)
{
  rtx_insn *insn;
  rtx reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.r[reg_save_pr] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
	  reg_emitted (reg_save_pr);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat] != 0)
	{
	  ar_unat_save_reg
	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
	  reg_emitted (reg_save_ar_unat);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.r[reg_save_ar_pfs] != 0)
    {
      reg_emitted (reg_save_ar_pfs);
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_lc] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
	  reg_emitted (reg_save_ar_lc);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();

  if (current_frame_info.total_size
      || cfun->machine->ia64_eh_epilogue_sp
      || frame_pointer_needed)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (satisfies_constraint_I (frame_size_rtx))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
		    gen_rtx_SET (stack_pointer_rtx,
				 gen_rtx_PLUS (DImode,
					       stack_pointer_rtx,
					       frame_size_rtx)));
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved,
	 so r2 is the first available call clobbered register.  If
	 there was a frame_pointer register, we may have swapped the
	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
	 sure we're using the string "r2" when emitting the register
	 name for the assembler.  */
      if (current_frame_info.r[reg_fp]
	  && current_frame_info.r[reg_fp] == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	{
	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);

	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
				       const0_rtx, const0_rtx,
				       n_inputs, const0_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* ??? We need to mark the alloc as frame-related so that it gets
	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
	     But there's nothing dwarf2 related to be done wrt the register
	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
	     the empty parallel means dwarf2out will not see anything.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
	}
    }
}
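/* Note (added for clarity, not in the original sources): the restores above
   walk cfa_off downward through the same slots the prologue filled -- the
   predicate block first, then ar.unat/ar.pfs/ar.lc and b0, then the
   gr/br/fr spill area -- which is exactly what the two gcc_assert checks
   on cfa_off verify.  */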
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return (void)
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.r[reg_save_b0] == 0
	      && current_frame_info.r[reg_save_pr] == 0
	      && current_frame_info.r[reg_save_ar_pfs] == 0
	      && current_frame_info.r[reg_save_ar_unat] == 0
	      && current_frame_info.r[reg_save_ar_lc] == 0);
    }
  return 0;
}
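/* For illustration (not from the original sources): a small leaf function
   such as `int f (int x) { return x + 1; }' allocates no frame, spills
   nothing, and saves none of b0/pr/ar.pfs/ar.unat/ar.lc, so
   ia64_direct_return returns nonzero and a bare br.ret suffices.  */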
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL;
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
}
/* Split this value after reload, now that we know where the return
   address is saved.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
	{
	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	}
      else
	{
	  HOST_WIDE_INT off;
	  unsigned int regno;
	  rtx off_r;

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	      off -= 8;

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	  else
	    {
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;
	    }

	  /* Load address into scratch register.  */
	  off_r = GEN_INT (off);
	  if (satisfies_constraint_I (off_r))
	    emit_insn (gen_adddi3 (dest, src, off_r));
	  else
	    {
	      emit_move_insn (dest, off_r);
	      emit_insn (gen_adddi3 (dest, src, dest));
	    }

	  src = gen_rtx_MEM (Pmode, dest);
	}
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
int
ia64_hard_regno_rename_ok (int from, int to)
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  unsigned int r;

  for (r = reg_fp; r <= reg_save_ar_lc; r++)
    if (to == current_frame_info.r[r]
	|| from == current_frame_info.r[r]
	|| to == emitted_frame_related_regs[r]
	|| from == emitted_frame_related_regs[r])
      return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      static const char * const directive[2][2] = {
	  /* 64-bit pointer */	 /* 32-bit pointer */
	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
	{ "\tdata8\t@fptr(",	"\tdata4\t@fptr("}	/* aligned */
      };
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.r[reg_save_b0] != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
    }
  if (current_frame_info.r[reg_save_ar_pfs] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_ar_pfs];
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
    }
  if (current_frame_info.r[reg_fp] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.r[reg_fp];
    }
  if (current_frame_info.r[reg_save_pr] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_pr];
    }

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (FILE *file)
{
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  fputs ("\t.body\n", file);
}
/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int i;

  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
      reg_emitted (reg_fp);
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (int regno)
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.r[reg_fp])
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.r[reg_fp];
      else if (regno == current_frame_info.r[reg_fp])
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
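/* Worked example (illustrative, not from the original sources): with 2 input
   and 3 local registers, in0/in1 map to debug regnos 32-33, loc0..loc2 to
   34-36, and out0 to 37, following the stacked-register numbering above.  */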
/* Implement TARGET_TRAMPOLINE_INIT.

   The trampoline should set the static chain pointer to value placed
   into the trampoline and should branch to the specified routine.
   To make the normal indirect-subroutine calling convention work,
   the trampoline must look like a function descriptor; the first
   word being the target address and the second being the target's
   global pointer.

   We abuse the concept of a global pointer by arranging for it
   to point to the data we need to load.  The complete trampoline
   has the following form:

		+-------------------+ \
	TRAMP:	| __ia64_trampoline | |
		+-------------------+  > fake function descriptor
		| TRAMP+16          | |
		+-------------------+ /
		| target descriptor |
		+-------------------+
		| static link       |
		+-------------------+
*/
static void
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly */
  if (!TARGET_GNU_AS)
    {
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	{
	  declared_ia64_trampoline = true;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      "__ia64_trampoline");
	}
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
    {
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
	 relocation against function symbols to make it identical to the
	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
	 strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
      tramp = reg;
    }
  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
}
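/* Note (added for clarity): the trampoline written above is four 8-byte
   words -- the __ia64_trampoline helper, ADDR+16, the target's function
   descriptor address, and the static chain -- 32 bytes in all, matching
   the layout diagram before ia64_trampoline_init.  */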
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

static void
ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			     tree type, int * pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);

  /* Skip the current argument.  */
  ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
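/* Illustrative example (not in the original sources): for
   `int f (int a, ...)', the single named argument consumes one of the
   8 argument slots, so n = 7 and *pretend_size becomes
   7 * UNITS_PER_WORD = 56 bytes of pretend arguments to spill.  */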
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */

static machine_mode
hfa_element_mode (const_tree type, bool nested)
{
  machine_mode element_mode = VOIDmode;
  machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
    return VOIDmode;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case LANG_TYPE:	case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && TYPE_MODE (type) != TCmode)
	return GET_MODE_INNER (TYPE_MODE (type));
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }
}
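/* Examples (illustrative): `struct { float x, y, z; }' is an HFA and yields
   SFmode; `double[4]' yields DFmode; `struct { double d; float f; }' mixes
   element types and yields VOIDmode, as does a struct containing a TFmode
   field, since 128-bit quad precision is excluded above.  */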
/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

static int
ia64_function_arg_words (const_tree type, machine_mode mode)
{
  int words;

  if (mode == BLKmode)
    words = int_size_in_bytes (type);
  else
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
}
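/* For example: a 12-byte BLKmode aggregate needs
   (12 + UNITS_PER_WORD - 1) / UNITS_PER_WORD = (12 + 7) / 8 = 2
   argument words.  */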
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

static int
ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
			  const_tree type, int words)
{
  /* No registers are skipped on VMS.  */
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
    return 0;

  if (type
      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
  else
    return words > 1;
}
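/* For example: if cum->words is odd and the next argument is a 16-byte
   aligned aggregate, this returns 1, so one register slot is skipped and
   the aggregate starts on an even (16-byte aligned) slot boundary.  */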
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

static rtx
ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
		     const_tree type, bool named, bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* For OPEN VMS, emit the instruction setting up the argument register here,
     when we know this will be together with the other arguments setup related
     insns.  This is not the conceptually best place to do this, but this is
     the easiest as we have convenient access to cumulative args info.  */

  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
      && named == 1)
    {
      unsigned HOST_WIDE_INT regval = cum->words;
      int i;

      for (i = 0; i < 8; i++)
	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);

      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
		      GEN_INT (regval));
    }

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      if (FLOAT_MODE_P (mode))
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
      else
	return gen_rtx_REG (mode, basereg + cum->words);
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  machine_mode gr_mode = DImode;
	  unsigned int gr_size;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));

	  gr_size = GET_MODE_SIZE (gr_mode);
	  offset += gr_size;
	  if (gr_size == UNITS_PER_WORD
	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
	    int_regs++;
	  else if (gr_size > UNITS_PER_WORD)
	    int_regs += gr_size / UNITS_PER_WORD;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
		       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
	{
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
							+ offset)),
					  const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
	}
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (named)
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
	 the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
	return gen_rtx_PARALLEL (mode,
		 gen_rtvec (1,
		   gen_rtx_EXPR_LIST (VOIDmode,
		     gen_rtx_REG (DImode, basereg + cum->words + offset),
		     const0_rtx)));
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      machine_mode inner_mode =
	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
/* Implement TARGET_FUNCTION_ARG target hook.  */

static rtx
ia64_function_arg (cumulative_args_t cum, machine_mode mode,
		   const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, false);
}

/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */

static rtx
ia64_function_incoming_arg (cumulative_args_t cum,
			    machine_mode mode,
			    const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, true);
}
/* Return number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

static int
ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
}
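/* Illustrative example: a 3-word aggregate arriving with cum->words == 6
   and no alignment skip has two words left in the 8 register slots, so this
   returns 2 * UNITS_PER_WORD bytes passed in registers, with the final
   word going on the stack.  */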
/* Return ivms_arg_type based on machine_mode.  */

static enum ivms_arg_type
ia64_arg_type (machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
      return FS;
    case DFmode:
      return FT;
    default:
      return I64;
    }
}
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

static void
ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    {
      cum->words += words + offset;
      return;
    }

  cum->atypes[cum->words] = ia64_arg_type (mode);
  cum->words += words + offset;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      cum->int_regs = cum->words;
      cum->fp_regs = cum->words;
      return;
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
/* Arguments with alignment larger than 8 bytes start at the next even
   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */

static unsigned int
ia64_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
    return PARM_BOUNDARY * 2;

  if (type)
    {
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
	return PARM_BOUNDARY * 2;
      else
	return PARM_BOUNDARY;
    }

  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
    return PARM_BOUNDARY * 2;
  else
    return PARM_BOUNDARY;
}
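/* For example: on ILP32 HP-UX a TFmode argument reports PARM_BOUNDARY * 2
   (128 bits) even though TFmode's normal alignment there is only 64 bits,
   matching the even-slot rule in ia64_function_arg_offset.  */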
/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

static bool
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* We can't perform a sibcall if the current function has the syscall_linkage
     attribute.  */
  if (lookup_attribute ("syscall_linkage",
			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return false;

  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module unless
     TARGET_CONST_GP is set to true.  */
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
}
/* Implement va_arg.  */

static tree
ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);
    }

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
      gimplify_assign (unshare_expr (valist), t, pre_p);
    }

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
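/* The rounding above computes, in effect (illustrative):
     valist = (valist + 2 * UNITS_PER_WORD - 1) & -(2 * UNITS_PER_WORD);
   i.e. valist is aligned up to the next 16-byte boundary before the
   standard va_arg expansion runs.  */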
/* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  */

static bool
ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    {
      byte_size = int_size_in_bytes (valtype);
      if (byte_size < 0)
	return true;
    }

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return true;
      else
	return false;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return true;
  else
    return false;
}
/* Return rtx for register that holds the function return value.  */

static rtx
ia64_function_value (const_tree valtype,
		     const_tree fn_decl_or_type,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  int unsignedp;
  const_tree func = fn_decl_or_type;

  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    func = NULL;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
	 in the registers so that we get the bits properly aligned in
	 the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
	 HFA structure, and therefore doesn't go in fp registers.  But
	 the middle-end will give it XFmode anyway, and XFmode values
	 don't normally fit in integer registers.  So we need to smuggle
	 the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
	need_parallel = true;

      if (need_parallel)
	{
	  rtx loc[8];
	  int offset;
	  int bytesize;
	  int i;

	  offset = 0;
	  bytesize = int_size_in_bytes (valtype);
	  /* An empty PARALLEL is invalid here, but the return value
	     doesn't matter for empty structs.  */
	  if (bytesize == 0)
	    return gen_rtx_REG (mode, GR_RET_FIRST);
	  for (i = 0; offset < bytesize; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       GR_RET_FIRST + i),
					  GEN_INT (offset));
	      offset += UNITS_PER_WORD;
	    }
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
	}

      mode = promote_function_mode (valtype, mode, &unsignedp,
				    func ? TREE_TYPE (func) : NULL_TREE,
				    true);

      return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
/* Worker function for TARGET_LIBCALL_VALUE.  */

static rtx
ia64_libcall_value (machine_mode mode,
		    const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode,
		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
			&& (mode) != TFmode)
		       ? FR_RET_FIRST : GR_RET_FIRST));
}
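/* So, for example, an SFmode or DFmode libcall result comes back in
   FR_RET_FIRST (f8 on this ABI), while DImode or TFmode results come back
   in GR_RET_FIRST (r8); TFmode is explicitly excluded from the FP case
   above.  */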
/* Worker function for FUNCTION_VALUE_REGNO_P.  */

static bool
ia64_function_value_regno_p (const unsigned int regno)
{
  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
	  || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4 || size == 8);
  if (size == 4)
    fputs ("\tdata4.ua\t@dtprel(", file);
  else
    fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
}
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

static void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    rtx address ATTRIBUTE_UNUSED)
{
}
/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   G	A floating point constant.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   R	Print .s .d or nothing for a single, double or no truncation.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   X	A pair of floating point registers.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.
   v	Print vector constant value as an 8-byte integer value.  */

static void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	case UNLT:
	  str = "nge";
	  break;
	case UNLE:
	  str = "ngt";
	  break;
	case UNGT:
	  str = "nle";
	  break;
	case UNGE:
	  str = "nlt";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else
	{
	  gcc_assert (GET_CODE (x) == REG);
	  str = reg_names [REGNO (x)];
	}
      fputs (str, file);
      return;

    case 'G':
      {
	long val[4];
	REAL_VALUE_TYPE rv;
	REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
	real_to_target (val, &rv, GET_MODE (x));
	if (GET_MODE (x) == SFmode)
	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
	else if (GET_MODE (x) == DFmode)
	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
					  & 0xffffffff,
					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
					  & 0xffffffff);
	else
	  output_operand_lossage ("invalid %%G mode");
      }
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else
	      {
		gcc_assert (GET_CODE (x) == REG);
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'R':
      if (x == CONST0_RTX (GET_MODE (x)))
	fputs(".s", file);
      else if (x == CONST1_RTX (GET_MODE (x)))
	fputs(".d", file);
      else if (x == CONST2_RTX (GET_MODE (x)))
	;
      else
	output_operand_lossage ("invalid %%R value");
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'X':
      {
	unsigned int regno = REGNO (x);
	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = XINT (x, 0);

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50
		&& br_prob_note_reliable_p (x))
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
		     || !br_prob_note_reliable_p (x))
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (CALL_P (current_output_insn))
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */

static bool
ia64_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '+' || code == ',');
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      switch (outer_code)
	{
	case SET:
	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
	  return true;
	case PLUS:
	  if (satisfies_constraint_I (x))
	    *total = 0;
	  else if (satisfies_constraint_J (x))
	    *total = 1;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	default:
	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case FMA:
      *total = COSTS_N_INSNS (4);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
	 transfer integer regs to FP regs.  */
      if (FLOAT_MODE_P (GET_MODE (x)))
	*total = COSTS_N_INSNS (4);
      else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
	*total = COSTS_N_INSNS (10);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (GET_MODE (x)))
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}
      /* FALLTHRU */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      return false;
    }
}
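/* Rationale, for illustration: with division costed at 60 insns, a division
   by a constant such as `x / 3' is instead expanded by the middle-end into a
   multiply by a magic reciprocal plus shifts, which the cost table above
   prices far cheaper.  */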
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

static int
ia64_register_move_cost (machine_mode mode, reg_class_t from,
			 reg_class_t to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      reg_class_t tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as memory_move_cost
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return memory_move_cost (mode, to, false);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case GR_REGS:
    case FR_REGS:
    case FP_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      gcc_unreachable ();
    }

  return 2;
}
/* Calculate the cost of moving data of MODE from a register to or from
   memory.  */

static int
ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
		       reg_class_t rclass,
		       bool in ATTRIBUTE_UNUSED)
{
  if (rclass == GENERAL_REGS
      || rclass == FR_REGS
      || rclass == FP_REGS
      || rclass == GR_AND_FR_REGS)
    return 4;
  else
    return 10;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
   on RCLASS to use when copying X into that class.  */

static reg_class_t
ia64_preferred_reload_class (rtx x, reg_class_t rclass)
{
  switch (rclass)
    {
    case FR_REGS:
    case FP_REGS:
      /* Don't allow volatile mem reloads into floating point registers.
	 This is defined to force reload to choose the r/m case instead
	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
      if (MEM_P (x) && MEM_VOLATILE_P (x))
	return NO_REGS;

      /* Force all unrecognized constants into the constant pool.  */
      if (CONSTANT_P (x))
	return NO_REGS;
      break;

    default:
      break;
    }

  return rclass;
}
/* This function returns the register class required for a secondary
   register when copying between one of the registers in RCLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (enum reg_class rclass,
			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (rclass)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle
	 this.

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
    case FP_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 crtl->has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
/* Implement targetm.unspec_may_trap_p hook.  */
static int
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_LDA:
    case UNSPEC_LDS:
    case UNSPEC_LDSA:
    case UNSPEC_LDCCLR:
    case UNSPEC_CHKACLR:
    case UNSPEC_CHKS:
      /* These unspecs are just wrappers.  */
      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
    }

  return default_unspec_may_trap_p (x, flags);
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
ia64_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) ia64_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = (global_options_set.x_g_switch_value
			    ? g_switch_value
			    : IA64_DEFAULT_GVALUE);

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)
    align_loops = 32;
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  ia64_override_options_after_change();
}
/* Implement targetm.override_options_after_change.  */

static void
ia64_override_options_after_change (void)
{
  if (optimize >= 3
      && !global_options_set.x_flag_selective_scheduling
      && !global_options_set.x_flag_selective_scheduling2)
    {
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
    }
  if (mflag_sched_control_spec == 2)
    {
      /* Control speculation is on by default for the selective scheduler,
         but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
    }
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
    {
      /* FIXME: remove this when we'd implement breaking autoinsns as
         a transformation.  */
      flag_auto_inc_dec = 0;
    }
}
/* Initialize the record of emitted frame related registers.  */

void ia64_init_expanders (void)
{
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
}

static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
static enum attr_type ia64_safe_type (rtx_insn *);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else if (DEBUG_INSN_P (insn))
    return ITANIUM_CLASS_IGNORE;
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
#ifdef ENABLE_CHECKING
/* Bitmap whether a register has been written in the current insn.  */
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
			   / HOST_BITS_PER_WIDEST_FAST_INT];

static inline void
rws_insn_set (int regno)
{
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
  SET_HARD_REG_BIT (rws_insn, regno);
}

static inline int
rws_insn_test (int regno)
{
  return TEST_HARD_REG_BIT (rws_insn, regno);
}
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline int
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif
/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like an unconditional write and do not try to check
	     for a complementary pred reg in an earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
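/* Illustrative sketch (added commentary, not part of the original code):
   the write path above is what turns a WAW hazard into a stop bit.  Two
   unconditional writes in one group,

	mov r20 = r0
	mov r20 = r21

   leave write_count == 2 after the first access, so the second call to
   rws_access_regno returns 1 and the caller must emit a group barrier
   between them.  */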
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}

/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	 nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	 doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	 an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	 this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;
      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      gcc_assert (!pred);
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids a failure in rws_access_reg.  */
	  if (! rws_insn_test (REG_VOLATILE))
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      if (GET_CODE (x) == ASM_OPERANDS)
	for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	  if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	    need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  switch (GET_CODE (pat))
	    {
	    case SET:
	      update_set_flags (pat, &new_flags);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
	      break;

	    case USE:
	    case CALL:
	    case ASM_OPERANDS:
	    case ASM_INPUT:
	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
	      break;

	    case CLOBBER:
	      if (REG_P (XEXP (pat, 0))
		  && extract_asm_operands (x) != NULL_RTX
		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
						     new_flags, pred);
		  new_flags = flags;
		}
	      break;

	    case RETURN:
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
      break;
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:    case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
	 describe the elements selected.  On ia64, those integers are
	 always constants.  Avoid walking the PARALLEL so that we don't
	 get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_MF:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_FETCHADD_REL:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_ADDP4:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
	case UNSPEC_LDA:
	case UNSPEC_LDS:
	case UNSPEC_LDS_A:
	case UNSPEC_LDSA:
	case UNSPEC_CHKACLR:
	case UNSPEC_CHKS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	case UNSPEC_SHRP:
	case UNSPEC_COPYSIGN:
	case UNSPEC_FR_RECIP_APPROX_RES:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	case UNSPEC_CMPXCHG_REL:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	case UNSPECV_PROBE_STACK_RANGE:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_BREAK:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	case UNSPECV_PROBE_STACK_ADDRESS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    gcc_unreachable ();
	  }
      break;
    }
  return need_barrier;
}
/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}

/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx_insn *insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && important_for_bundling_p (insn))
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
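/* Illustrative sketch (added commentary, not part of the original code):
   a typical driver loop over this predicate mirrors
   emit_all_insn_group_barriers below:

	if (group_barrier_needed (insn))
	  {
	    emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	    init_insn_group_barriers ();
	  }
*/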
/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx_insn *insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}

/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx_insn *insn;
  rtx_insn *last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (LABEL_P (insn))
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NOTE_P (insn)
	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = 0;
	  insns_since_last_label = 0;
	}
      else if (NONJUMP_INSN_P (insn)
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      gcc_assert (last_label);
	      if (dump)
		fprintf (dump, "Emitting stop before label %d\n",
			 INSN_UID (last_label));
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
	      insn = last_label;

	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed (insn);
	    }
	}
    }
}
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the mentioned above array.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx_insn *insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_prologue_allocate_stack_pr:
    case CODE_FOR_epilogue_deallocate_stack:
    case CODE_FOR_epilogue_deallocate_stack_pr:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */
static int
ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
		    int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
      else
	return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
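/* Illustrative sketch (added commentary, not part of the original code):
   for a store followed by a load that the alias oracle cannot
   disambiguate, e.g.

	st8 [r14] = r15
	ld8 r16 = [r17]		// r17 may equal r14

   a weak dependency (dw == MIN_DEP_WEAK) makes the hook return the
   --param sched-mem-true-dep-cost value instead of the plain latency,
   spacing the pair apart to dodge a likely store-load stall.  */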
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}

/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because this
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx_insn *insn;

  if (!sel_sched_p () && reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}

/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static bool
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
     && get_attr_check_load (insn) == CHECK_LOAD_NO);
}
/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   (taking account for 3-cycle cache reference postponing for stores: Intel
   Itanium 2 Reference Manual for Software Development and Optimization,
   6.7.3.1).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class) {
  case ITANIUM_CLASS_FLD:
  case ITANIUM_CLASS_LD:
    mem_ops_in_group[current_cycle % 4]++;
    break;
  case ITANIUM_CLASS_STF:
  case ITANIUM_CLASS_ST:
    mem_ops_in_group[(current_cycle + 3) % 4]++;
    break;
  default:;
  }
}
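/* Worked example (added commentary, not part of the original code):
   per the 3-cycle store postponing noted above, a store issued on
   cycle 5 is charged to slot (5 + 3) % 4 == 0, i.e. cycle 8, while a
   load issued on cycle 5 is charged to slot 5 % 4 == 1 immediately.  */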
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx_insn **e_ready = ready + n_ready;
  rtx_insn **insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx_insn *insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx_insn *lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx_insn *highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx_insn *insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx_insn *insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}
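/* Illustrative note (added commentary, not part of the original code):
   the ready array handed to the reorder hooks keeps its most desirable
   insn at the highest index, so the reorder_type == 0 pass above sweeps
   asm-like TYPE_UNKNOWN insns down to the low indices (issued last) and
   then advances the READY pointer past them, leaving only ordinary
   insns for the slotting heuristics that follow.  */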
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return zero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  gcc_assert (insn && INSN_P (insn));

  /* Size of ALAT is 32.  As far as we perform conservative
     data speculation, we keep ALAT half-empty.  */
  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
    return ready_index == 0 ? -1 : 1;

  if (ready_index == 0)
    return 0;

  if ((!reload_completed
       || !safe_group_barrier_needed (insn))
      && (!mflag_sched_mem_insns_hard_limit
	  || !is_load_p (insn)
	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
    return 0;

  return 1;
}
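/* Illustrative note (added commentary, not part of the original code):
   the guard above implements the "keep the ALAT half-empty" policy.
   With 16 of the 32 ALAT entries claimed by in-flight ld.a/ld.sa loads,
   a further data-speculative candidate is rejected (return 1) unless it
   sits at index 0 of the ready queue, in which case -1 asks the
   scheduler to stop the multipass lookahead.  */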
/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */
static int
scheduled_good_insn (rtx_insn *last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;

      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);

      clocks_length = new_clocks_length;
    }
}

/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;
/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}

typedef rtx (* gen_func_t) (rtx, rtx);
/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */
static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_movbi,
    gen_movqi_internal,
    gen_movhi_internal,
    gen_movsi_internal,
    gen_movdi_internal,
    gen_movsf_internal,
    gen_movdf_internal,
    gen_movxf_internal,
    gen_movti_internal,
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else if (ts == 0)
    gen_ld = gen_ld_;
  else
    gcc_unreachable ();

  return gen_ld[mode_no];
}
/* Constants that help mapping 'machine_mode' to int.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };

/* Return index of the MODE.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case BImode: return 0; /* SPEC_MODE_FIRST  */
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case HImode: return 2;
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case DImode: return 4;
    case SFmode: return 5;
    case DFmode: return 6;
    case XFmode: return 7;
    case TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
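/* Worked example (added commentary, not part of the original code):
   SPEC_GEN_EXTEND_OFFSET is 8 - 1 + 1 == 8, and the gen_ld_* tables
   above hold 9 plain-move entries (indices 0-8) followed by 3
   zero-extend entries, so e.g. a zero-extending SImode load maps to
   index 3 + 8 == 11, the gen_zero_extendsidi2* slot.  */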
/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
	  && (!sel_sched_p ()
	      || reload_completed))
	{
	  mask |= BEGIN_CONTROL;

	  if (!sel_sched_p () && mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & CONTROL_SPEC)
	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx_insn *insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }

	  mode_rtx = mem;
	}
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
	{
	  gcc_assert (REG_P (reg) && MEM_P (mem));
	  mode_rtx = mem;
	}
      else
	return -1;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code;

    code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
	return code;

      default:
	return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}
/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}
/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */
ds_t
ia64_get_insn_spec_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* If INSN is a speculative load return a ds with the speculation types that
   will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
ds_t
ia64_get_insn_checked_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* If GEN_P is true, calculate the index of needed speculation check and return
   speculative pattern for INSN with speculative mode TS, machine mode
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
   If GEN_P is false, just calculate the index of needed speculation check.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}
8270 If simple check is needed, pass true for SIMPLE_CHECK_P.
8271 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8273 get_spec_check_gen_function (ds_t ts
, int mode_no
,
8274 bool simple_check_p
, bool clearing_check_p
)
8276 static gen_func_t gen_ld_c_clr
[] = {
8286 gen_zero_extendqidi2_clr
,
8287 gen_zero_extendhidi2_clr
,
8288 gen_zero_extendsidi2_clr
,
8290 static gen_func_t gen_ld_c_nc
[] = {
8300 gen_zero_extendqidi2_nc
,
8301 gen_zero_extendhidi2_nc
,
8302 gen_zero_extendsidi2_nc
,
8304 static gen_func_t gen_chk_a_clr
[] = {
8305 gen_advanced_load_check_clr_bi
,
8306 gen_advanced_load_check_clr_qi
,
8307 gen_advanced_load_check_clr_hi
,
8308 gen_advanced_load_check_clr_si
,
8309 gen_advanced_load_check_clr_di
,
8310 gen_advanced_load_check_clr_sf
,
8311 gen_advanced_load_check_clr_df
,
8312 gen_advanced_load_check_clr_xf
,
8313 gen_advanced_load_check_clr_ti
,
8314 gen_advanced_load_check_clr_di
,
8315 gen_advanced_load_check_clr_di
,
8316 gen_advanced_load_check_clr_di
,
8318 static gen_func_t gen_chk_a_nc
[] = {
8319 gen_advanced_load_check_nc_bi
,
8320 gen_advanced_load_check_nc_qi
,
8321 gen_advanced_load_check_nc_hi
,
8322 gen_advanced_load_check_nc_si
,
8323 gen_advanced_load_check_nc_di
,
8324 gen_advanced_load_check_nc_sf
,
8325 gen_advanced_load_check_nc_df
,
8326 gen_advanced_load_check_nc_xf
,
8327 gen_advanced_load_check_nc_ti
,
8328 gen_advanced_load_check_nc_di
,
8329 gen_advanced_load_check_nc_di
,
8330 gen_advanced_load_check_nc_di
,
8332 static gen_func_t gen_chk_s
[] = {
8333 gen_speculation_check_bi
,
8334 gen_speculation_check_qi
,
8335 gen_speculation_check_hi
,
8336 gen_speculation_check_si
,
8337 gen_speculation_check_di
,
8338 gen_speculation_check_sf
,
8339 gen_speculation_check_df
,
8340 gen_speculation_check_xf
,
8341 gen_speculation_check_ti
,
8342 gen_speculation_check_di
,
8343 gen_speculation_check_di
,
8344 gen_speculation_check_di
,
8347 gen_func_t
*gen_check
;
8349 if (ts
& BEGIN_DATA
)
8351 /* We don't need recovery because even if this is ld.sa
8352 ALAT entry will be allocated only if NAT bit is set to zero.
8353 So it is enough to use ld.c here. */
8357 gcc_assert (mflag_sched_spec_ldc
);
8359 if (clearing_check_p
)
8360 gen_check
= gen_ld_c_clr
;
8362 gen_check
= gen_ld_c_nc
;
8366 if (clearing_check_p
)
8367 gen_check
= gen_chk_a_clr
;
8369 gen_check
= gen_chk_a_nc
;
8372 else if (ts
& BEGIN_CONTROL
)
8375 /* We might want to use ld.sa -> ld.c instead of
8378 gcc_assert (!ia64_needs_block_p (ts
));
8380 if (clearing_check_p
)
8381 gen_check
= gen_ld_c_clr
;
8383 gen_check
= gen_ld_c_nc
;
8387 gen_check
= gen_chk_s
;
8393 gcc_assert (mode_no
>= 0);
8394 return gen_check
[mode_no
];
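/* Illustrative note (added commentary, not part of the original code):
   for data speculation the generated pair is conceptually

	ld8.a  r14 = [r15]	// advanced load, allocates an ALAT entry
	...
	ld8.c.clr r14 = [r15]	// simple check: reload if the entry was lost

   while the non-simple variant emits chk.a, which branches to recovery
   code instead of reloading inline.  */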
/* Return nonzero, if INSN needs branchy recovery check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}

/* Generate (or regenerate) a recovery check for INSN.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}
/* Return nonzero, if X is branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero, if SRC belongs to recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx_insn *insn;	/* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num;		/* insn number (0 - for initial state, 1 - for the 1st
			   insn etc.)  */
  int cost;		/* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps an insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
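/* Illustrative note (added commentary, not part of the original code):
   bundling is a shortest-path search over these states.  For each insn
   number, the chain index_to_bundle_states[n] holds the candidate
   states, each remembering its ->originator and the number of nops
   inserted before/after the insn; the best final state is then traced
   back through ->originator to decide the actual templates and nops.  */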
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
8596 struct bundle_state_hasher
: typed_noop_remove
<bundle_state
>
8598 typedef bundle_state
*value_type
;
8599 typedef bundle_state
*compare_type
;
8600 static inline hashval_t
hash (const bundle_state
*);
8601 static inline bool equal (const bundle_state
*, const bundle_state
*);
8604 /* The function returns hash of BUNDLE_STATE. */
8607 bundle_state_hasher::hash (const bundle_state
*state
)
8611 for (result
= i
= 0; i
< dfa_state_size
; i
++)
8612 result
+= (((unsigned char *) state
->dfa_state
) [i
]
8613 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
8614 return result
+ state
->insn_num
;
8617 /* The function returns nonzero if the bundle state keys are equal. */
8620 bundle_state_hasher::equal (const bundle_state
*state1
,
8621 const bundle_state
*state2
)
8623 return (state1
->insn_num
== state2
->insn_num
8624 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
8625 dfa_state_size
) == 0);
8628 /* Hash table of the bundle states. The key is dfa_state and insn_num
8629 of the bundle states. */
8631 static hash_table
<bundle_state_hasher
> *bundle_state_table
;
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with the given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
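
/* Descriptive note: the chain of comparisons above is a lexicographic
   order over (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops); a later key is consulted only when all earlier
   keys tie.  E.g. a state with cost 2 and 5 accumulated insns evicts a
   cached state with cost 2 and 6 accumulated insns, but never one with
   cost 1.  */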
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}
/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all the bundle.
   If it was successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx_insn *insn, int try_bundle_end_p,
		     int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (!unknown_for_bundling_p (insn));

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (unknown_for_bundling_p (insn))
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
}
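
/* Descriptive note: the three branches above mirror the three ways an
   insn can be issued: after a group barrier (which occupies no slot),
   within the current cycle (GET_MODE (insn) != TImode), or at the start
   of a new cycle (TImode), where the pre-cycle pseudo insn advances the
   automaton and the state's cost grows by one cycle.  */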
/* The following function returns the position in the two-window bundle
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search
   because of an undocumented anomaly in McKinley-derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
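
/* Descriptive note: the returned codes index the bundle templates in
   the machine description; judging from the unit names checked above
   and from ia64_add_bundle_selector_before below (which treats 4 and 5
   as .bbb/.mbb), the mapping appears to be 0=.mii, 1=.mmi, 2=.mfi,
   3=.mmf, 4=.bbb, 5=.mbb, 6=.mib, 7=.mmb, 8=.mfb, 9=.mlx -- the
   authoritative list is the NR_BUNDLES table in the .md file.  */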
/* True when INSN is important for bundling.  */

static bool
important_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* The following function returns an insn important for insn bundling
   following INSN and before TAIL.  */

static rtx_insn *
get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL;
}
/* True when INSN is unknown, but important, for bundling.  */

static bool
unknown_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to the following nops, as br.call sets rp to the address of the
	 following bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about new
   cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of the EBB (extended basic block), we choose the
   best sequence and then, moving back in the EBB, insert templates for
   the best alternative.  The templates are taken from querying the
   automaton state for each insn in the chosen bundle states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  */
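
/* Illustrative sketch (not from the original sources): for an EBB of
   insns i1 i2 i3, the forward pass extends each state of i1 by issuing
   i2 with 0, 1 or 2 leading nops; extensions that reach the same
   automaton state with the same insn number collapse into one hash
   table entry, keeping the better state.  After i3, best_state is the
   cheapest state whose last bundle is full, and the backward pass walks
   its originator links, emitting nops and bundle selectors.  */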
static void
bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx_insn *insn, *next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx_insn *b;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    insn_num++;
  if (insn_num == 0)
    return;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& !important_for_bundling_p (insn)
	&& GET_MODE (insn) == TImode)
      {
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn != tail;
	     next_insn = NEXT_INSN (next_insn))
	  if (important_for_bundling_p (next_insn)
	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
	    {
	      PUT_MODE (next_insn, TImode);
	      break;
	    }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (important_for_bundling_p (insn));
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  Asm insn is always
	     placed in a separate bundle.  */
	  only_bundle_end_p
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && unknown_for_bundling_p (next_insn));
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	  bundle_end_p
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      || type == TYPE_S)
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
				 only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
			       only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
			       only_bundle_end_p);
	}
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	  {
	    /* This structure is taken from generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
	    struct DFA_chip
	    {
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;
	    };

	    fprintf
	      (dump,
	       "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->cost,
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       curr_state->middle_bundle_stops,
	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	       INSN_UID (insn));
	  }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns
       placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = unknown_for_bundling_p (insn);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  Two bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have the stop inside a bundle.  */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    rtx nop_pat = gen_nop ();
	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  Group barrier has
	 no slot.  Asm insn takes all the bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	pos--;
      /* Long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in the analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  rtx nop_pat = gen_nop ();
	  ia64_emit_insn_before (nop_pat, insn);
	  rtx_insn *nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in the analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

#ifdef ENABLE_CHECKING
  {
    /* Assert right calculation of middle_bundle_stops.  */
    int num = best_state->middle_bundle_stops;
    bool start_bundle = true, end_bundle = false;

    for (insn = NEXT_INSN (prev_head_insn);
	 insn && insn != tail;
	 insn = NEXT_INSN (insn))
      {
	if (!INSN_P (insn))
	  continue;
	if (recog_memoized (insn) == CODE_FOR_bundle_selector)
	  start_bundle = true;
	else
	  {
	    rtx_insn *next_insn;

	    for (next_insn = NEXT_INSN (insn);
		 next_insn && next_insn != tail;
		 next_insn = NEXT_INSN (next_insn))
	      if (INSN_P (next_insn)
		  && (ia64_safe_itanium_class (next_insn)
		      != ITANIUM_CLASS_IGNORE
		      || recog_memoized (next_insn)
		      == CODE_FOR_bundle_selector)
		  && GET_CODE (PATTERN (next_insn)) != USE
		  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
		break;

	    end_bundle = next_insn == NULL_RTX
	      || next_insn == tail
	      || (INSN_P (next_insn)
		  && recog_memoized (next_insn)
		  == CODE_FOR_bundle_selector);
	    if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
		&& !start_bundle && !end_bundle
		&& next_insn
		&& !unknown_for_bundling_p (next_insn))
	      num--;

	    start_bundle = false;
	  }
      }

    gcc_assert (num == 0);
  }
#endif

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
}
/* The following function is called at the end of scheduling the BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in the scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  rtx_insn *last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    seen_good_insn = 1;
	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
	}
    }
}
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initiates the variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
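
/* Illustrative example (hypothetical insns, not from the sources): for
   an ilog producer "add r14 = r2, r3" and a consumer "st8 [r14] = r4",
   r14 is mentioned in the consumer's address, so the bypass applies;
   for "st8 [r5] = r14" it does not, since r14 is only the stored
   data.  */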
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if INSN produces the address for a
   load/store insn.  We will place such insns into the M slot because it
   decreases their latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      int r;
      rtx_insn *head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (! LABEL_P (head))
	continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      rtx_insn *insn = BB_HEAD (bb);

      while (1)
	{
	  if (CALL_P (insn)
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx_insn *b =
		emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && flag_schedule_insns_after_reload
      && dbg_cnt (ia64_sched2))
    {
      basic_block bb;
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one in the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
		  bool unwind, bool frame ATTRIBUTE_UNUSED)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */

static void
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
			bool unwind, bool frame)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  if (dest == stack_pointer_rtx)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
			 -INTVAL (op1));
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (src == hard_frame_pointer_rtx);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}
    }
  else if (dest == hard_frame_pointer_rtx)
    {
      gcc_assert (src == stack_pointer_rtx);
      gcc_assert (frame_pointer_needed);

      if (unwind)
	fprintf (asm_out_file, "\t.vframe r%d\n",
		 ia64_dbx_register_number (REGNO (dest)));
    }
  else
    gcc_unreachable ();
}
/* This function processes a SET pattern for REG_CFA_REGISTER.  */

static void
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int dest_regno = REGNO (dest);
  int src_regno;

  if (src == pc_rtx)
    {
      /* Saving return address pointer.  */
      if (unwind)
	fprintf (asm_out_file, "\t.save rp, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      return;
    }

  src_regno = REGNO (src);

  switch (src_regno)
    {
    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t.save pr, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
/* This function processes a SET pattern for REG_CFA_OFFSET.  */

static void
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int src_regno = REGNO (src);
  const char *saveop;
  HOST_WIDE_INT off;
  rtx base;

  gcc_assert (MEM_P (dest));
  if (GET_CODE (XEXP (dest, 0)) == REG)
    {
      base = XEXP (dest, 0);
      off = 0;
    }
  else
    {
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
      base = XEXP (XEXP (dest, 0), 0);
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
    }

  if (base == hard_frame_pointer_rtx)
    {
      saveop = ".savepsp";
      off = - off;
    }
  else
    {
      gcc_assert (base == stack_pointer_rtx);
      saveop = ".savesp";
    }

  src_regno = REGNO (src);
  switch (src_regno)
    {
    case BR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_b0]);
      if (unwind)
	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case PR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_LC_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_PFS_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
      if (unwind)
	fprintf (asm_out_file, "\t.save.g 0x%x\n",
		 1 << (src_regno - GR_REG (4)));
      break;

    case BR_REG (1): case BR_REG (2): case BR_REG (3):
    case BR_REG (4): case BR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.b 0x%x\n",
		 1 << (src_regno - BR_REG (1)));
      break;

    case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.f 0x%x\n",
		 1 << (src_regno - FR_REG (2)));
      break;

    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
      if (unwind)
	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		 1 << (src_regno - FR_REG (12)));
      break;

    default:
      /* ??? For some reason we mark other general registers, even those
	 we can't represent in the unwind info.  Ignore them.  */
      break;
    }
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

static void
ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
{
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
	== EXIT_BLOCK_PTR_FOR_FN (cfun);

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
	{
	  if (unwind)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state %d\n",
		       cfun->machine->state_num);
	    }
	  need_copy_state = false;
	}
    }

  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if we haven't done process_epilogue() yet, do it now */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return;
    }

  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
	handled_one = true;
	break;

      case REG_CFA_OFFSET:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_offset (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_CFA_REGISTER:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_register (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
	/* Not used in the ia64 port.  */
	gcc_unreachable ();

      default:
	/* Not a frame-related note.  */
	break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
ia64_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
ia64_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
/* Implement TARGET_DEBUG_UNWIND_INFO.  */

static enum unwind_info_type
ia64_debug_unwind_info (void)
{
  return UI_TARGET;
}

enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ,
  IA64_BUILTIN_max
};

static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (float128_type, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      ftype = build_function_type_list (float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type,
					float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}
static rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}
/* On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

static enum direction
ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */
  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
10694 ia64_soft_fp_init_libfuncs (void)

static bool
ia64_vms_valid_pointer_mode (machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}

/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
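
/* Editorial illustration (assumed default small-data threshold): a
   constant that must be placed in memory, say an SFmode literal with
   GET_MODE_SIZE == 4 <= ia64_section_threshold, is routed to .sdata
   by the test above and can then be addressed gp-relative; anything
   over the threshold falls back to the default ELF choice.  */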

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && lang_GNU_CXX ());
}
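
/* Editorial illustration: under the rule above, for C++ types

       struct pod { char buf[32]; };                    // trivial
       struct obj { obj (const obj &); char buf[32]; };

   a by-value return of OBJ passes its return-slot address in out0
   (TREE_ADDRESSABLE holds because of the non-trivial copy
   constructor), while POD keeps the usual r8 convention.  */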

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
        {
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
          if (satisfies_constraint_I (vcall_offset_rtx))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
        {
          if (!satisfies_constraint_J (vcall_offset_rtx))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
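
/* Editorial illustration (assuming the 'I' constraint is this port's
   14-bit signed add immediate): a thunk with DELTA = 16 folds the
   adjustment into one add on "this", whereas a DELTA outside that
   range is first loaded into the scratch register r2 and then added
   via gen_adddi3, matching the two paths above.  */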

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS
      || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}

/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P.  */

static bool
ia64_libgcc_floating_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
    case DFmode:
      return true;

    case XFmode:
#ifdef IA64_NO_LIBGCC_XFMODE
      return false;
#else
      return true;
#endif

    case TFmode:
#ifdef IA64_NO_LIBGCC_TFMODE
      return false;
#else
      return true;
#endif

    default:
      return false;
    }
}

/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}

static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }

  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode, 3,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled the same way; elsewhere __float128 needs its own
     mangling.  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
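
/* Editorial illustration of the manglings chosen above:

       void f (long double);  // XFmode outside HP-UX: _Z1fe
       void g (__float80);    // HP-UX:                _Z1gu9__float80
       void h (__fpreg);      // both:                 _Z1hu7__fpreg

   and a TFmode __float128 mangles as "g" outside HP-UX.  */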

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}
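
/* Editorial illustration: code such as

       __fpreg r;
       double d = r;   // error: invalid conversion from '__fpreg'
       r = 1.0;        // error: invalid conversion to '__fpreg'

   is rejected by the checks above; only VOIDmode targets (a cast to
   void) escape the first test.  */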

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                  tree name ATTRIBUTE_UNUSED,
                                  tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
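
/* Editorial illustration, following the documented HP-UX syntax:

       extern int foo (void) __attribute__ ((version_id ("20040821")));

   A non-string argument such as version_id (1234) is diagnosed by the
   handler above and the attribute is dropped.  */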

/* Target hook for c_mode_for_suffix.  */

static machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
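
/* Editorial illustration (suffix mapping as reconstructed above):

       __float128 q = 1.0q;   // 'q' selects TFmode
       __float80  w = 1.0w;   // 'w' selects XFmode

   the constant suffixes this port accepts for the extended types.  */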

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

static machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;

  return default_get_reg_raw_mode (regno);
}

/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   anymore.  */

static bool
ia64_member_type_forces_blk (const_tree, machine_mode mode)
{
  return TARGET_HPUX && mode == TFmode;
}

/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
                            enum node_frequency freq ATTRIBUTE_UNUSED,
                            bool startup ATTRIBUTE_UNUSED,
                            bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}
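
/* Editorial illustration: for a V4HImode target and PERM = {1,1,3,3}
   the routine builds

       (set (reg:V4HI target)
            (vec_select:V4HI (reg:V4HI op0)
                             (parallel [(const_int 1) (const_int 1)
                                        (const_int 3) (const_int 3)])))

   and keeps it only if recog_memoized accepts it (on this port a mux2
   pattern can match such V4HImode shuffles).  */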

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}

/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}

/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
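
/* Editorial worked example: for V8QImode, two operands and
   PERM = {2,3,4,5,6,7,8,9}, each element is (2 + i) & 15, so
   shift = 2; little-endian picks hi = op1, lo = op0, the bit count
   becomes 2 * 1 * 8 = 16, and a single

       shrp tmp = hi, lo, 16

   funnel-shifts the 128-bit pair right, producing elements 2..9.  */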

/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
        return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
        return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
        elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
                            GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
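
/* Editorial worked example: broadcasting element 3 of a V8QImode
   vector (PERM all 3) takes the V8QImode case above: extzv pulls the
   byte at bit 3 * 8 = 24 (little-endian) into a DImode temp, and the
   mux1 @brcst form then replicates that byte into all eight lanes.  */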

/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)  /* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & (h1 | h3)) == contents)  /* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0x5555) == contents)  /* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0xaaaa) == contents)  /* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i | 1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i + shift) & (2 * nelt - 1);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
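
/* Editorial worked example: for V4HImode and PERM = {1,4,0,5},
   CONTENTS = 0x33 and the "punpck even halves" test (h0 | h2) matches.
   DREMAP interleaves the low halves, T = {op0[0], op1[0], op0[1],
   op1[1]}; REMAP records where each element landed in T, and DFINAL
   reduces to the single-operand shuffle {2,1,0,3} of T, which
   expand_vec_perm_1 can emit directly.  */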

/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
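
/* Editorial worked example: for PERM = {0,5,2,7}, PERM2 = {0,1,2,3}
   and RMASK = {-1,0,-1,0}; T0 and T1 are the mux2 shuffles of op0 and
   op1 by PERM2, and the AND/ANDN/IOR merge yields
   {op0[0], op1[1], op0[2], op1[3]}, i.e. elements {0,5,2,7} of the
   concatenation.  */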

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
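
/* Editorial illustration: for two V4HImode operands and ODD = 1 the
   loop above requests PERM = {1,3,5,7}, the odd elements of the
   OP0/OP1 concatenation; ODD = 0 selects {0,2,4,6}.  */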

#include "gt-ia64.h"